View Javadoc

1   /**
2    *
3    * Licensed to the Apache Software Foundation (ASF) under one
4    * or more contributor license agreements.  See the NOTICE file
5    * distributed with this work for additional information
6    * regarding copyright ownership.  The ASF licenses this file
7    * to you under the Apache License, Version 2.0 (the
8    * "License"); you may not use this file except in compliance
9    * with the License.  You may obtain a copy of the License at
10   *
11   *     http://www.apache.org/licenses/LICENSE-2.0
12   *
13   * Unless required by applicable law or agreed to in writing, software
14   * distributed under the License is distributed on an "AS IS" BASIS,
15   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16   * See the License for the specific language governing permissions and
17   * limitations under the License.
18   */
19  package org.apache.hadoop.hbase.master;
20  
21  import java.io.IOException;
22  import java.io.InterruptedIOException;
23  import java.lang.reflect.Constructor;
24  import java.lang.reflect.InvocationTargetException;
25  import java.net.InetAddress;
26  import java.net.InetSocketAddress;
27  import java.net.UnknownHostException;
28  import java.util.ArrayList;
29  import java.util.Arrays;
30  import java.util.Collections;
31  import java.util.Comparator;
32  import java.util.HashSet;
33  import java.util.List;
34  import java.util.Map;
35  import java.util.Set;
36  import java.util.concurrent.atomic.AtomicReference;
37  
38  import javax.servlet.ServletException;
39  import javax.servlet.http.HttpServlet;
40  import javax.servlet.http.HttpServletRequest;
41  import javax.servlet.http.HttpServletResponse;
42  
43  import org.apache.commons.logging.Log;
44  import org.apache.commons.logging.LogFactory;
45  import org.apache.hadoop.conf.Configuration;
46  import org.apache.hadoop.fs.Path;
47  import org.apache.hadoop.hbase.ClusterStatus;
48  import org.apache.hadoop.hbase.CoordinatedStateException;
49  import org.apache.hadoop.hbase.CoordinatedStateManager;
50  import org.apache.hadoop.hbase.DoNotRetryIOException;
51  import org.apache.hadoop.hbase.HBaseIOException;
52  import org.apache.hadoop.hbase.HColumnDescriptor;
53  import org.apache.hadoop.hbase.HConstants;
54  import org.apache.hadoop.hbase.HRegionInfo;
55  import org.apache.hadoop.hbase.HTableDescriptor;
56  import org.apache.hadoop.hbase.MasterNotRunningException;
57  import org.apache.hadoop.hbase.MetaTableAccessor;
58  import org.apache.hadoop.hbase.NamespaceDescriptor;
59  import org.apache.hadoop.hbase.NamespaceNotFoundException;
60  import org.apache.hadoop.hbase.PleaseHoldException;
61  import org.apache.hadoop.hbase.Server;
62  import org.apache.hadoop.hbase.ServerLoad;
63  import org.apache.hadoop.hbase.ServerName;
64  import org.apache.hadoop.hbase.TableDescriptors;
65  import org.apache.hadoop.hbase.TableName;
66  import org.apache.hadoop.hbase.TableNotDisabledException;
67  import org.apache.hadoop.hbase.TableNotFoundException;
68  import org.apache.hadoop.hbase.UnknownRegionException;
69  import org.apache.hadoop.hbase.classification.InterfaceAudience;
70  import org.apache.hadoop.hbase.client.MetaScanner;
71  import org.apache.hadoop.hbase.client.MetaScanner.MetaScannerVisitor;
72  import org.apache.hadoop.hbase.client.MetaScanner.MetaScannerVisitorBase;
73  import org.apache.hadoop.hbase.client.Result;
74  import org.apache.hadoop.hbase.client.TableState;
75  import org.apache.hadoop.hbase.coprocessor.CoprocessorHost;
76  import org.apache.hadoop.hbase.exceptions.DeserializationException;
77  import org.apache.hadoop.hbase.executor.ExecutorType;
78  import org.apache.hadoop.hbase.ipc.RequestContext;
79  import org.apache.hadoop.hbase.ipc.RpcServer;
80  import org.apache.hadoop.hbase.ipc.ServerNotRunningYetException;
81  import org.apache.hadoop.hbase.master.MasterRpcServices.BalanceSwitchMode;
82  import org.apache.hadoop.hbase.master.balancer.BalancerChore;
83  import org.apache.hadoop.hbase.master.balancer.BaseLoadBalancer;
84  import org.apache.hadoop.hbase.master.balancer.ClusterStatusChore;
85  import org.apache.hadoop.hbase.master.balancer.LoadBalancerFactory;
86  import org.apache.hadoop.hbase.master.cleaner.HFileCleaner;
87  import org.apache.hadoop.hbase.master.cleaner.LogCleaner;
88  import org.apache.hadoop.hbase.master.handler.CreateTableHandler;
89  import org.apache.hadoop.hbase.master.handler.DeleteTableHandler;
90  import org.apache.hadoop.hbase.master.handler.DisableTableHandler;
91  import org.apache.hadoop.hbase.master.handler.DispatchMergingRegionHandler;
92  import org.apache.hadoop.hbase.master.handler.EnableTableHandler;
93  import org.apache.hadoop.hbase.master.handler.ModifyTableHandler;
94  import org.apache.hadoop.hbase.master.handler.TableAddFamilyHandler;
95  import org.apache.hadoop.hbase.master.handler.TableDeleteFamilyHandler;
96  import org.apache.hadoop.hbase.master.handler.TableModifyFamilyHandler;
97  import org.apache.hadoop.hbase.master.handler.TruncateTableHandler;
98  import org.apache.hadoop.hbase.master.snapshot.SnapshotManager;
99  import org.apache.hadoop.hbase.monitoring.MemoryBoundedLogMessageBuffer;
100 import org.apache.hadoop.hbase.monitoring.MonitoredTask;
101 import org.apache.hadoop.hbase.monitoring.TaskMonitor;
102 import org.apache.hadoop.hbase.procedure.MasterProcedureManagerHost;
103 import org.apache.hadoop.hbase.procedure.flush.MasterFlushTableProcedureManager;
104 import org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.RegionServerInfo;
105 import org.apache.hadoop.hbase.protobuf.generated.ZooKeeperProtos.SplitLogTask.RecoveryMode;
106 import org.apache.hadoop.hbase.quotas.MasterQuotaManager;
107 import org.apache.hadoop.hbase.regionserver.HRegionServer;
108 import org.apache.hadoop.hbase.regionserver.RSRpcServices;
109 import org.apache.hadoop.hbase.regionserver.RegionSplitPolicy;
110 import org.apache.hadoop.hbase.replication.regionserver.Replication;
111 import org.apache.hadoop.hbase.security.UserProvider;
112 import org.apache.hadoop.hbase.util.Addressing;
113 import org.apache.hadoop.hbase.util.Bytes;
114 import org.apache.hadoop.hbase.util.CompressionTest;
115 import org.apache.hadoop.hbase.util.FSUtils;
116 import org.apache.hadoop.hbase.util.HFileArchiveUtil;
117 import org.apache.hadoop.hbase.util.Pair;
118 import org.apache.hadoop.hbase.util.Threads;
119 import org.apache.hadoop.hbase.util.VersionInfo;
120 import org.apache.hadoop.hbase.util.ZKDataMigrator;
121 import org.apache.hadoop.hbase.zookeeper.DrainingServerTracker;
122 import org.apache.hadoop.hbase.zookeeper.LoadBalancerTracker;
123 import org.apache.hadoop.hbase.zookeeper.MasterAddressTracker;
124 import org.apache.hadoop.hbase.zookeeper.MetaTableLocator;
125 import org.apache.hadoop.hbase.zookeeper.RegionServerTracker;
126 import org.apache.hadoop.hbase.zookeeper.ZKClusterId;
127 import org.apache.hadoop.hbase.zookeeper.ZKUtil;
128 import org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher;
129 import org.apache.zookeeper.KeeperException;
130 import org.apache.zookeeper.Watcher;
131 import org.mortbay.jetty.Connector;
132 import org.mortbay.jetty.nio.SelectChannelConnector;
133 import org.mortbay.jetty.servlet.Context;
134 
135 import com.google.common.annotations.VisibleForTesting;
136 import com.google.common.collect.Lists;
137 import com.google.common.collect.Maps;
138 import com.google.protobuf.Descriptors;
139 import com.google.protobuf.Service;
140 
141 /**
142  * HMaster is the "master server" for HBase. An HBase cluster has one active
143  * master.  If many masters are started, all compete.  Whichever wins goes on to
144  * run the cluster.  All others park themselves in their constructor until
145  * master or cluster shutdown or until the active master loses its lease in
146  * zookeeper.  Thereafter, all running masters jostle to take over the master role.
147  *
148  * <p>The Master can be asked to shut down the cluster. See {@link #shutdown()}.  In
149  * this case it will tell all regionservers to go down and then wait on them
150  * all reporting in that they are down.  This master will then shut itself down.
151  *
152  * <p>You can also shutdown just this master.  Call {@link #stopMaster()}.
153  *
154  * @see Watcher
155  */
156 @InterfaceAudience.Private
157 @SuppressWarnings("deprecation")
158 public class HMaster extends HRegionServer implements MasterServices, Server {
159   private static final Log LOG = LogFactory.getLog(HMaster.class.getName());
160 
161   // MASTER is name of the webapp and the attribute name used stuffing this
162   //instance into web context.
163   public static final String MASTER = "master";
164 
165   // Manager and zk listener for master election
166   private ActiveMasterManager activeMasterManager;
167   // Region server tracker
168   RegionServerTracker regionServerTracker;
169   // Draining region server tracker
170   private DrainingServerTracker drainingServerTracker;
171   // Tracker for load balancer state
172   LoadBalancerTracker loadBalancerTracker;
173 
174   /** Namespace stuff */
175   private TableNamespaceManager tableNamespaceManager;
176 
177   // Metrics for the HMaster
178   final MetricsMaster metricsMaster;
179   // file system manager for the master FS operations
180   private MasterFileSystem fileSystemManager;
181 
182   // server manager to deal with region server info
183   volatile ServerManager serverManager;
184 
185   // manager of assignment nodes in zookeeper
186   AssignmentManager assignmentManager;
187 
188   // buffer for "fatal error" notices from region servers
189   // in the cluster. This is only used for assisting
190   // operations/debugging.
191   MemoryBoundedLogMessageBuffer rsFatals;
192 
193   // flag set after we become the active master (used for testing)
194   private volatile boolean isActiveMaster = false;
195 
196   // flag set after we complete initialization once active,
197   // it is not private since it's used in unit tests
198   volatile boolean initialized = false;
199 
200   // flag set after master services are started,
201   // initialization may have not completed yet.
202   volatile boolean serviceStarted = false;
203 
204   // flag set after we complete assignMeta.
205   private volatile boolean serverShutdownHandlerEnabled = false;
206 
207   LoadBalancer balancer;
208   private BalancerChore balancerChore;
209   private ClusterStatusChore clusterStatusChore;
210   private ClusterStatusPublisher clusterStatusPublisherChore = null;
211 
212   CatalogJanitor catalogJanitorChore;
213   private LogCleaner logCleaner;
214   private HFileCleaner hfileCleaner;
215 
216   MasterCoprocessorHost cpHost;
217 
218   private final boolean preLoadTableDescriptors;
219 
220   // Time stamps for when a hmaster became active
221   private long masterActiveTime;
222 
223   //should we check the compression codec type at master side, default true, HBASE-6370
224   private final boolean masterCheckCompression;
225 
226   Map<String, Service> coprocessorServiceHandlers = Maps.newHashMap();
227 
228   // monitor for snapshot of hbase tables
229   SnapshotManager snapshotManager;
230   // monitor for distributed procedures
231   MasterProcedureManagerHost mpmHost;
232 
233   private MasterQuotaManager quotaManager;
234 
235   // handle table states
236   private TableStateManager tableStateManager;
237 
238   /** flag used in test cases in order to simulate RS failures during master initialization */
239   private volatile boolean initializationBeforeMetaAssignment = false;
240 
241   /** jetty server for master to redirect requests to regionserver infoServer */
242   private org.mortbay.jetty.Server masterJettyServer;
243 
244   private int masterInfoPort;
245   public static class RedirectServlet extends HttpServlet {
246     private static final long serialVersionUID = 2894774810058302472L;
247     private static int regionServerInfoPort;
248 
249     @Override
250     public void doGet(HttpServletRequest request,
251         HttpServletResponse response) throws ServletException, IOException {
252       String redirectUrl = request.getScheme() + "://"
253         + request.getServerName() + ":" + regionServerInfoPort
254         + request.getRequestURI();
255       response.sendRedirect(redirectUrl);
256     }
257   }
258 
259   /**
260    * Initializes the HMaster. The steps are as follows:
261    * <p>
262    * <ol>
263    * <li>Initialize the local HRegionServer
264    * <li>Start the ActiveMasterManager.
265    * </ol>
266    * <p>
267    * Remaining steps of initialization occur in
268    * #finishActiveMasterInitialization(MonitoredTask) after
269    * the master becomes the active one.
270    *
271    * @throws KeeperException
272    * @throws IOException
273    */
274   public HMaster(final Configuration conf, CoordinatedStateManager csm)
275       throws IOException, KeeperException {
276     super(conf, csm);
277     this.rsFatals = new MemoryBoundedLogMessageBuffer(
278       conf.getLong("hbase.master.buffer.for.rs.fatals", 1*1024*1024));
279 
280     LOG.info("hbase.rootdir=" + FSUtils.getRootDir(this.conf) +
281         ", hbase.cluster.distributed=" + this.conf.getBoolean(HConstants.CLUSTER_DISTRIBUTED, false));
282 
283     Replication.decorateMasterConfiguration(this.conf);
284 
285     // Hack! Maps DFSClient => Master for logs.  HDFS made this
286     // config param for task trackers, but we can piggyback off of it.
287     if (this.conf.get("mapreduce.task.attempt.id") == null) {
288       this.conf.set("mapreduce.task.attempt.id", "hb_m_" + this.serverName.toString());
289     }
290 
291     //should we check the compression codec type at master side, default true, HBASE-6370
292     this.masterCheckCompression = conf.getBoolean("hbase.master.check.compression", true);
293 
294     this.metricsMaster = new MetricsMaster( new MetricsMasterWrapperImpl(this));
295 
296     // preload table descriptor at startup
297     this.preLoadTableDescriptors = conf.getBoolean("hbase.master.preload.tabledescriptors", true);
298 
299     // Do we publish the status?
300     
301     boolean shouldPublish = conf.getBoolean(HConstants.STATUS_PUBLISHED,
302         HConstants.STATUS_PUBLISHED_DEFAULT);
303     Class<? extends ClusterStatusPublisher.Publisher> publisherClass =
304         conf.getClass(ClusterStatusPublisher.STATUS_PUBLISHER_CLASS,
305             ClusterStatusPublisher.DEFAULT_STATUS_PUBLISHER_CLASS,
306             ClusterStatusPublisher.Publisher.class);
307 
308     if (shouldPublish) {
309       if (publisherClass == null) {
310         LOG.warn(HConstants.STATUS_PUBLISHED + " is true, but " +
311             ClusterStatusPublisher.DEFAULT_STATUS_PUBLISHER_CLASS +
312             " is not set - not publishing status");
313       } else {
314         clusterStatusPublisherChore = new ClusterStatusPublisher(this, conf, publisherClass);
315         Threads.setDaemonThreadRunning(clusterStatusPublisherChore.getThread());
316       }
317     }
318     startActiveMasterManager();
319     putUpJettyServer();
320   }
321 
322   private void putUpJettyServer() throws IOException {
323     if (!conf.getBoolean("hbase.master.infoserver.redirect", true)) {
324       return;
325     }
326     int infoPort = conf.getInt("hbase.master.info.port.orig",
327       HConstants.DEFAULT_MASTER_INFOPORT);
328     // -1 is for disabling info server, so no redirecting
329     if (infoPort < 0 || infoServer == null) {
330       return;
331     }
332     String addr = conf.get("hbase.master.info.bindAddress", "0.0.0.0");
333     if (!Addressing.isLocalAddress(InetAddress.getByName(addr))) {
334       String msg =
335           "Failed to start redirecting jetty server. Address " + addr
336               + " does not belong to this host. Correct configuration parameter: "
337               + "hbase.master.info.bindAddress";
338       LOG.error(msg);
339       throw new IOException(msg);
340     }
341 
342     RedirectServlet.regionServerInfoPort = infoServer.getPort();
343     masterJettyServer = new org.mortbay.jetty.Server();
344     Connector connector = new SelectChannelConnector();
345     connector.setHost(addr);
346     connector.setPort(infoPort);
347     masterJettyServer.addConnector(connector);
348     masterJettyServer.setStopAtShutdown(true);
349     Context context = new Context(masterJettyServer, "/", Context.NO_SESSIONS);
350     context.addServlet(RedirectServlet.class, "/*");
351     try {
352       masterJettyServer.start();
353     } catch (Exception e) {
354       throw new IOException("Failed to start redirecting jetty server", e);
355     }
356     masterInfoPort = connector.getPort();
357   }
358 
359   /**
360    * For compatibility, if failed with regionserver credentials, try the master one
361    */
362   protected void login(UserProvider user, String host) throws IOException {
363     try {
364       super.login(user, host);
365     } catch (IOException ie) {
366       user.login("hbase.master.keytab.file",
367         "hbase.master.kerberos.principal", host);
368     }
369   }
370 
371   /**
372    * If configured to put regions on active master,
373    * wait till a backup master becomes active.
374    * Otherwise, loop till the server is stopped or aborted.
375    */
376   protected void waitForMasterActive(){
377     boolean tablesOnMaster = BaseLoadBalancer.tablesOnMaster(conf);
378     while (!(tablesOnMaster && isActiveMaster)
379         && !isStopped() && !isAborted()) {
380       sleeper.sleep();
381     }
382   }
383 
384   @VisibleForTesting
385   public MasterRpcServices getMasterRpcServices() {
386     return (MasterRpcServices)rpcServices;
387   }
388 
389   public boolean balanceSwitch(final boolean b) throws IOException {
390     return getMasterRpcServices().switchBalancer(b, BalanceSwitchMode.ASYNC);
391   }
392 
393   protected String getProcessName() {
394     return MASTER;
395   }
396 
397   protected boolean canCreateBaseZNode() {
398     return true;
399   }
400 
401   protected boolean canUpdateTableDescriptor() {
402     return true;
403   }
404 
405   protected RSRpcServices createRpcServices() throws IOException {
406     return new MasterRpcServices(this);
407   }
408 
409   protected void configureInfoServer() {
410     infoServer.addServlet("master-status", "/master-status", MasterStatusServlet.class);
411     infoServer.setAttribute(MASTER, this);
412     if (BaseLoadBalancer.tablesOnMaster(conf)) {
413       super.configureInfoServer();
414     }
415   }
416 
417   protected Class<? extends HttpServlet> getDumpServlet() {
418     return MasterDumpServlet.class;
419   }
420 
421   /**
422    * Emit the HMaster metrics, such as region in transition metrics.
423    * Surrounding in a try block just to be sure metrics doesn't abort HMaster.
424    */
425   protected void doMetrics() {
426     try {
427       if (assignmentManager != null) {
428         assignmentManager.updateRegionsInTransitionMetrics();
429       }
430     } catch (Throwable e) {
431       LOG.error("Couldn't update metrics: " + e.getMessage());
432     }
433   }
434 
435   MetricsMaster getMasterMetrics() {
436     return metricsMaster;
437   }
438 
439   /**
440    * Initialize all ZK based system trackers.
441    * @throws IOException
442    * @throws InterruptedException
443    * @throws KeeperException
444    * @throws CoordinatedStateException
445    */
446   void initializeZKBasedSystemTrackers() throws IOException,
447       InterruptedException, KeeperException, CoordinatedStateException {
448     this.balancer = LoadBalancerFactory.getLoadBalancer(conf);
449     this.loadBalancerTracker = new LoadBalancerTracker(zooKeeper, this);
450     this.loadBalancerTracker.start();
451     this.assignmentManager = new AssignmentManager(this, serverManager,
452       this.balancer, this.service, this.metricsMaster,
453       this.tableLockManager, tableStateManager);
454 
455     this.regionServerTracker = new RegionServerTracker(zooKeeper, this,
456         this.serverManager);
457     this.regionServerTracker.start();
458 
459     this.drainingServerTracker = new DrainingServerTracker(zooKeeper, this,
460       this.serverManager);
461     this.drainingServerTracker.start();
462 
463     // Set the cluster as up.  If new RSs, they'll be waiting on this before
464     // going ahead with their startup.
465     boolean wasUp = this.clusterStatusTracker.isClusterUp();
466     if (!wasUp) this.clusterStatusTracker.setClusterUp();
467 
468     LOG.info("Server active/primary master=" + this.serverName +
469         ", sessionid=0x" +
470         Long.toHexString(this.zooKeeper.getRecoverableZooKeeper().getSessionId()) +
471         ", setting cluster-up flag (Was=" + wasUp + ")");
472 
473     // create/initialize the snapshot manager and other procedure managers
474     this.snapshotManager = new SnapshotManager();
475     this.mpmHost = new MasterProcedureManagerHost();
476     this.mpmHost.register(this.snapshotManager);
477     this.mpmHost.register(new MasterFlushTableProcedureManager());
478     this.mpmHost.loadProcedures(conf);
479     this.mpmHost.initialize(this, this.metricsMaster);
480 
481     // migrating existent table state from zk
482     for (Map.Entry<TableName, TableState.State> entry : ZKDataMigrator
483         .queryForTableStates(getZooKeeper()).entrySet()) {
484       LOG.info("Converting state from zk to new states:" + entry);
485       tableStateManager.setTableState(entry.getKey(), entry.getValue());
486     }
487     ZKUtil.deleteChildrenRecursively(getZooKeeper(), getZooKeeper().tableZNode);
488   }
489 
490   /**
491    * Finish initialization of HMaster after becoming the primary master.
492    *
493    * <ol>
494    * <li>Initialize master components - file system manager, server manager,
495    *     assignment manager, region server tracker, etc</li>
496    * <li>Start necessary service threads - balancer, catalog janior,
497    *     executor services, etc</li>
498    * <li>Set cluster as UP in ZooKeeper</li>
499    * <li>Wait for RegionServers to check-in</li>
500    * <li>Split logs and perform data recovery, if necessary</li>
501    * <li>Ensure assignment of meta/namespace regions<li>
502    * <li>Handle either fresh cluster start or master failover</li>
503    * </ol>
504    *
505    * @throws IOException
506    * @throws InterruptedException
507    * @throws KeeperException
508    * @throws CoordinatedStateException
509    */
510   private void finishActiveMasterInitialization(MonitoredTask status)
511       throws IOException, InterruptedException, KeeperException, CoordinatedStateException {
512 
513     isActiveMaster = true;
514 
515     /*
516      * We are active master now... go initialize components we need to run.
517      * Note, there may be dross in zk from previous runs; it'll get addressed
518      * below after we determine if cluster startup or failover.
519      */
520 
521     status.setStatus("Initializing Master file system");
522 
523     this.masterActiveTime = System.currentTimeMillis();
524     // TODO: Do this using Dependency Injection, using PicoContainer, Guice or Spring.
525     this.fileSystemManager = new MasterFileSystem(this, this);
526 
527     // enable table descriptors cache
528     this.tableDescriptors.setCacheOn();
529 
530     // warm-up HTDs cache on master initialization
531     if (preLoadTableDescriptors) {
532       status.setStatus("Pre-loading table descriptors");
533       this.tableDescriptors.getAll();
534     }
535 
536     // publish cluster ID
537     status.setStatus("Publishing Cluster ID in ZooKeeper");
538     ZKClusterId.setClusterId(this.zooKeeper, fileSystemManager.getClusterId());
539     this.serverManager = createServerManager(this, this);
540 
541     setupClusterConnection();
542 
543     // Invalidate all write locks held previously
544     this.tableLockManager.reapWriteLocks();
545 
546     this.tableStateManager = new TableStateManager(this);
547     this.tableStateManager.start();
548 
549     status.setStatus("Initializing ZK system trackers");
550     initializeZKBasedSystemTrackers();
551 
552     // initialize master side coprocessors before we start handling requests
553     status.setStatus("Initializing master coprocessors");
554     this.cpHost = new MasterCoprocessorHost(this, this.conf);
555 
556     // start up all service threads.
557     status.setStatus("Initializing master service threads");
558     startServiceThreads();
559 
560     // Wake up this server to check in
561     sleeper.skipSleepCycle();
562 
563     // Wait for region servers to report in
564     this.serverManager.waitForRegionServers(status);
565     // Check zk for region servers that are up but didn't register
566     for (ServerName sn: this.regionServerTracker.getOnlineServers()) {
567       // The isServerOnline check is opportunistic, correctness is handled inside
568       if (!this.serverManager.isServerOnline(sn)
569           && serverManager.checkAndRecordNewServer(sn, ServerLoad.EMPTY_SERVERLOAD)) {
570         LOG.info("Registered server found up in zk but who has not yet reported in: " + sn);
571       }
572     }
573 
574     // get a list for previously failed RS which need log splitting work
575     // we recover hbase:meta region servers inside master initialization and
576     // handle other failed servers in SSH in order to start up master node ASAP
577     Set<ServerName> previouslyFailedServers = this.fileSystemManager
578         .getFailedServersFromLogFolders();
579 
580     // remove stale recovering regions from previous run
581     this.fileSystemManager.removeStaleRecoveringRegionsFromZK(previouslyFailedServers);
582 
583     // log splitting for hbase:meta server
584     ServerName oldMetaServerLocation = metaTableLocator.getMetaRegionLocation(this.getZooKeeper());
585     if (oldMetaServerLocation != null && previouslyFailedServers.contains(oldMetaServerLocation)) {
586       splitMetaLogBeforeAssignment(oldMetaServerLocation);
587       // Note: we can't remove oldMetaServerLocation from previousFailedServers list because it
588       // may also host user regions
589     }
590     Set<ServerName> previouslyFailedMetaRSs = getPreviouselyFailedMetaServersFromZK();
591     // need to use union of previouslyFailedMetaRSs recorded in ZK and previouslyFailedServers
592     // instead of previouslyFailedMetaRSs alone to address the following two situations:
593     // 1) the chained failure situation(recovery failed multiple times in a row).
594     // 2) master get killed right before it could delete the recovering hbase:meta from ZK while the
595     // same server still has non-meta wals to be replayed so that
596     // removeStaleRecoveringRegionsFromZK can't delete the stale hbase:meta region
597     // Passing more servers into splitMetaLog is all right. If a server doesn't have hbase:meta wal,
598     // there is no op for the server.
599     previouslyFailedMetaRSs.addAll(previouslyFailedServers);
600 
601     this.initializationBeforeMetaAssignment = true;
602 
603     // Wait for regionserver to finish initialization.
604     if (BaseLoadBalancer.tablesOnMaster(conf)) {
605       waitForServerOnline();
606     }
607 
608     //initialize load balancer
609     this.balancer.setClusterStatus(getClusterStatus());
610     this.balancer.setMasterServices(this);
611     this.balancer.initialize();
612 
613     // Check if master is shutting down because of some issue
614     // in initializing the regionserver or the balancer.
615     if(isStopped()) return;
616 
617     // Make sure meta assigned before proceeding.
618     status.setStatus("Assigning Meta Region");
619     assignMeta(status, previouslyFailedMetaRSs);
620     // check if master is shutting down because above assignMeta could return even hbase:meta isn't
621     // assigned when master is shutting down
622     if(isStopped()) return;
623 
624     status.setStatus("Submitting log splitting work for previously failed region servers");
625     // Master has recovered hbase:meta region server and we put
626     // other failed region servers in a queue to be handled later by SSH
627     for (ServerName tmpServer : previouslyFailedServers) {
628       this.serverManager.processDeadServer(tmpServer, true);
629     }
630 
631     // Fix up assignment manager status
632     status.setStatus("Starting assignment manager");
633     this.assignmentManager.joinCluster();
634 
635     //set cluster status again after user regions are assigned
636     this.balancer.setClusterStatus(getClusterStatus());
637 
638     // Start balancer and meta catalog janitor after meta and regions have
639     // been assigned.
640     status.setStatus("Starting balancer and catalog janitor");
641     this.clusterStatusChore = new ClusterStatusChore(this, balancer);
642     Threads.setDaemonThreadRunning(clusterStatusChore.getThread());
643     this.balancerChore = new BalancerChore(this);
644     Threads.setDaemonThreadRunning(balancerChore.getThread());
645     this.catalogJanitorChore = new CatalogJanitor(this, this);
646     Threads.setDaemonThreadRunning(catalogJanitorChore.getThread());
647 
648     status.setStatus("Starting namespace manager");
649     initNamespace();
650 
651     status.setStatus("Starting quota manager");
652     initQuotaManager();
653 
654     if (this.cpHost != null) {
655       try {
656         this.cpHost.preMasterInitialization();
657       } catch (IOException e) {
658         LOG.error("Coprocessor preMasterInitialization() hook failed", e);
659       }
660     }
661 
662     status.markComplete("Initialization successful");
663     LOG.info("Master has completed initialization");
664     initialized = true;
665     // clear the dead servers with same host name and port of online server because we are not
666     // removing dead server with same hostname and port of rs which is trying to check in before
667     // master initialization. See HBASE-5916.
668     this.serverManager.clearDeadServersWithSameHostNameAndPortOfOnlineServer();
669 
670     if (this.cpHost != null) {
671       // don't let cp initialization errors kill the master
672       try {
673         this.cpHost.postStartMaster();
674       } catch (IOException ioe) {
675         LOG.error("Coprocessor postStartMaster() hook failed", ioe);
676       }
677     }
678   }
679 
680   /**
681    * Create a {@link ServerManager} instance.
682    * @param master
683    * @param services
684    * @return An instance of {@link ServerManager}
685    * @throws org.apache.hadoop.hbase.ZooKeeperConnectionException
686    * @throws IOException
687    */
688   ServerManager createServerManager(final Server master,
689       final MasterServices services)
690   throws IOException {
691     // We put this out here in a method so can do a Mockito.spy and stub it out
692     // w/ a mocked up ServerManager.
693     return new ServerManager(master, services);
694   }
695 
696   /**
697    * Check <code>hbase:meta</code> is assigned. If not, assign it.
698    * @param status MonitoredTask
699    * @param previouslyFailedMetaRSs
700    * @throws InterruptedException
701    * @throws IOException
702    * @throws KeeperException
703    */
704   void assignMeta(MonitoredTask status, Set<ServerName> previouslyFailedMetaRSs)
705       throws InterruptedException, IOException, KeeperException {
706     // Work on meta region
707     int assigned = 0;
708     long timeout = this.conf.getLong("hbase.catalog.verification.timeout", 1000);
709     status.setStatus("Assigning hbase:meta region");
710 
711     // Get current meta state from zk.
712     RegionState metaState = MetaTableLocator.getMetaRegionState(getZooKeeper());
713 
714     RegionStates regionStates = assignmentManager.getRegionStates();
715     regionStates.createRegionState(HRegionInfo.FIRST_META_REGIONINFO,
716       metaState.getState(), metaState.getServerName(), null);
717 
718     if (!metaState.isOpened() || !metaTableLocator.verifyMetaRegionLocation(
719         this.getConnection(), this.getZooKeeper(), timeout)) {
720       ServerName currentMetaServer = metaState.getServerName();
721       if (serverManager.isServerOnline(currentMetaServer)) {
722         LOG.info("Meta was in transition on " + currentMetaServer);
723         assignmentManager.processRegionsInTransition(Arrays.asList(metaState));
724       } else {
725         if (currentMetaServer != null) {
726           splitMetaLogBeforeAssignment(currentMetaServer);
727           regionStates.logSplit(HRegionInfo.FIRST_META_REGIONINFO);
728           previouslyFailedMetaRSs.add(currentMetaServer);
729         }
730         LOG.info("Re-assigning hbase:meta, it was on " + currentMetaServer);
731         assignmentManager.assignMeta();
732       }
733       assigned++;
734     }
735 
736     enableMeta(TableName.META_TABLE_NAME);
737 
738     if ((RecoveryMode.LOG_REPLAY == this.getMasterFileSystem().getLogRecoveryMode())
739         && (!previouslyFailedMetaRSs.isEmpty())) {
740       // replay WAL edits mode need new hbase:meta RS is assigned firstly
741       status.setStatus("replaying log for Meta Region");
742       this.fileSystemManager.splitMetaLog(previouslyFailedMetaRSs);
743     }
744 
745     // Make sure a hbase:meta location is set. We need to enable SSH here since
746     // if the meta region server is died at this time, we need it to be re-assigned
747     // by SSH so that system tables can be assigned.
748     // No need to wait for meta is assigned = 0 when meta is just verified.
749     enableServerShutdownHandler(assigned != 0);
750 
751     LOG.info("hbase:meta assigned=" + assigned + ", location="
752       + metaTableLocator.getMetaRegionLocation(this.getZooKeeper()));
753     status.setStatus("META assigned.");
754   }
755 
756   void initNamespace() throws IOException {
757     //create namespace manager
758     tableNamespaceManager = new TableNamespaceManager(this);
759     tableNamespaceManager.start();
760   }
761 
762   void initQuotaManager() throws IOException {
763     quotaManager = new MasterQuotaManager(this);
764     quotaManager.start();
765   }
766 
767   boolean isCatalogJanitorEnabled() {
768     return catalogJanitorChore != null ?
769       catalogJanitorChore.getEnabled() : false;
770   }
771 
772   private void splitMetaLogBeforeAssignment(ServerName currentMetaServer) throws IOException {
773     if (RecoveryMode.LOG_REPLAY == this.getMasterFileSystem().getLogRecoveryMode()) {
774       // In log replay mode, we mark hbase:meta region as recovering in ZK
775       Set<HRegionInfo> regions = new HashSet<HRegionInfo>();
776       regions.add(HRegionInfo.FIRST_META_REGIONINFO);
777       this.fileSystemManager.prepareLogReplay(currentMetaServer, regions);
778     } else {
779       // In recovered.edits mode: create recovered edits file for hbase:meta server
780       this.fileSystemManager.splitMetaLog(currentMetaServer);
781     }
782   }
783 
  /**
   * Enable the ServerShutdownHandler (SSH) if it is not already enabled and,
   * optionally, block until an hbase:meta location is published in ZooKeeper.
   * @param waitForMeta when true, block until meta's location is known
   * @throws IOException
   * @throws InterruptedException if interrupted while waiting for meta
   */
  private void enableServerShutdownHandler(
      final boolean waitForMeta) throws IOException, InterruptedException {
    // If ServerShutdownHandler is disabled, we enable it and expire those dead
    // but not expired servers. This is required so that if meta is assigning to
    // a server which dies after assignMeta starts assignment,
    // SSH can re-assign it. Otherwise, we will be
    // stuck here waiting forever if waitForMeta is specified.
    if (!serverShutdownHandlerEnabled) {
      // Flip the flag before processing so queued dead servers are handled
      // under the enabled state.
      serverShutdownHandlerEnabled = true;
      this.serverManager.processQueuedDeadServers();
    }

    if (waitForMeta) {
      metaTableLocator.waitMetaRegionLocation(this.getZooKeeper());
    }
  }
800 
801   private void enableMeta(TableName metaTableName) {
802     if (!this.tableStateManager.isTableState(metaTableName,
803             TableState.State.ENABLED)) {
804       this.assignmentManager.setEnabledTable(metaTableName);
805     }
806   }
807 
808   /**
809    * This function returns a set of region server names under hbase:meta recovering region ZK node
810    * @return Set of meta server names which were recorded in ZK
811    * @throws KeeperException
812    */
813   private Set<ServerName> getPreviouselyFailedMetaServersFromZK() throws KeeperException {
814     Set<ServerName> result = new HashSet<ServerName>();
815     String metaRecoveringZNode = ZKUtil.joinZNode(zooKeeper.recoveringRegionsZNode,
816       HRegionInfo.FIRST_META_REGIONINFO.getEncodedName());
817     List<String> regionFailedServers = ZKUtil.listChildrenNoWatch(zooKeeper, metaRecoveringZNode);
818     if (regionFailedServers == null) return result;
819 
820     for(String failedServer : regionFailedServers) {
821       ServerName server = ServerName.parseServerName(failedServer);
822       result.add(server);
823     }
824     return result;
825   }
826 
  /** @return the {@link TableDescriptors} instance held by this master. */
  @Override
  public TableDescriptors getTableDescriptors() {
    return this.tableDescriptors;
  }
831 
  /** @return the {@link ServerManager} instance held by this master. */
  @Override
  public ServerManager getServerManager() {
    return this.serverManager;
  }
836 
  /** @return the {@link MasterFileSystem} instance held by this master. */
  @Override
  public MasterFileSystem getMasterFileSystem() {
    return this.fileSystemManager;
  }
841 
  /** @return the {@link TableStateManager} instance held by this master. */
  @Override
  public TableStateManager getTableStateManager() {
    return tableStateManager;
  }
846 
847   /*
848    * Start up all services. If any of these threads gets an unhandled exception
849    * then they just die with a logged message.  This should be fine because
850    * in general, we do not expect the master to get such unhandled exceptions
851    *  as OOMEs; it should be lightly loaded. See what HRegionServer does if
852    *  need to install an unexpected exception handler.
853    */
854   private void startServiceThreads() throws IOException{
855    // Start the executor service pools
856    this.service.startExecutorService(ExecutorType.MASTER_OPEN_REGION,
857       conf.getInt("hbase.master.executor.openregion.threads", 5));
858    this.service.startExecutorService(ExecutorType.MASTER_CLOSE_REGION,
859       conf.getInt("hbase.master.executor.closeregion.threads", 5));
860    this.service.startExecutorService(ExecutorType.MASTER_SERVER_OPERATIONS,
861       conf.getInt("hbase.master.executor.serverops.threads", 5));
862    this.service.startExecutorService(ExecutorType.MASTER_META_SERVER_OPERATIONS,
863       conf.getInt("hbase.master.executor.serverops.threads", 5));
864    this.service.startExecutorService(ExecutorType.M_LOG_REPLAY_OPS,
865       conf.getInt("hbase.master.executor.logreplayops.threads", 10));
866 
867    // We depend on there being only one instance of this executor running
868    // at a time.  To do concurrency, would need fencing of enable/disable of
869    // tables.
870    // Any time changing this maxThreads to > 1, pls see the comment at
871    // AccessController#postCreateTableHandler
872    this.service.startExecutorService(ExecutorType.MASTER_TABLE_OPERATIONS, 1);
873 
874    // Start log cleaner thread
875    int cleanerInterval = conf.getInt("hbase.master.cleaner.interval", 60 * 1000);
876    this.logCleaner =
877       new LogCleaner(cleanerInterval,
878          this, conf, getMasterFileSystem().getFileSystem(),
879          getMasterFileSystem().getOldLogDir());
880          Threads.setDaemonThreadRunning(logCleaner.getThread(),
881            getServerName().toShortString() + ".oldLogCleaner");
882 
883    //start the hfile archive cleaner thread
884     Path archiveDir = HFileArchiveUtil.getArchivePath(conf);
885     this.hfileCleaner = new HFileCleaner(cleanerInterval, this, conf, getMasterFileSystem()
886         .getFileSystem(), archiveDir);
887     Threads.setDaemonThreadRunning(hfileCleaner.getThread(),
888       getServerName().toShortString() + ".archivedHFileCleaner");
889 
890     serviceStarted = true;
891     if (LOG.isTraceEnabled()) {
892       LOG.trace("Started service threads");
893     }
894   }
895 
896   protected void stopServiceThreads() {
897     if (masterJettyServer != null) {
898       LOG.info("Stopping master jetty server");
899       try {
900         masterJettyServer.stop();
901       } catch (Exception e) {
902         LOG.error("Failed to stop master jetty server", e);
903       }
904     }
905     super.stopServiceThreads();
906     stopChores();
907     // Wait for all the remaining region servers to report in IFF we were
908     // running a cluster shutdown AND we were NOT aborting.
909     if (!isAborted() && this.serverManager != null &&
910         this.serverManager.isClusterShutdown()) {
911       this.serverManager.letRegionServersShutdown();
912     }
913     if (LOG.isDebugEnabled()) {
914       LOG.debug("Stopping service threads");
915     }
916     // Clean up and close up shop
917     if (this.logCleaner!= null) this.logCleaner.interrupt();
918     if (this.hfileCleaner != null) this.hfileCleaner.interrupt();
919     if (this.quotaManager != null) this.quotaManager.stop();
920     if (this.activeMasterManager != null) this.activeMasterManager.stop();
921     if (this.serverManager != null) this.serverManager.stop();
922     if (this.assignmentManager != null) this.assignmentManager.stop();
923     if (this.fileSystemManager != null) this.fileSystemManager.stop();
924     if (this.mpmHost != null) this.mpmHost.stop("server shutting down.");
925   }
926 
927   private void stopChores() {
928     if (this.balancerChore != null) {
929       this.balancerChore.interrupt();
930     }
931     if (this.clusterStatusChore != null) {
932       this.clusterStatusChore.interrupt();
933     }
934     if (this.catalogJanitorChore != null) {
935       this.catalogJanitorChore.interrupt();
936     }
937     if (this.clusterStatusPublisherChore != null){
938       clusterStatusPublisherChore.interrupt();
939     }
940   }
941 
942   /**
943    * @return Get remote side's InetAddress
944    * @throws UnknownHostException
945    */
946   InetAddress getRemoteInetAddress(final int port,
947       final long serverStartCode) throws UnknownHostException {
948     // Do it out here in its own little method so can fake an address when
949     // mocking up in tests.
950     InetAddress ia = RpcServer.getRemoteIp();
951 
952     // The call could be from the local regionserver,
953     // in which case, there is no remote address.
954     if (ia == null && serverStartCode == startcode) {
955       InetSocketAddress isa = rpcServices.getSocketAddress();
956       if (isa != null && isa.getPort() == port) {
957         ia = isa.getAddress();
958       }
959     }
960     return ia;
961   }
962 
963   /**
964    * @return Maximum time we should run balancer for
965    */
966   private int getBalancerCutoffTime() {
967     int balancerCutoffTime =
968       getConfiguration().getInt("hbase.balancer.max.balancing", -1);
969     if (balancerCutoffTime == -1) {
970       // No time period set so create one
971       int balancerPeriod =
972         getConfiguration().getInt("hbase.balancer.period", 300000);
973       balancerCutoffTime = balancerPeriod;
974       // If nonsense period, set it to balancerPeriod
975       if (balancerCutoffTime <= 0) balancerCutoffTime = balancerPeriod;
976     }
977     return balancerCutoffTime;
978   }
979 
  /**
   * Run one pass of the load balancer: compute region plans per table and
   * execute them until done or the time cutoff is reached.
   * <p>
   * The run is skipped (returns false) when the master is not initialized,
   * the balancer switch is off, regions are in transition, dead servers are
   * still being processed, or a coprocessor preBalance hook bypasses it.
   * Coprocessor hook failures are logged, not propagated.
   * @return true if a balance run completed (even with zero plans), false if
   *         the run was skipped
   * @throws IOException
   */
  public boolean balance() throws IOException {
    // if master not initialized, don't run balancer.
    if (!this.initialized) {
      LOG.debug("Master has not been initialized, don't run balancer.");
      return false;
    }
    // Do this call outside of synchronized block.
    int maximumBalanceTime = getBalancerCutoffTime();
    synchronized (this.balancer) {
      // If balance not true, don't run balancer.
      if (!this.loadBalancerTracker.isBalancerOn()) return false;
      // Only allow one balance run at a time.
      if (this.assignmentManager.getRegionStates().isRegionsInTransition()) {
        Map<String, RegionState> regionsInTransition =
          this.assignmentManager.getRegionStates().getRegionsInTransition();
        LOG.debug("Not running balancer because " + regionsInTransition.size() +
          " region(s) in transition: " + org.apache.commons.lang.StringUtils.
            abbreviate(regionsInTransition.toString(), 256));
        return false;
      }
      if (this.serverManager.areDeadServersInProgress()) {
        LOG.debug("Not running balancer because processing dead regionserver(s): " +
          this.serverManager.getDeadServers());
        return false;
      }

      // Give coprocessors a chance to veto the run.
      if (this.cpHost != null) {
        try {
          if (this.cpHost.preBalance()) {
            LOG.debug("Coprocessor bypassing balancer request");
            return false;
          }
        } catch (IOException ioe) {
          LOG.error("Error invoking master coprocessor preBalance()", ioe);
          return false;
        }
      }

      Map<TableName, Map<ServerName, List<HRegionInfo>>> assignmentsByTable =
        this.assignmentManager.getRegionStates().getAssignmentsByTable();

      List<RegionPlan> plans = new ArrayList<RegionPlan>();
      //Give the balancer the current cluster state.
      this.balancer.setClusterStatus(getClusterStatus());
      // Balance each table independently and pool the resulting plans.
      for (Map<ServerName, List<HRegionInfo>> assignments : assignmentsByTable.values()) {
        List<RegionPlan> partialPlans = this.balancer.balanceCluster(assignments);
        if (partialPlans != null) plans.addAll(partialPlans);
      }
      long cutoffTime = System.currentTimeMillis() + maximumBalanceTime;
      int rpCount = 0;  // number of RegionPlans balanced so far
      long totalRegPlanExecTime = 0;
      if (plans != null && !plans.isEmpty()) {
        for (RegionPlan plan: plans) {
          LOG.info("balance " + plan);
          long balStartTime = System.currentTimeMillis();
          //TODO: bulk assign
          this.assignmentManager.balance(plan);
          totalRegPlanExecTime += System.currentTimeMillis()-balStartTime;
          rpCount++;
          // Stop early if executing one more plan (estimated by the average
          // plan execution time so far) would exceed the cutoff.
          if (rpCount < plans.size() &&
              // if performing next balance exceeds cutoff time, exit the loop
              (System.currentTimeMillis() + (totalRegPlanExecTime / rpCount)) > cutoffTime) {
            //TODO: After balance, there should not be a cutoff time (keeping it as a security net for now)
            LOG.debug("No more balancing till next balance run; maximumBalanceTime=" +
              maximumBalanceTime);
            break;
          }
        }
      }
      if (this.cpHost != null) {
        try {
          this.cpHost.postBalance(rpCount < plans.size() ? plans.subList(0, rpCount) : plans);
        } catch (IOException ioe) {
          // balancing already succeeded so don't change the result
          LOG.error("Error invoking master coprocessor postBalance()", ioe);
        }
      }
    }
    // If LoadBalancer did not generate any plans, it means the cluster is already balanced.
    // Return true indicating a success.
    return true;
  }
1062 
1063   /**
1064    * @return Client info for use as prefix on an audit log string; who did an action
1065    */
1066   String getClientIdAuditPrefix() {
1067     return "Client=" + RequestContext.getRequestUserName() + "/" +
1068       RequestContext.get().getRemoteAddress();
1069   }
1070 
1071   /**
1072    * Switch for the background CatalogJanitor thread.
1073    * Used for testing.  The thread will continue to run.  It will just be a noop
1074    * if disabled.
1075    * @param b If false, the catalog janitor won't do anything.
1076    */
1077   public void setCatalogJanitorEnabled(final boolean b) {
1078     this.catalogJanitorChore.setEnabled(b);
1079   }
1080 
1081   @Override
1082   public void dispatchMergingRegions(final HRegionInfo region_a,
1083       final HRegionInfo region_b, final boolean forcible) throws IOException {
1084     checkInitialized();
1085     this.service.submit(new DispatchMergingRegionHandler(this,
1086         this.catalogJanitorChore, region_a, region_b, forcible));
1087   }
1088 
  /**
   * Move a region to a destination server, honoring coprocessor pre/post
   * move hooks.
   * @param encodedRegionName encoded name of the region to move
   * @param destServerName destination server name bytes; null/empty means
   *          pick a random destination server
   * @throws HBaseIOException if the region is unknown or the move fails
   */
  void move(final byte[] encodedRegionName,
      final byte[] destServerName) throws HBaseIOException {
    RegionState regionState = assignmentManager.getRegionStates().
      getRegionState(Bytes.toString(encodedRegionName));
    if (regionState == null) {
      throw new UnknownRegionException(Bytes.toStringBinary(encodedRegionName));
    }

    HRegionInfo hri = regionState.getRegion();
    ServerName dest;
    if (destServerName == null || destServerName.length == 0) {
      // No destination given: let the balancer pick one at random from the
      // candidate list (excluding the region's current server).
      LOG.info("Passed destination servername is null/empty so " +
        "choosing a server at random");
      final List<ServerName> destServers = this.serverManager.createDestinationServersList(
        regionState.getServerName());
      dest = balancer.randomAssignment(hri, destServers);
      if (dest == null) {
        LOG.debug("Unable to determine a plan to assign " + hri);
        return;
      }
    } else {
      dest = ServerName.valueOf(Bytes.toString(destServerName));
      if (dest.equals(serverName) && balancer instanceof BaseLoadBalancer
          && !((BaseLoadBalancer)balancer).shouldBeOnMaster(hri)) {
        // To avoid unnecessary region moving later by balancer. Don't put user
        // regions on master. Regions on master could be put on other region
        // server intentionally by test however.
        LOG.debug("Skipping move of region " + hri.getRegionNameAsString()
          + " to avoid unnecessary region moving later by load balancer,"
          + " because it should not be on master");
        return;
      }
    }

    // Moving a region onto the server it already lives on is a no-op.
    if (dest.equals(regionState.getServerName())) {
      LOG.debug("Skipping move of region " + hri.getRegionNameAsString()
        + " because region already assigned to the same server " + dest + ".");
      return;
    }

    // Now we can do the move
    RegionPlan rp = new RegionPlan(hri, regionState.getServerName(), dest);

    try {
      checkInitialized();
      // preMove may veto the move by returning true.
      if (this.cpHost != null) {
        if (this.cpHost.preMove(hri, rp.getSource(), rp.getDestination())) {
          return;
        }
      }
      LOG.info(getClientIdAuditPrefix() + " move " + rp + ", running balancer");
      this.assignmentManager.balance(rp);
      if (this.cpHost != null) {
        this.cpHost.postMove(hri, rp.getSource(), rp.getDestination());
      }
    } catch (IOException ioe) {
      // Preserve HBaseIOException subtype for callers; wrap anything else.
      if (ioe instanceof HBaseIOException) {
        throw (HBaseIOException)ioe;
      }
      throw new HBaseIOException(ioe);
    }
  }
1151 
  /**
   * Create a table: verifies the namespace exists, runs descriptor sanity
   * checks, honors coprocessor pre/post hooks and submits an asynchronous
   * CreateTableHandler.
   * @param hTableDescriptor descriptor of the table to create
   * @param splitKeys optional split keys; null/empty creates a single region
   * @throws IOException on validation failure or if the master is stopped
   */
  @Override
  public void createTable(HTableDescriptor hTableDescriptor,
      byte [][] splitKeys) throws IOException {
    if (isStopped()) {
      throw new MasterNotRunningException();
    }

    String namespace = hTableDescriptor.getTableName().getNamespaceAsString();
    getNamespaceDescriptor(namespace); // ensure namespace exists

    HRegionInfo[] newRegions = getHRegionInfos(hTableDescriptor, splitKeys);
    checkInitialized();
    sanityCheckTableDescriptor(hTableDescriptor);
    if (cpHost != null) {
      cpHost.preCreateTable(hTableDescriptor, newRegions);
    }
    LOG.info(getClientIdAuditPrefix() + " create " + hTableDescriptor);
    // Creation is asynchronous: the handler is submitted after prepare().
    this.service.submit(new CreateTableHandler(this,
      this.fileSystemManager, hTableDescriptor, conf,
      newRegions, this).prepare());
    if (cpHost != null) {
      cpHost.postCreateTable(hTableDescriptor, newRegions);
    }

  }
1177 
1178   /**
1179    * Checks whether the table conforms to some sane limits, and configured
1180    * values (compression, etc) work. Throws an exception if something is wrong.
1181    * @throws IOException
1182    */
1183   private void sanityCheckTableDescriptor(final HTableDescriptor htd) throws IOException {
1184     final String CONF_KEY = "hbase.table.sanity.checks";
1185     if (!conf.getBoolean(CONF_KEY, true)) {
1186       return;
1187     }
1188     String tableVal = htd.getConfigurationValue(CONF_KEY);
1189     if (tableVal != null && !Boolean.valueOf(tableVal)) {
1190       return;
1191     }
1192 
1193     // check max file size
1194     long maxFileSizeLowerLimit = 2 * 1024 * 1024L; // 2M is the default lower limit
1195     long maxFileSize = htd.getMaxFileSize();
1196     if (maxFileSize < 0) {
1197       maxFileSize = conf.getLong(HConstants.HREGION_MAX_FILESIZE, maxFileSizeLowerLimit);
1198     }
1199     if (maxFileSize < conf.getLong("hbase.hregion.max.filesize.limit", maxFileSizeLowerLimit)) {
1200       throw new DoNotRetryIOException("MAX_FILESIZE for table descriptor or "
1201         + "\"hbase.hregion.max.filesize\" (" + maxFileSize
1202         + ") is too small, which might cause over splitting into unmanageable "
1203         + "number of regions. Set " + CONF_KEY + " to false at conf or table descriptor "
1204           + "if you want to bypass sanity checks");
1205     }
1206 
1207     // check flush size
1208     long flushSizeLowerLimit = 1024 * 1024L; // 1M is the default lower limit
1209     long flushSize = htd.getMemStoreFlushSize();
1210     if (flushSize < 0) {
1211       flushSize = conf.getLong(HConstants.HREGION_MEMSTORE_FLUSH_SIZE, flushSizeLowerLimit);
1212     }
1213     if (flushSize < conf.getLong("hbase.hregion.memstore.flush.size.limit", flushSizeLowerLimit)) {
1214       throw new DoNotRetryIOException("MEMSTORE_FLUSHSIZE for table descriptor or "
1215           + "\"hbase.hregion.memstore.flush.size\" ("+flushSize+") is too small, which might cause"
1216           + " very frequent flushing. Set " + CONF_KEY + " to false at conf or table descriptor "
1217           + "if you want to bypass sanity checks");
1218     }
1219 
1220     // check split policy class can be loaded
1221     try {
1222       RegionSplitPolicy.getSplitPolicyClass(htd, conf);
1223     } catch (Exception ex) {
1224       throw new DoNotRetryIOException(ex);
1225     }
1226 
1227     // check compression can be loaded
1228     checkCompression(htd);
1229 
1230     // check that we have at least 1 CF
1231     if (htd.getColumnFamilies().length == 0) {
1232       throw new DoNotRetryIOException("Table should have at least one column family "
1233           + "Set "+CONF_KEY+" at conf or table descriptor if you want to bypass sanity checks");
1234     }
1235 
1236     for (HColumnDescriptor hcd : htd.getColumnFamilies()) {
1237       if (hcd.getTimeToLive() <= 0) {
1238         throw new DoNotRetryIOException("TTL for column family " + hcd.getNameAsString()
1239           + "  must be positive. Set " + CONF_KEY + " to false at conf or table descriptor "
1240           + "if you want to bypass sanity checks");
1241       }
1242 
1243       // check blockSize
1244       if (hcd.getBlocksize() < 1024 || hcd.getBlocksize() > 16 * 1024 * 1024) {
1245         throw new DoNotRetryIOException("Block size for column family " + hcd.getNameAsString()
1246           + "  must be between 1K and 16MB Set "+CONF_KEY+" to false at conf or table descriptor "
1247           + "if you want to bypass sanity checks");
1248       }
1249 
1250       // check versions
1251       if (hcd.getMinVersions() < 0) {
1252         throw new DoNotRetryIOException("Min versions for column family " + hcd.getNameAsString()
1253           + "  must be positive. Set " + CONF_KEY + " to false at conf or table descriptor "
1254           + "if you want to bypass sanity checks");
1255       }
1256       // max versions already being checked
1257 
1258       // check replication scope
1259       if (hcd.getScope() < 0) {
1260         throw new DoNotRetryIOException("Replication scope for column family "
1261           + hcd.getNameAsString() + "  must be positive. Set " + CONF_KEY + " to false at conf "
1262           + "or table descriptor if you want to bypass sanity checks");
1263       }
1264 
1265       // TODO: should we check coprocessors and encryption ?
1266     }
1267   }
1268 
  /**
   * Register this master as a backup master in ZooKeeper and start a daemon
   * thread that blocks until it becomes the active master, then runs
   * finishActiveMasterInitialization. Fatal errors while becoming active
   * abort the master.
   * @throws KeeperException on ZK errors while creating the backup znode
   */
  private void startActiveMasterManager() throws KeeperException {
    String backupZNode = ZKUtil.joinZNode(
      zooKeeper.backupMasterAddressesZNode, serverName.toString());
    /*
    * Add a ZNode for ourselves in the backup master directory since we
    * may not become the active master. If so, we want the actual active
    * master to know we are backup masters, so that it won't assign
    * regions to us if so configured.
    *
    * If we become the active master later, ActiveMasterManager will delete
    * this node explicitly.  If we crash before then, ZooKeeper will delete
    * this node for us since it is ephemeral.
    */
    LOG.info("Adding backup master ZNode " + backupZNode);
    if (!MasterAddressTracker.setMasterAddress(zooKeeper, backupZNode,
        serverName, masterInfoPort)) {
      LOG.warn("Failed create of " + backupZNode + " by " + serverName);
    }

    activeMasterManager = new ActiveMasterManager(zooKeeper, this.serverName,
        masterInfoPort, this);
    // Start a thread to try to become the active master, so we won't block here
    Threads.setDaemonThreadRunning(new Thread(new Runnable() {
      public void run() {
        int timeout = conf.getInt(HConstants.ZK_SESSION_TIMEOUT,
          HConstants.DEFAULT_ZK_SESSION_TIMEOUT);
        // If we're a backup master, stall until a primary to writes his address
        if (conf.getBoolean(HConstants.MASTER_TYPE_BACKUP,
            HConstants.DEFAULT_MASTER_TYPE_BACKUP)) {
          LOG.debug("HMaster started in backup mode. "
            + "Stalling until master znode is written.");
          // This will only be a minute or so while the cluster starts up,
          // so don't worry about setting watches on the parent znode
          while (!activeMasterManager.hasActiveMaster()) {
            LOG.debug("Waiting for master address ZNode to be written "
              + "(Also watching cluster state node)");
            Threads.sleep(timeout);
          }
        }
        MonitoredTask status = TaskMonitor.get().createStatus("Master startup");
        status.setDescription("Master startup");
        try {
          // Blocks until this master wins the active-master race (or gives up).
          if (activeMasterManager.blockUntilBecomingActiveMaster(timeout, status)) {
            finishActiveMasterInitialization(status);
          }
        } catch (Throwable t) {
          status.setStatus("Failed to become active: " + t.getMessage());
          LOG.fatal("Failed to become active master", t);
          // HBASE-5680: Likely hadoop23 vs hadoop 20.x/1.x incompatibility
          if (t instanceof NoClassDefFoundError &&
              t.getMessage().contains("org/apache/hadoop/hdfs/protocol/FSConstants$SafeModeAction")) {
            // improved error message for this special case
            abort("HBase is having a problem with its Hadoop jars.  You may need to "
              + "recompile HBase against Hadoop version "
              +  org.apache.hadoop.util.VersionInfo.getVersion()
              + " or change your hadoop jars to start properly", t);
          } else {
            abort("Unhandled exception. Starting shutdown.", t);
          }
        } finally {
          status.cleanup();
        }
      }
    }, getServerName().toShortString() + ".activeMasterManager"));
  }
1334 
1335   private void checkCompression(final HTableDescriptor htd)
1336   throws IOException {
1337     if (!this.masterCheckCompression) return;
1338     for (HColumnDescriptor hcd : htd.getColumnFamilies()) {
1339       checkCompression(hcd);
1340     }
1341   }
1342 
1343   private void checkCompression(final HColumnDescriptor hcd)
1344   throws IOException {
1345     if (!this.masterCheckCompression) return;
1346     CompressionTest.testCompression(hcd.getCompression());
1347     CompressionTest.testCompression(hcd.getCompactionCompression());
1348   }
1349 
1350   private HRegionInfo[] getHRegionInfos(HTableDescriptor hTableDescriptor,
1351     byte[][] splitKeys) {
1352     long regionId = System.currentTimeMillis();
1353     HRegionInfo[] hRegionInfos = null;
1354     if (splitKeys == null || splitKeys.length == 0) {
1355       hRegionInfos = new HRegionInfo[]{new HRegionInfo(hTableDescriptor.getTableName(), null, null,
1356                 false, regionId)};
1357     } else {
1358       int numRegions = splitKeys.length + 1;
1359       hRegionInfos = new HRegionInfo[numRegions];
1360       byte[] startKey = null;
1361       byte[] endKey = null;
1362       for (int i = 0; i < numRegions; i++) {
1363         endKey = (i == splitKeys.length) ? null : splitKeys[i];
1364         hRegionInfos[i] =
1365              new HRegionInfo(hTableDescriptor.getTableName(), startKey, endKey,
1366                  false, regionId);
1367         startKey = endKey;
1368       }
1369     }
1370     return hRegionInfos;
1371   }
1372 
1373   private static boolean isCatalogTable(final TableName tableName) {
1374     return tableName.equals(TableName.META_TABLE_NAME);
1375   }
1376 
1377   @Override
1378   public void deleteTable(final TableName tableName) throws IOException {
1379     checkInitialized();
1380     if (cpHost != null) {
1381       cpHost.preDeleteTable(tableName);
1382     }
1383     LOG.info(getClientIdAuditPrefix() + " delete " + tableName);
1384     this.service.submit(new DeleteTableHandler(tableName, this, this).prepare());
1385     if (cpHost != null) {
1386       cpHost.postDeleteTable(tableName);
1387     }
1388   }
1389 
1390   @Override
1391   public void truncateTable(TableName tableName, boolean preserveSplits) throws IOException {
1392     checkInitialized();
1393     if (cpHost != null) {
1394       cpHost.preTruncateTable(tableName);
1395     }
1396     LOG.info(getClientIdAuditPrefix() + " truncate " + tableName);
1397     TruncateTableHandler handler = new TruncateTableHandler(tableName, this, this, preserveSplits);
1398     handler.prepare();
1399     handler.process();
1400     if (cpHost != null) {
1401       cpHost.postTruncateTable(tableName);
1402     }
1403   }
1404 
1405   @Override
1406   public void addColumn(final TableName tableName, final HColumnDescriptor columnDescriptor)
1407       throws IOException {
1408     checkInitialized();
1409     checkCompression(columnDescriptor);
1410     if (cpHost != null) {
1411       if (cpHost.preAddColumn(tableName, columnDescriptor)) {
1412         return;
1413       }
1414     }
1415     //TODO: we should process this (and some others) in an executor
1416     new TableAddFamilyHandler(tableName, columnDescriptor, this, this).prepare().process();
1417     if (cpHost != null) {
1418       cpHost.postAddColumn(tableName, columnDescriptor);
1419     }
1420   }
1421 
1422   @Override
1423   public void modifyColumn(TableName tableName, HColumnDescriptor descriptor)
1424       throws IOException {
1425     checkInitialized();
1426     checkCompression(descriptor);
1427     if (cpHost != null) {
1428       if (cpHost.preModifyColumn(tableName, descriptor)) {
1429         return;
1430       }
1431     }
1432     LOG.info(getClientIdAuditPrefix() + " modify " + descriptor);
1433     new TableModifyFamilyHandler(tableName, descriptor, this, this)
1434       .prepare().process();
1435     if (cpHost != null) {
1436       cpHost.postModifyColumn(tableName, descriptor);
1437     }
1438   }
1439 
1440   @Override
1441   public void deleteColumn(final TableName tableName, final byte[] columnName)
1442       throws IOException {
1443     checkInitialized();
1444     if (cpHost != null) {
1445       if (cpHost.preDeleteColumn(tableName, columnName)) {
1446         return;
1447       }
1448     }
1449     LOG.info(getClientIdAuditPrefix() + " delete " + Bytes.toString(columnName));
1450     new TableDeleteFamilyHandler(tableName, columnName, this, this).prepare().process();
1451     if (cpHost != null) {
1452       cpHost.postDeleteColumn(tableName, columnName);
1453     }
1454   }
1455 
1456   @Override
1457   public void enableTable(final TableName tableName) throws IOException {
1458     checkInitialized();
1459     if (cpHost != null) {
1460       cpHost.preEnableTable(tableName);
1461     }
1462     LOG.info(getClientIdAuditPrefix() + " enable " + tableName);
1463     this.service.submit(new EnableTableHandler(this, tableName,
1464       assignmentManager, tableLockManager, false).prepare());
1465     if (cpHost != null) {
1466       cpHost.postEnableTable(tableName);
1467    }
1468   }
1469 
1470   @Override
1471   public void disableTable(final TableName tableName) throws IOException {
1472     checkInitialized();
1473     if (cpHost != null) {
1474       cpHost.preDisableTable(tableName);
1475     }
1476     LOG.info(getClientIdAuditPrefix() + " disable " + tableName);
1477     this.service.submit(new DisableTableHandler(this, tableName,
1478       assignmentManager, tableLockManager, false).prepare());
1479     if (cpHost != null) {
1480       cpHost.postDisableTable(tableName);
1481     }
1482   }
1483 
  /**
   * Return the region and current deployment for the region containing
   * the given row. If the region cannot be found, returns null. If it
   * is found, but not currently deployed, the second element of the pair
   * may be null.
   * @param tableName table whose region is wanted
   * @param rowKey row used to locate the containing region
   */
  @VisibleForTesting // Used by TestMaster.
  Pair<HRegionInfo, ServerName> getTableRegionForRow(
      final TableName tableName, final byte [] rowKey)
  throws IOException {
    // Holder written by the scanner callback below; stays null if no
    // matching meta row is seen.
    final AtomicReference<Pair<HRegionInfo, ServerName>> result =
      new AtomicReference<Pair<HRegionInfo, ServerName>>(null);

    MetaScannerVisitor visitor =
      new MetaScannerVisitorBase() {
        @Override
        public boolean processRow(Result data) throws IOException {
          if (data == null || data.size() <= 0) {
            // Empty meta row: keep scanning.
            return true;
          }
          Pair<HRegionInfo, ServerName> pair = HRegionInfo.getHRegionInfoAndServerName(data);
          if (pair == null) {
            // Row did not parse into a region/server pair; stop the scan.
            return false;
          }
          if (!pair.getFirst().getTable().equals(tableName)) {
            // Scanned into a different table; stop the scan.
            return false;
          }
          result.set(pair);
          return true;
        }
    };

    // Scan hbase:meta starting at the region containing rowKey, row limit 1.
    MetaScanner.metaScan(conf, visitor, tableName, rowKey, 1);
    return result.get();
  }
1519 
1520   @Override
1521   public void modifyTable(final TableName tableName, final HTableDescriptor descriptor)
1522       throws IOException {
1523     checkInitialized();
1524     sanityCheckTableDescriptor(descriptor);
1525     if (cpHost != null) {
1526       cpHost.preModifyTable(tableName, descriptor);
1527     }
1528     LOG.info(getClientIdAuditPrefix() + " modify " + tableName);
1529     new ModifyTableHandler(tableName, descriptor, this, this).prepare().process();
1530     if (cpHost != null) {
1531       cpHost.postModifyTable(tableName, descriptor);
1532     }
1533   }
1534 
1535   @Override
1536   public void checkTableModifiable(final TableName tableName)
1537       throws IOException, TableNotFoundException, TableNotDisabledException {
1538     if (isCatalogTable(tableName)) {
1539       throw new IOException("Can't modify catalog tables");
1540     }
1541     if (!MetaTableAccessor.tableExists(getConnection(), tableName)) {
1542       throw new TableNotFoundException(tableName);
1543     }
1544     if (!getAssignmentManager().getTableStateManager().
1545         isTableState(tableName, TableState.State.DISABLED)) {
1546       throw new TableNotDisabledException(tableName);
1547     }
1548   }
1549 
1550   /**
1551    * @return cluster status
1552    */
1553   public ClusterStatus getClusterStatus() throws InterruptedIOException {
1554     // Build Set of backup masters from ZK nodes
1555     List<String> backupMasterStrings;
1556     try {
1557       backupMasterStrings = ZKUtil.listChildrenNoWatch(this.zooKeeper,
1558         this.zooKeeper.backupMasterAddressesZNode);
1559     } catch (KeeperException e) {
1560       LOG.warn(this.zooKeeper.prefix("Unable to list backup servers"), e);
1561       backupMasterStrings = new ArrayList<String>(0);
1562     }
1563     List<ServerName> backupMasters = new ArrayList<ServerName>(
1564                                           backupMasterStrings.size());
1565     for (String s: backupMasterStrings) {
1566       try {
1567         byte [] bytes;
1568         try {
1569           bytes = ZKUtil.getData(this.zooKeeper, ZKUtil.joinZNode(
1570               this.zooKeeper.backupMasterAddressesZNode, s));
1571         } catch (InterruptedException e) {
1572           throw new InterruptedIOException();
1573         }
1574         if (bytes != null) {
1575           ServerName sn;
1576           try {
1577             sn = ServerName.parseFrom(bytes);
1578           } catch (DeserializationException e) {
1579             LOG.warn("Failed parse, skipping registering backup server", e);
1580             continue;
1581           }
1582           backupMasters.add(sn);
1583         }
1584       } catch (KeeperException e) {
1585         LOG.warn(this.zooKeeper.prefix("Unable to get information about " +
1586                  "backup servers"), e);
1587       }
1588     }
1589     Collections.sort(backupMasters, new Comparator<ServerName>() {
1590       @Override
1591       public int compare(ServerName s1, ServerName s2) {
1592         return s1.getServerName().compareTo(s2.getServerName());
1593       }});
1594 
1595     String clusterId = fileSystemManager != null ?
1596       fileSystemManager.getClusterId().toString() : null;
1597     Map<String, RegionState> regionsInTransition = assignmentManager != null ?
1598       assignmentManager.getRegionStates().getRegionsInTransition() : null;
1599     String[] coprocessors = cpHost != null ? getMasterCoprocessors() : null;
1600     boolean balancerOn = loadBalancerTracker != null ?
1601       loadBalancerTracker.isBalancerOn() : false;
1602     Map<ServerName, ServerLoad> onlineServers = null;
1603     Set<ServerName> deadServers = null;
1604     if (serverManager != null) {
1605       deadServers = serverManager.getDeadServers().copyServerNames();
1606       onlineServers = serverManager.getOnlineServers();
1607     }
1608     return new ClusterStatus(VersionInfo.getVersion(), clusterId,
1609       onlineServers, deadServers, serverName, backupMasters,
1610       regionsInTransition, coprocessors, balancerOn);
1611   }
1612 
  /**
   * The set of loaded coprocessors is stored in a static set. Since it's
   * statically allocated, it does not require that HMaster's cpHost be
   * initialized prior to accessing it.
   * @return a String representation of the set of names of the loaded
   * coprocessors.
   */
  public static String getLoadedCoprocessors() {
    // Delegates to CoprocessorHost's process-wide registry.
    return CoprocessorHost.getLoadedCoprocessors().toString();
  }
1623 
  /**
   * @return timestamp in millis when HMaster was started.
   */
  public long getMasterStartTime() {
    // The server start code doubles as the process start timestamp.
    return startcode;
  }
1630 
  /**
   * @return timestamp in millis when HMaster became the active master.
   */
  public long getMasterActiveTime() {
    // Set when this master won the active-master election; 0/unset before then
    // (presumably — confirm against where masterActiveTime is assigned).
    return masterActiveTime;
  }
1637 
1638   public int getRegionServerInfoPort(final ServerName sn) {
1639     RegionServerInfo info = this.regionServerTracker.getRegionServerInfo(sn);
1640     if (info == null || info.getInfoPort() == 0) {
1641       return conf.getInt(HConstants.REGIONSERVER_INFO_PORT,
1642         HConstants.DEFAULT_REGIONSERVER_INFOPORT);
1643     }
1644     return info.getInfoPort();
1645   }
1646 
1647   /**
1648    * @return array of coprocessor SimpleNames.
1649    */
1650   public String[] getMasterCoprocessors() {
1651     Set<String> masterCoprocessors = getMasterCoprocessorHost().getCoprocessors();
1652     return masterCoprocessors.toArray(new String[masterCoprocessors.size()]);
1653   }
1654 
1655   @Override
1656   public void abort(final String msg, final Throwable t) {
1657     if (isAborted() || isStopped()) {
1658       return;
1659     }
1660     if (cpHost != null) {
1661       // HBASE-4014: dump a list of loaded coprocessors.
1662       LOG.fatal("Master server abort: loaded coprocessors are: " +
1663           getLoadedCoprocessors());
1664     }
1665     if (t != null) LOG.fatal(msg, t);
1666     stop(msg);
1667   }
1668 
  /** @return the ZooKeeper watcher this master uses for coordination. */
  @Override
  public ZooKeeperWatcher getZooKeeper() {
    return zooKeeper;
  }
1673 
  /** @return this master's coprocessor host; null until it is constructed. */
  @Override
  public MasterCoprocessorHost getMasterCoprocessorHost() {
    return cpHost;
  }
1678 
  /** @return the quota manager owned by this master. */
  @Override
  public MasterQuotaManager getMasterQuotaManager() {
    return quotaManager;
  }
1683 
  /** @return the ServerName identifying this master process. */
  @Override
  public ServerName getServerName() {
    return this.serverName;
  }
1688 
  /** @return the assignment manager that tracks and moves regions. */
  @Override
  public AssignmentManager getAssignmentManager() {
    return this.assignmentManager;
  }
1693 
  /** @return the bounded in-memory buffer of fatal messages reported by region servers. */
  public MemoryBoundedLogMessageBuffer getRegionServerFatalLogBuffer() {
    return rsFatals;
  }
1697 
1698   public void shutdown() {
1699     if (cpHost != null) {
1700       try {
1701         cpHost.preShutdown();
1702       } catch (IOException ioe) {
1703         LOG.error("Error call master coprocessor preShutdown()", ioe);
1704       }
1705     }
1706 
1707     if (this.serverManager != null) {
1708       this.serverManager.shutdownCluster();
1709     }
1710     if (this.clusterStatusTracker != null){
1711       try {
1712         this.clusterStatusTracker.setClusterDown();
1713       } catch (KeeperException e) {
1714         LOG.error("ZooKeeper exception trying to set cluster as down in ZK", e);
1715       }
1716     }
1717   }
1718 
1719   public void stopMaster() {
1720     if (cpHost != null) {
1721       try {
1722         cpHost.preStopMaster();
1723       } catch (IOException ioe) {
1724         LOG.error("Error call master coprocessor preStopMaster()", ioe);
1725       }
1726     }
1727     stop("Stopped by " + Thread.currentThread().getName());
1728   }
1729 
  /**
   * Ensure the master's services have started.
   * @throws ServerNotRunningYetException if they have not
   */
  void checkServiceStarted() throws ServerNotRunningYetException {
    if (!serviceStarted) {
      throw new ServerNotRunningYetException("Server is not running yet");
    }
  }
1735 
  /**
   * Ensure the master is fully initialized (services started and
   * initialization complete).
   * @throws ServerNotRunningYetException if services have not started
   * @throws PleaseHoldException if the master is still initializing
   */
  void checkInitialized() throws PleaseHoldException, ServerNotRunningYetException {
    checkServiceStarted();
    if (!this.initialized) {
      throw new PleaseHoldException("Master is initializing");
    }
  }
1742 
  /**
   * Ensure the master is initialized and the table namespace manager is
   * up and usable.
   * @throws IOException if the namespace manager is not ready yet
   */
  void checkNamespaceManagerReady() throws IOException {
    checkInitialized();
    if (tableNamespaceManager == null ||
        !tableNamespaceManager.isTableAvailableAndInitialized()) {
      throw new IOException("Table Namespace Manager not ready yet, try again later");
    }
  }
  /**
   * Report whether this master is currently the active master or not.
   * If not active master, we are parked on ZK waiting to become active.
   *
   * This method is used for testing.
   *
   * @return true if this is the active master, false if it is a standby
   */
  public boolean isActiveMaster() {
    return isActiveMaster;
  }
1761 
  /**
   * Report whether this master has completed with its initialization and is
   * ready.  If ready, the master is also the active master.  A standby master
   * is never ready.
   *
   * This method is used for testing.
   *
   * @return true if this master is ready to go, false if not
   */
  @Override
  public boolean isInitialized() {
    return initialized;
  }
1775 
  /**
   * ServerShutdownHandlerEnabled is set false before completing
   * assignMeta to prevent processing of ServerShutdownHandler.
   * @return true if assignMeta has completed and server-shutdown handling
   *   is therefore enabled
   */
  @Override
  public boolean isServerShutdownHandlerEnabled() {
    return this.serverShutdownHandlerEnabled;
  }
1785 
  /**
   * Report whether this master has started initialization and is about to do
   * meta region assignment.
   * @return true if master is in initialization and about to assign the
   *   hbase:meta regions
   */
  public boolean isInitializationStartsMetaRegionAssignment() {
    return this.initializationBeforeMetaAssignment;
  }
1793 
  /**
   * Trigger an assignment of the given region via the assignment manager.
   * @param hri region to assign
   */
  public void assignRegion(HRegionInfo hri) {
    assignmentManager.assign(hri);
  }
1797 
1798   /**
1799    * Compute the average load across all region servers.
1800    * Currently, this uses a very naive computation - just uses the number of
1801    * regions being served, ignoring stats about number of requests.
1802    * @return the average load
1803    */
1804   public double getAverageLoad() {
1805     if (this.assignmentManager == null) {
1806       return 0;
1807     }
1808 
1809     RegionStates regionStates = this.assignmentManager.getRegionStates();
1810     if (regionStates == null) {
1811       return 0;
1812     }
1813     return regionStates.getAverageLoad();
1814   }
1815 
1816   @Override
1817   public boolean registerService(Service instance) {
1818     /*
1819      * No stacking of instances is allowed for a single service name
1820      */
1821     Descriptors.ServiceDescriptor serviceDesc = instance.getDescriptorForType();
1822     if (coprocessorServiceHandlers.containsKey(serviceDesc.getFullName())) {
1823       LOG.error("Coprocessor service "+serviceDesc.getFullName()+
1824           " already registered, rejecting request from "+instance
1825       );
1826       return false;
1827     }
1828 
1829     coprocessorServiceHandlers.put(serviceDesc.getFullName(), instance);
1830     if (LOG.isDebugEnabled()) {
1831       LOG.debug("Registered master coprocessor service: service="+serviceDesc.getFullName());
1832     }
1833     return true;
1834   }
1835 
1836   /**
1837    * Utility for constructing an instance of the passed HMaster class.
1838    * @param masterClass
1839    * @param conf
1840    * @return HMaster instance.
1841    */
1842   public static HMaster constructMaster(Class<? extends HMaster> masterClass,
1843       final Configuration conf, final CoordinatedStateManager cp)  {
1844     try {
1845       Constructor<? extends HMaster> c =
1846         masterClass.getConstructor(Configuration.class, CoordinatedStateManager.class);
1847       return c.newInstance(conf, cp);
1848     } catch (InvocationTargetException ite) {
1849       Throwable target = ite.getTargetException() != null?
1850         ite.getTargetException(): ite;
1851       if (target.getCause() != null) target = target.getCause();
1852       throw new RuntimeException("Failed construction of Master: " +
1853         masterClass.toString(), target);
1854     } catch (Exception e) {
1855       throw new RuntimeException("Failed construction of Master: " +
1856         masterClass.toString() + ((e.getCause() != null)?
1857           e.getCause().getMessage(): ""), e);
1858     }
1859   }
1860 
  /**
   * Entry point: logs the version and delegates command-line handling to
   * {@link HMasterCommandLine}.
   * @see org.apache.hadoop.hbase.master.HMasterCommandLine
   */
  public static void main(String [] args) {
    VersionInfo.logVersion();
    new HMasterCommandLine(HMaster.class).doMain(args);
  }
1868 
  /** @return the HFile cleaner used by this master. */
  public HFileCleaner getHFileCleaner() {
    return this.hfileCleaner;
  }
1872 
  /**
   * Exposed for TESTING! Do not use outside of tests.
   * @return the underlying snapshot manager
   */
  public SnapshotManager getSnapshotManagerForTesting() {
    return this.snapshotManager;
  }
1880 
1881   @Override
1882   public void createNamespace(NamespaceDescriptor descriptor) throws IOException {
1883     TableName.isLegalNamespaceName(Bytes.toBytes(descriptor.getName()));
1884     checkNamespaceManagerReady();
1885     if (cpHost != null) {
1886       if (cpHost.preCreateNamespace(descriptor)) {
1887         return;
1888       }
1889     }
1890     LOG.info(getClientIdAuditPrefix() + " creating " + descriptor);
1891     tableNamespaceManager.create(descriptor);
1892     if (cpHost != null) {
1893       cpHost.postCreateNamespace(descriptor);
1894     }
1895   }
1896 
1897   @Override
1898   public void modifyNamespace(NamespaceDescriptor descriptor) throws IOException {
1899     TableName.isLegalNamespaceName(Bytes.toBytes(descriptor.getName()));
1900     checkNamespaceManagerReady();
1901     if (cpHost != null) {
1902       if (cpHost.preModifyNamespace(descriptor)) {
1903         return;
1904       }
1905     }
1906     LOG.info(getClientIdAuditPrefix() + " modify " + descriptor);
1907     tableNamespaceManager.update(descriptor);
1908     if (cpHost != null) {
1909       cpHost.postModifyNamespace(descriptor);
1910     }
1911   }
1912 
1913   @Override
1914   public void deleteNamespace(String name) throws IOException {
1915     checkNamespaceManagerReady();
1916     if (cpHost != null) {
1917       if (cpHost.preDeleteNamespace(name)) {
1918         return;
1919       }
1920     }
1921     LOG.info(getClientIdAuditPrefix() + " delete " + name);
1922     tableNamespaceManager.remove(name);
1923     if (cpHost != null) {
1924       cpHost.postDeleteNamespace(name);
1925     }
1926   }
1927 
1928   @Override
1929   public NamespaceDescriptor getNamespaceDescriptor(String name) throws IOException {
1930     checkNamespaceManagerReady();
1931     NamespaceDescriptor nsd = tableNamespaceManager.get(name);
1932     if (nsd == null) {
1933       throw new NamespaceNotFoundException(name);
1934     }
1935     return nsd;
1936   }
1937 
  /**
   * List all namespaces known to the namespace manager.
   * @return a fresh list of namespace descriptors (safe for the caller to mutate)
   * @throws IOException if the namespace manager is not yet ready
   */
  @Override
  public List<NamespaceDescriptor> listNamespaceDescriptors() throws IOException {
    checkNamespaceManagerReady();
    return Lists.newArrayList(tableNamespaceManager.list());
  }
1943 
  /**
   * List the table descriptors belonging to the given namespace.
   * @throws IOException if the namespace does not exist or the namespace
   *   manager is not ready
   */
  @Override
  public List<HTableDescriptor> listTableDescriptorsByNamespace(String name) throws IOException {
    getNamespaceDescriptor(name); // check that namespace exists
    return Lists.newArrayList(tableDescriptors.getByNamespace(name).values());
  }
1949 
1950   @Override
1951   public List<TableName> listTableNamesByNamespace(String name) throws IOException {
1952     List<TableName> tableNames = Lists.newArrayList();
1953     getNamespaceDescriptor(name); // check that namespace exists
1954     for (HTableDescriptor descriptor: tableDescriptors.getByNamespace(name).values()) {
1955       tableNames.add(descriptor.getTableName());
1956     }
1957     return tableNames;
1958   }
1959 }