
1   /**
2    *
3    * Licensed to the Apache Software Foundation (ASF) under one
4    * or more contributor license agreements.  See the NOTICE file
5    * distributed with this work for additional information
6    * regarding copyright ownership.  The ASF licenses this file
7    * to you under the Apache License, Version 2.0 (the
8    * "License"); you may not use this file except in compliance
9    * with the License.  You may obtain a copy of the License at
10   *
11   *     http://www.apache.org/licenses/LICENSE-2.0
12   *
13   * Unless required by applicable law or agreed to in writing, software
14   * distributed under the License is distributed on an "AS IS" BASIS,
15   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16   * See the License for the specific language governing permissions and
17   * limitations under the License.
18   */
19  package org.apache.hadoop.hbase.master;
20  
21  import java.io.IOException;
22  import java.io.InterruptedIOException;
23  import java.lang.reflect.Constructor;
24  import java.lang.reflect.InvocationTargetException;
25  import java.net.InetAddress;
26  import java.net.InetSocketAddress;
27  import java.net.UnknownHostException;
28  import java.util.ArrayList;
29  import java.util.Collections;
30  import java.util.Comparator;
31  import java.util.HashSet;
32  import java.util.List;
33  import java.util.Map;
34  import java.util.Set;
35  import java.util.concurrent.atomic.AtomicReference;
36  
37  import javax.servlet.ServletException;
38  import javax.servlet.http.HttpServlet;
39  import javax.servlet.http.HttpServletRequest;
40  import javax.servlet.http.HttpServletResponse;
41  
42  import org.apache.commons.logging.Log;
43  import org.apache.commons.logging.LogFactory;
44  import org.apache.hadoop.classification.InterfaceAudience;
45  import org.apache.hadoop.conf.Configuration;
46  import org.apache.hadoop.fs.Path;
47  import org.apache.hadoop.hbase.ClusterStatus;
48  import org.apache.hadoop.hbase.CoordinatedStateException;
49  import org.apache.hadoop.hbase.DoNotRetryIOException;
50  import org.apache.hadoop.hbase.HBaseIOException;
51  import org.apache.hadoop.hbase.HColumnDescriptor;
52  import org.apache.hadoop.hbase.HConstants;
53  import org.apache.hadoop.hbase.HRegionInfo;
54  import org.apache.hadoop.hbase.HTableDescriptor;
55  import org.apache.hadoop.hbase.MasterNotRunningException;
56  import org.apache.hadoop.hbase.NamespaceDescriptor;
57  import org.apache.hadoop.hbase.NamespaceNotFoundException;
58  import org.apache.hadoop.hbase.PleaseHoldException;
59  import org.apache.hadoop.hbase.Server;
60  import org.apache.hadoop.hbase.ServerLoad;
61  import org.apache.hadoop.hbase.ServerName;
62  import org.apache.hadoop.hbase.TableDescriptors;
63  import org.apache.hadoop.hbase.TableName;
64  import org.apache.hadoop.hbase.TableNotDisabledException;
65  import org.apache.hadoop.hbase.TableNotFoundException;
66  import org.apache.hadoop.hbase.UnknownRegionException;
67  import org.apache.hadoop.hbase.catalog.MetaReader;
68  import org.apache.hadoop.hbase.client.MetaScanner;
69  import org.apache.hadoop.hbase.client.MetaScanner.MetaScannerVisitor;
70  import org.apache.hadoop.hbase.client.MetaScanner.MetaScannerVisitorBase;
71  import org.apache.hadoop.hbase.client.Result;
72  import org.apache.hadoop.hbase.CoordinatedStateManager;
73  import org.apache.hadoop.hbase.coprocessor.CoprocessorHost;
74  import org.apache.hadoop.hbase.exceptions.DeserializationException;
75  import org.apache.hadoop.hbase.executor.ExecutorType;
76  import org.apache.hadoop.hbase.ipc.RequestContext;
77  import org.apache.hadoop.hbase.ipc.RpcServer;
78  import org.apache.hadoop.hbase.ipc.ServerNotRunningYetException;
79  import org.apache.hadoop.hbase.master.MasterRpcServices.BalanceSwitchMode;
80  import org.apache.hadoop.hbase.master.RegionState.State;
81  import org.apache.hadoop.hbase.master.balancer.BalancerChore;
82  import org.apache.hadoop.hbase.master.balancer.ClusterStatusChore;
83  import org.apache.hadoop.hbase.master.balancer.LoadBalancerFactory;
84  import org.apache.hadoop.hbase.master.cleaner.HFileCleaner;
85  import org.apache.hadoop.hbase.master.cleaner.LogCleaner;
86  import org.apache.hadoop.hbase.master.handler.CreateTableHandler;
87  import org.apache.hadoop.hbase.master.handler.DeleteTableHandler;
88  import org.apache.hadoop.hbase.master.handler.DisableTableHandler;
89  import org.apache.hadoop.hbase.master.handler.DispatchMergingRegionHandler;
90  import org.apache.hadoop.hbase.master.handler.EnableTableHandler;
91  import org.apache.hadoop.hbase.master.handler.ModifyTableHandler;
92  import org.apache.hadoop.hbase.master.handler.TableAddFamilyHandler;
93  import org.apache.hadoop.hbase.master.handler.TableDeleteFamilyHandler;
94  import org.apache.hadoop.hbase.master.handler.TableModifyFamilyHandler;
95  import org.apache.hadoop.hbase.master.handler.TruncateTableHandler;
96  import org.apache.hadoop.hbase.master.snapshot.SnapshotManager;
97  import org.apache.hadoop.hbase.monitoring.MemoryBoundedLogMessageBuffer;
98  import org.apache.hadoop.hbase.monitoring.MonitoredTask;
99  import org.apache.hadoop.hbase.monitoring.TaskMonitor;
100 import org.apache.hadoop.hbase.procedure.MasterProcedureManagerHost;
101 import org.apache.hadoop.hbase.procedure.flush.MasterFlushTableProcedureManager;
102 import org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.RegionServerInfo;
103 import org.apache.hadoop.hbase.protobuf.generated.ZooKeeperProtos.SplitLogTask.RecoveryMode;
104 import org.apache.hadoop.hbase.protobuf.generated.ZooKeeperProtos;
105 import org.apache.hadoop.hbase.regionserver.HRegionServer;
106 import org.apache.hadoop.hbase.regionserver.RSRpcServices;
107 import org.apache.hadoop.hbase.regionserver.RegionSplitPolicy;
108 import org.apache.hadoop.hbase.replication.regionserver.Replication;
109 import org.apache.hadoop.hbase.security.UserProvider;
110 import org.apache.hadoop.hbase.util.Bytes;
111 import org.apache.hadoop.hbase.util.CompressionTest;
112 import org.apache.hadoop.hbase.util.FSUtils;
113 import org.apache.hadoop.hbase.util.HFileArchiveUtil;
114 import org.apache.hadoop.hbase.util.Pair;
115 import org.apache.hadoop.hbase.util.Threads;
116 import org.apache.hadoop.hbase.util.VersionInfo;
117 import org.apache.hadoop.hbase.zookeeper.DrainingServerTracker;
118 import org.apache.hadoop.hbase.zookeeper.LoadBalancerTracker;
119 import org.apache.hadoop.hbase.zookeeper.MasterAddressTracker;
120 import org.apache.hadoop.hbase.zookeeper.RegionServerTracker;
121 import org.apache.hadoop.hbase.zookeeper.ZKClusterId;
122 import org.apache.hadoop.hbase.zookeeper.ZKUtil;
123 import org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher;
124 import org.apache.zookeeper.KeeperException;
125 import org.apache.zookeeper.Watcher;
126 import org.mortbay.jetty.Connector;
127 import org.mortbay.jetty.nio.SelectChannelConnector;
128 import org.mortbay.jetty.servlet.Context;
129 
130 import com.google.common.annotations.VisibleForTesting;
131 import com.google.common.collect.Lists;
132 import com.google.common.collect.Maps;
133 import com.google.protobuf.Descriptors;
134 import com.google.protobuf.Service;
135 
136 /**
137  * HMaster is the "master server" for HBase. An HBase cluster has one active
138  * master.  If many masters are started, all compete.  Whichever wins goes on to
139  * run the cluster.  All others park themselves in their constructor until
140  * master or cluster shutdown or until the active master loses its lease in
141  * zookeeper.  Thereafter, all running masters jostle to take over the master role.
142  *
143  * <p>The Master can be asked to shut down the cluster. See {@link #shutdown()}.  In
144  * this case it will tell all regionservers to go down and then wait on them
145  * all reporting in that they are down.  This master will then shut itself down.
146  *
147  * <p>You can also shut down just this master.  Call {@link #stopMaster()}.
148  *
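 * <p>A minimal client-side sketch of the two shutdown paths above (hedged; it
 * assumes the {@code HBaseAdmin} API available in this code line):
 * <pre>
 * Configuration conf = HBaseConfiguration.create();
 * HBaseAdmin admin = new HBaseAdmin(conf);
 * admin.stopMaster();    // stop only this master; a backup master can take over
 * // admin.shutdown();   // or: shut down the whole cluster (master and regionservers)
 * admin.close();
 * </pre>
 *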
149  * @see Watcher
150  */
151 @InterfaceAudience.Private
152 @SuppressWarnings("deprecation")
153 public class HMaster extends HRegionServer implements MasterServices, Server {
154   private static final Log LOG = LogFactory.getLog(HMaster.class.getName());
155 
156   // MASTER is the name of the webapp and the attribute name used when stuffing this
157   // instance into the web context.
158   public static final String MASTER = "master";
159 
160   // Manager and zk listener for master election
161   private ActiveMasterManager activeMasterManager;
162   // Region server tracker
163   RegionServerTracker regionServerTracker;
164   // Draining region server tracker
165   private DrainingServerTracker drainingServerTracker;
166   // Tracker for load balancer state
167   LoadBalancerTracker loadBalancerTracker;
168 
169   /** Namespace stuff */
170   private TableNamespaceManager tableNamespaceManager;
171   private NamespaceJanitor namespaceJanitorChore;
172 
173   // Metrics for the HMaster
174   final MetricsMaster metricsMaster;
175   // file system manager for the master FS operations
176   private MasterFileSystem fileSystemManager;
177 
178   // server manager to deal with region server info
179   volatile ServerManager serverManager;
180 
181   // manager of assignment nodes in zookeeper
182   AssignmentManager assignmentManager;
183 
184   // buffer for "fatal error" notices from region servers
185   // in the cluster. This is only used for assisting
186   // operations/debugging.
187   MemoryBoundedLogMessageBuffer rsFatals;
188 
189   // flag set after we become the active master (used for testing)
190   private volatile boolean isActiveMaster = false;
191 
192   // flag set after we complete initialization once active,
193   // it is not private since it's used in unit tests
194   volatile boolean initialized = false;
195 
196   // flag set after master services are started,
197   // initialization may not have completed yet.
198   volatile boolean serviceStarted = false;
199 
200   // flag set after we complete assignMeta.
201   private volatile boolean serverShutdownHandlerEnabled = false;
202 
203   LoadBalancer balancer;
204   private BalancerChore balancerChore;
205   private ClusterStatusChore clusterStatusChore;
206   private ClusterStatusPublisher clusterStatusPublisherChore = null;
207 
208   CatalogJanitor catalogJanitorChore;
209   private LogCleaner logCleaner;
210   private HFileCleaner hfileCleaner;
211 
212   MasterCoprocessorHost cpHost;
213 
214   // Time stamp for when this master became active
215   private long masterActiveTime;
216 
217   //should we check the compression codec type at master side, default true, HBASE-6370
218   private final boolean masterCheckCompression;
219 
220   Map<String, Service> coprocessorServiceHandlers = Maps.newHashMap();
221 
222   // monitor for snapshot of hbase tables
223   SnapshotManager snapshotManager;
224   // monitor for distributed procedures
225   MasterProcedureManagerHost mpmHost;
226 
227   /** flag used in test cases in order to simulate RS failures during master initialization */
228   private volatile boolean initializationBeforeMetaAssignment = false;
229 
230   /** jetty server for master to redirect requests to regionserver infoServer */
231   private org.mortbay.jetty.Server masterJettyServer;
232 
233   public static class RedirectServlet extends HttpServlet {
234     private static final long serialVersionUID = 2894774810058302472L;
235     private static int regionServerInfoPort;
236 
237     @Override
238     public void doGet(HttpServletRequest request,
239         HttpServletResponse response) throws ServletException, IOException {
240       String redirectUrl = request.getScheme() + "://"
241         + request.getServerName() + ":" + regionServerInfoPort
242         + request.getRequestURI();
243       response.sendRedirect(redirectUrl);
244     }
245   }
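  // Behaviour of the servlet above, with hypothetical ports for illustration: a request to
  // http://master-host:<master-info-port>/master-status is answered with a redirect to
  // http://master-host:<regionServerInfoPort>/master-status, i.e. the configured master info
  // port simply forwards to the regionserver-style info server this master actually runs.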
246 
247   /**
248    * Initializes the HMaster. The steps are as follows:
249    * <p>
250    * <ol>
251    * <li>Initialize the local HRegionServer
252    * <li>Start the ActiveMasterManager.
253    * </ol>
254    * <p>
255    * Remaining steps of initialization occur in
256    * {@link #finishActiveMasterInitialization(MonitoredTask)} after
257    * the master becomes the active one.
258    *
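   * <p>A hedged construction sketch (the {@code CoordinatedStateManagerFactory}
   * helper is assumed to be available in this code line; startup is normally
   * driven by {@code HMasterCommandLine} rather than done by hand):
   * <pre>
   * Configuration conf = HBaseConfiguration.create();
   * CoordinatedStateManager csm =
   *     CoordinatedStateManagerFactory.getCoordinatedStateManager(conf);
   * HMaster master = new HMaster(conf, csm);
   * </pre>
   *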
259    * @throws InterruptedException
260    * @throws KeeperException
261    * @throws IOException
262    */
263   public HMaster(final Configuration conf, CoordinatedStateManager csm)
264       throws IOException, KeeperException, InterruptedException {
265     super(conf, csm);
266     this.rsFatals = new MemoryBoundedLogMessageBuffer(
267       conf.getLong("hbase.master.buffer.for.rs.fatals", 1*1024*1024));
268 
269     LOG.info("hbase.rootdir=" + FSUtils.getRootDir(this.conf) +
270         ", hbase.cluster.distributed=" + this.conf.getBoolean("hbase.cluster.distributed", false));
271 
272     Replication.decorateMasterConfiguration(this.conf);
273 
274     // Hack! Maps DFSClient => Master for logs.  HDFS made this
275     // config param for task trackers, but we can piggyback off of it.
276     if (this.conf.get("mapreduce.task.attempt.id") == null) {
277       this.conf.set("mapreduce.task.attempt.id", "hb_m_" + this.serverName.toString());
278     }
279 
280     //should we check the compression codec type at master side, default true, HBASE-6370
281     this.masterCheckCompression = conf.getBoolean("hbase.master.check.compression", true);
282 
283     this.metricsMaster = new MetricsMaster( new MetricsMasterWrapperImpl(this));
284 
285     // Do we publish the status?
286     boolean shouldPublish = conf.getBoolean(HConstants.STATUS_PUBLISHED,
287         HConstants.STATUS_PUBLISHED_DEFAULT);
288     Class<? extends ClusterStatusPublisher.Publisher> publisherClass =
289         conf.getClass(ClusterStatusPublisher.STATUS_PUBLISHER_CLASS,
290             ClusterStatusPublisher.DEFAULT_STATUS_PUBLISHER_CLASS,
291             ClusterStatusPublisher.Publisher.class);
292 
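    // Hedged note: when status publishing is enabled, the chore created below periodically
    // broadcasts the ClusterStatus (by default via the multicast publisher implementation),
    // which lets clients learn about dead region servers without waiting for RPC timeouts.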
293     if (shouldPublish) {
294       if (publisherClass == null) {
295         LOG.warn(HConstants.STATUS_PUBLISHED + " is true, but " +
296             ClusterStatusPublisher.DEFAULT_STATUS_PUBLISHER_CLASS +
297             " is not set - not publishing status");
298       } else {
299         clusterStatusPublisherChore = new ClusterStatusPublisher(this, conf, publisherClass);
300         Threads.setDaemonThreadRunning(clusterStatusPublisherChore.getThread());
301       }
302     }
303     startActiveMasterManager();
304     putUpJettyServer();
305   }
306 
307   private void putUpJettyServer() throws IOException {
308     if (!conf.getBoolean("hbase.master.infoserver.redirect", true)) {
309       return;
310     }
311     int infoPort = conf.getInt("hbase.master.info.port.orig",
312       HConstants.DEFAULT_MASTER_INFOPORT);
313     // -1 is for disabling info server, so no redirecting
314     if (infoPort < 0 || infoServer == null) {
315       return;
316     }
317 
318     RedirectServlet.regionServerInfoPort = infoServer.getPort();
319     masterJettyServer = new org.mortbay.jetty.Server();
320     Connector connector = new SelectChannelConnector();
321     connector.setHost(conf.get("hbase.master.info.bindAddress", "0.0.0.0"));
322     connector.setPort(infoPort);
323     masterJettyServer.addConnector(connector);
324     masterJettyServer.setStopAtShutdown(true);
325     Context context = new Context(masterJettyServer, "/", Context.NO_SESSIONS);
326     context.addServlet(RedirectServlet.class, "/*");
327     try {
328       masterJettyServer.start();
329     } catch (Exception e) {
330       throw new IOException("Failed to start redirecting jetty server", e);
331     }
332   }
333 
334   /**
335    * For compatibility: if login fails with the regionserver credentials, try the master ones.
336    */
337   protected void login(UserProvider user, String host) throws IOException {
338     try {
339       super.login(user, host);
340     } catch (IOException ie) {
341       user.login("hbase.master.keytab.file",
342         "hbase.master.kerberos.principal", host);
343     }
344   }
345 
346   @VisibleForTesting
347   public MasterRpcServices getMasterRpcServices() {
348     return (MasterRpcServices)rpcServices;
349   }
350 
351   public boolean balanceSwitch(final boolean b) throws IOException {
352     return getMasterRpcServices().switchBalancer(b, BalanceSwitchMode.ASYNC);
353   }
354 
355   protected String getProcessName() {
356     return MASTER;
357   }
358 
359   protected boolean canCreateBaseZNode() {
360     return true;
361   }
362 
363   protected boolean canUpdateTableDescriptor() {
364     return true;
365   }
366 
367   protected RSRpcServices createRpcServices() throws IOException {
368     return new MasterRpcServices(this);
369   }
370 
371   protected void configureInfoServer() {
372     infoServer.addServlet("master-status", "/master-status", MasterStatusServlet.class);
373     infoServer.setAttribute(MASTER, this);
374     super.configureInfoServer();
375   }
376 
377   protected Class<? extends HttpServlet> getDumpServlet() {
378     return MasterDumpServlet.class;
379   }
380 
381   /**
382    * Emit the HMaster metrics, such as region in transition metrics.
383    * Wrapped in a try block just to be sure a metrics failure doesn't abort the HMaster.
384    */
385   protected void doMetrics() {
386     try {
387       if (assignmentManager != null) {
388         assignmentManager.updateRegionsInTransitionMetrics();
389       }
390     } catch (Throwable e) {
391       LOG.error("Couldn't update metrics: " + e.getMessage());
392     }
393   }
394 
395   MetricsMaster getMasterMetrics() {
396     return metricsMaster;
397   }
398 
399   /**
400    * Initialize all ZK based system trackers.
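   * (load balancer tracker, assignment manager, region server tracker, draining
   * server tracker, the cluster-up flag, plus the snapshot and procedure managers)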
401    * @throws IOException
402    * @throws InterruptedException
403    * @throws KeeperException
404    * @throws CoordinatedStateException
405    */
406   void initializeZKBasedSystemTrackers() throws IOException,
407       InterruptedException, KeeperException, CoordinatedStateException {
408     this.balancer = LoadBalancerFactory.getLoadBalancer(conf);
409     this.loadBalancerTracker = new LoadBalancerTracker(zooKeeper, this);
410     this.loadBalancerTracker.start();
411     this.assignmentManager = new AssignmentManager(this, serverManager,
412       this.catalogTracker, this.balancer, this.service, this.metricsMaster,
413       this.tableLockManager);
414     zooKeeper.registerListenerFirst(assignmentManager);
415 
416     this.regionServerTracker = new RegionServerTracker(zooKeeper, this,
417         this.serverManager);
418     this.regionServerTracker.start();
419 
420     this.drainingServerTracker = new DrainingServerTracker(zooKeeper, this,
421       this.serverManager);
422     this.drainingServerTracker.start();
423 
424     // Set the cluster as up.  If new RSs, they'll be waiting on this before
425     // going ahead with their startup.
426     boolean wasUp = this.clusterStatusTracker.isClusterUp();
427     if (!wasUp) this.clusterStatusTracker.setClusterUp();
428 
429     LOG.info("Server active/primary master=" + this.serverName +
430         ", sessionid=0x" +
431         Long.toHexString(this.zooKeeper.getRecoverableZooKeeper().getSessionId()) +
432         ", setting cluster-up flag (Was=" + wasUp + ")");
433 
434     // create/initialize the snapshot manager and other procedure managers
435     this.snapshotManager = new SnapshotManager();
436     this.mpmHost = new MasterProcedureManagerHost();
437     this.mpmHost.register(this.snapshotManager);
438     this.mpmHost.register(new MasterFlushTableProcedureManager());
439     this.mpmHost.loadProcedures(conf);
440     this.mpmHost.initialize(this, this.metricsMaster);
441   }
442 
443   /**
444    * Finish initialization of HMaster after becoming the primary master.
445    *
446    * <ol>
447    * <li>Initialize master components - file system manager, server manager,
448    *     assignment manager, region server tracker, etc</li>
449    * <li>Start necessary service threads - balancer, catalog janitor,
450    *     executor services, etc</li>
451    * <li>Set cluster as UP in ZooKeeper</li>
452    * <li>Wait for RegionServers to check-in</li>
453    * <li>Split logs and perform data recovery, if necessary</li>
454    * <li>Ensure assignment of meta/namespace regions</li>
455    * <li>Handle either fresh cluster start or master failover</li>
456    * </ol>
457    *
458    * @throws IOException
459    * @throws InterruptedException
460    * @throws KeeperException
461    * @throws CoordinatedStateException
462    */
463   private void finishActiveMasterInitialization(MonitoredTask status)
464       throws IOException, InterruptedException, KeeperException, CoordinatedStateException {
465 
466     isActiveMaster = true;
467 
468     /*
469      * We are active master now... go initialize components we need to run.
470      * Note, there may be dross in zk from previous runs; it'll get addressed
471      * below after we determine whether this is a cluster startup or a failover.
472      */
473 
474     status.setStatus("Initializing Master file system");
475 
476     this.masterActiveTime = System.currentTimeMillis();
477     // TODO: Do this using Dependency Injection, using PicoContainer, Guice or Spring.
478     this.fileSystemManager = new MasterFileSystem(this, this);
479 
480     // publish cluster ID
481     status.setStatus("Publishing Cluster ID in ZooKeeper");
482     ZKClusterId.setClusterId(this.zooKeeper, fileSystemManager.getClusterId());
483     this.serverManager = createServerManager(this, this);
484 
485     // Now we have the cluster ID, start catalog tracker
486     startCatalogTracker();
487 
488     // Invalidate all write locks held previously
489     this.tableLockManager.reapWriteLocks();
490 
491     status.setStatus("Initializing ZK system trackers");
492     initializeZKBasedSystemTrackers();
493 
494     // initialize master side coprocessors before we start handling requests
495     status.setStatus("Initializing master coprocessors");
496     this.cpHost = new MasterCoprocessorHost(this, this.conf);
497 
498     // start up all service threads.
499     status.setStatus("Initializing master service threads");
500     startServiceThreads();
501 
502     // Wake up this server to check in
503     sleeper.skipSleepCycle();
504 
505     // Wait for region servers to report in
506     this.serverManager.waitForRegionServers(status);
507     // Check zk for region servers that are up but didn't register
508     for (ServerName sn: this.regionServerTracker.getOnlineServers()) {
509       // The isServerOnline check is opportunistic, correctness is handled inside
510       if (!this.serverManager.isServerOnline(sn)
511           && serverManager.checkAndRecordNewServer(sn, ServerLoad.EMPTY_SERVERLOAD)) {
512         LOG.info("Registered server found up in zk but who has not yet reported in: " + sn);
513       }
514     }
515 
516     // get a list of previously failed RSs which need log splitting work
517     // we recover hbase:meta region servers inside master initialization and
518     // handle other failed servers in SSH in order to start up master node ASAP
519     Set<ServerName> previouslyFailedServers = this.fileSystemManager
520         .getFailedServersFromLogFolders();
521 
522     // remove stale recovering regions from previous run
523     this.fileSystemManager.removeStaleRecoveringRegionsFromZK(previouslyFailedServers);
524 
525     // log splitting for hbase:meta server
526     ServerName oldMetaServerLocation = this.catalogTracker.getMetaLocation();
527     if (oldMetaServerLocation != null && previouslyFailedServers.contains(oldMetaServerLocation)) {
528       splitMetaLogBeforeAssignment(oldMetaServerLocation);
529       // Note: we can't remove oldMetaServerLocation from the previouslyFailedServers list because it
530       // may also host user regions
531     }
532     Set<ServerName> previouslyFailedMetaRSs = getPreviouselyFailedMetaServersFromZK();
533     // need to use union of previouslyFailedMetaRSs recorded in ZK and previouslyFailedServers
534     // instead of previouslyFailedMetaRSs alone to address the following two situations:
535     // 1) the chained failure situation (recovery failed multiple times in a row).
536     // 2) the master gets killed right before it could delete the recovering hbase:meta from ZK while the
537     // same server still has non-meta wals to be replayed, so that
538     // removeStaleRecoveringRegionsFromZK can't delete the stale hbase:meta region
539     // Passing more servers into splitMetaLog is all right; if a server doesn't have a hbase:meta wal,
540     // splitting is a no-op for that server.
541     previouslyFailedMetaRSs.addAll(previouslyFailedServers);
542 
543     this.initializationBeforeMetaAssignment = true;
544 
545     // Wait for regionserver to finish initialization.
546     synchronized (online) {
547       while (!isStopped() && !isOnline()) {
548         online.wait(100);
549       }
550     }
551 
552     //initialize load balancer
553     this.balancer.setClusterStatus(getClusterStatus());
554     this.balancer.setMasterServices(this);
555     this.balancer.initialize();
556 
557     // Check if master is shutting down because of some issue
558     // in initializing the regionserver or the balancer.
559     if(isStopped()) return;
560 
561     // Make sure meta assigned before proceeding.
562     status.setStatus("Assigning Meta Region");
563     assignMeta(status, previouslyFailedMetaRSs);
564     // check if master is shutting down, because the above assignMeta could return even if hbase:meta isn't
565     // assigned when master is shutting down
566     if(isStopped()) return;
567 
568     status.setStatus("Submitting log splitting work for previously failed region servers");
569     // Master has recovered hbase:meta region server and we put
570     // other failed region servers in a queue to be handled later by SSH
571     for (ServerName tmpServer : previouslyFailedServers) {
572       this.serverManager.processDeadServer(tmpServer, true);
573     }
574 
575     // Update meta with new PB serialization if required, i.e. migrate all HRI to PB serialization
576     // in meta. This must happen before we assign all user regions or else the assignment will
577     // fail.
578     org.apache.hadoop.hbase.catalog.MetaMigrationConvertingToPB
579       .updateMetaIfNecessary(this);
580 
581     // Fix up assignment manager status
582     status.setStatus("Starting assignment manager");
583     this.assignmentManager.joinCluster();
584 
585     //set cluster status again after user regions are assigned
586     this.balancer.setClusterStatus(getClusterStatus());
587 
588     // Start balancer and meta catalog janitor after meta and regions have
589     // been assigned.
590     status.setStatus("Starting balancer and catalog janitor");
591     this.clusterStatusChore = new ClusterStatusChore(this, balancer);
592     Threads.setDaemonThreadRunning(clusterStatusChore.getThread());
593     this.balancerChore = new BalancerChore(this);
594     Threads.setDaemonThreadRunning(balancerChore.getThread());
595     this.catalogJanitorChore = new CatalogJanitor(this, this);
596     Threads.setDaemonThreadRunning(catalogJanitorChore.getThread());
597 
598     status.setStatus("Starting namespace manager");
599     initNamespace();
600 
601     if (this.cpHost != null) {
602       try {
603         this.cpHost.preMasterInitialization();
604       } catch (IOException e) {
605         LOG.error("Coprocessor preMasterInitialization() hook failed", e);
606       }
607     }
608 
609     status.markComplete("Initialization successful");
610     LOG.info("Master has completed initialization");
611     initialized = true;
612     // clear dead servers that have the same host name and port as an online server, because we do not
613     // remove a dead server with the same hostname and port as an RS which is trying to check in before
614     // master initialization. See HBASE-5916.
615     this.serverManager.clearDeadServersWithSameHostNameAndPortOfOnlineServer();
616 
617     if (this.cpHost != null) {
618       // don't let cp initialization errors kill the master
619       try {
620         this.cpHost.postStartMaster();
621       } catch (IOException ioe) {
622         LOG.error("Coprocessor postStartMaster() hook failed", ioe);
623       }
624     }
625   }
626 
627   /**
628    * Useful for testing purposes, for example in
629    * master restart scenarios.
630    */
631   protected void startCatalogJanitorChore() {
632     Threads.setDaemonThreadRunning(catalogJanitorChore.getThread());
633   }
634 
635   /**
636    * Useful for testing purposes, for example in
637    * master restart scenarios.
638    */
639   protected void startNamespaceJanitorChore() {
640     Threads.setDaemonThreadRunning(namespaceJanitorChore.getThread());
641   }
642 
643   /**
644    * Create a {@link ServerManager} instance.
645    * @param master
646    * @param services
647    * @return An instance of {@link ServerManager}
648    * @throws org.apache.hadoop.hbase.ZooKeeperConnectionException
649    * @throws IOException
650    */
651   ServerManager createServerManager(final Server master,
652       final MasterServices services)
653   throws IOException {
654     // We put this out here in a method so we can do a Mockito.spy and stub it out
655     // w/ a mocked up ServerManager.
656     return new ServerManager(master, services);
657   }
658 
659   /**
660    * Check <code>hbase:meta</code> is assigned. If not, assign it.
661    * @param status MonitoredTask
662    * @param previouslyFailedMetaRSs
663    * @throws InterruptedException
664    * @throws IOException
665    * @throws KeeperException
666    */
667   void assignMeta(MonitoredTask status, Set<ServerName> previouslyFailedMetaRSs)
668       throws InterruptedException, IOException, KeeperException {
669     // Work on meta region
670     int assigned = 0;
671     long timeout = this.conf.getLong("hbase.catalog.verification.timeout", 1000);
672     status.setStatus("Assigning hbase:meta region");
673 
674     RegionStates regionStates = assignmentManager.getRegionStates();
675     regionStates.createRegionState(HRegionInfo.FIRST_META_REGIONINFO);
676     boolean rit = this.assignmentManager
677       .processRegionInTransitionAndBlockUntilAssigned(HRegionInfo.FIRST_META_REGIONINFO);
678     boolean metaRegionLocation = this.catalogTracker.verifyMetaRegionLocation(timeout);
679     ServerName currentMetaServer = this.catalogTracker.getMetaLocation();
680     if (!metaRegionLocation) {
681       // Meta location is not verified. It should be in transition, or offline.
682       // We will wait for it to be assigned in enableServerShutdownHandler below.
683       assigned++;
684       if (!rit) {
685         // Assign meta since not already in transition
686         if (currentMetaServer != null) {
687           // If the meta server is not known to be dead or online,
688           // just split the meta log, and don't expire it since this
689           // could be a full cluster restart. Otherwise, we will think
690           // this is a failover and lose previous region locations.
691           // If it is really a failover case, AM will find out in rebuilding
692           // user regions. Otherwise, we are good since all logs are split
693           // or known to be replayed before user regions are assigned.
694           if (serverManager.isServerOnline(currentMetaServer)) {
695             LOG.info("Forcing expire of " + currentMetaServer);
696             serverManager.expireServer(currentMetaServer);
697           }
698           splitMetaLogBeforeAssignment(currentMetaServer);
699           previouslyFailedMetaRSs.add(currentMetaServer);
700         }
701         assignmentManager.assignMeta();
702       }
703     } else {
704       // Region already assigned. We didn't assign it. Add to in-memory state.
705       regionStates.updateRegionState(
706         HRegionInfo.FIRST_META_REGIONINFO, State.OPEN, currentMetaServer);
707       this.assignmentManager.regionOnline(
708         HRegionInfo.FIRST_META_REGIONINFO, currentMetaServer);
709     }
710 
711     enableMeta(TableName.META_TABLE_NAME);
712 
713     if ((RecoveryMode.LOG_REPLAY == this.getMasterFileSystem().getLogRecoveryMode())
714         && (!previouslyFailedMetaRSs.isEmpty())) {
715       // log replay mode requires that the new hbase:meta RS is assigned first
716       status.setStatus("replaying log for Meta Region");
717       this.fileSystemManager.splitMetaLog(previouslyFailedMetaRSs);
718     }
719 
720     // Make sure a hbase:meta location is set. We need to enable SSH here since
721     // if the meta region server has died at this time, we need it to be re-assigned
722     // by SSH so that system tables can be assigned.
723     // No need to wait for meta when assigned == 0, i.e. the meta location was just verified.
724     enableServerShutdownHandler(assigned != 0);
725 
726     LOG.info("hbase:meta assigned=" + assigned + ", rit=" + rit +
727       ", location=" + catalogTracker.getMetaLocation());
728     status.setStatus("META assigned.");
729   }
730 
731   void initNamespace() throws IOException {
732     //create namespace manager
733     tableNamespaceManager = new TableNamespaceManager(this);
734     tableNamespaceManager.start();
735   }
736 
737   boolean isCatalogJanitorEnabled() {
738     return catalogJanitorChore != null ?
739       catalogJanitorChore.getEnabled() : false;
740   }
741 
742   private void splitMetaLogBeforeAssignment(ServerName currentMetaServer) throws IOException {
743     if (RecoveryMode.LOG_REPLAY == this.getMasterFileSystem().getLogRecoveryMode()) {
744       // In log replay mode, we mark hbase:meta region as recovering in ZK
745       Set<HRegionInfo> regions = new HashSet<HRegionInfo>();
746       regions.add(HRegionInfo.FIRST_META_REGIONINFO);
747       this.fileSystemManager.prepareLogReplay(currentMetaServer, regions);
748     } else {
749       // In recovered.edits mode: create recovered edits file for hbase:meta server
750       this.fileSystemManager.splitMetaLog(currentMetaServer);
751     }
752   }
753 
754   private void enableServerShutdownHandler(
755       final boolean waitForMeta) throws IOException, InterruptedException {
756     // If ServerShutdownHandler is disabled, we enable it and expire those dead
757     // but not expired servers. This is required so that if meta is being assigned to
758     // a server which dies after assignMeta starts assignment,
759     // SSH can re-assign it. Otherwise, we will be
760     // stuck here waiting forever if waitForMeta is specified.
761     if (!serverShutdownHandlerEnabled) {
762       serverShutdownHandlerEnabled = true;
763       this.serverManager.processQueuedDeadServers();
764     }
765 
766     if (waitForMeta) {
767       this.catalogTracker.waitForMeta();
768       // Above check waits for general meta availability but this does not
769       // guarantee that the transition has completed
770       this.assignmentManager.waitForAssignment(HRegionInfo.FIRST_META_REGIONINFO);
771     }
772   }
773 
774   private void enableMeta(TableName metaTableName) {
775     if (!this.assignmentManager.getTableStateManager().isTableState(metaTableName,
776         ZooKeeperProtos.Table.State.ENABLED)) {
777       this.assignmentManager.setEnabledTable(metaTableName);
778     }
779   }
780 
781   /**
782    * This function returns the set of region server names recorded under the hbase:meta recovering-region ZK node
783    * @return Set of meta server names which were recorded in ZK
784    * @throws KeeperException
785    */
786   private Set<ServerName> getPreviouselyFailedMetaServersFromZK() throws KeeperException {
787     Set<ServerName> result = new HashSet<ServerName>();
788     String metaRecoveringZNode = ZKUtil.joinZNode(zooKeeper.recoveringRegionsZNode,
789       HRegionInfo.FIRST_META_REGIONINFO.getEncodedName());
790     List<String> regionFailedServers = ZKUtil.listChildrenNoWatch(zooKeeper, metaRecoveringZNode);
791     if (regionFailedServers == null) return result;
792 
793     for(String failedServer : regionFailedServers) {
794       ServerName server = ServerName.parseServerName(failedServer);
795       result.add(server);
796     }
797     return result;
798   }
799 
800   @Override
801   public TableDescriptors getTableDescriptors() {
802     return this.tableDescriptors;
803   }
804 
805   @Override
806   public ServerManager getServerManager() {
807     return this.serverManager;
808   }
809 
810   @Override
811   public MasterFileSystem getMasterFileSystem() {
812     return this.fileSystemManager;
813   }
814 
815   /*
816    * Start up all services. If any of these threads gets an unhandled exception
817    * then they just die with a logged message.  This should be fine because
818    * in general, we do not expect the master to get such unhandled exceptions
819    * as OOMEs; it should be lightly loaded. See what HRegionServer does if you
820    * need to install an unexpected exception handler.
821    */
822   private void startServiceThreads() throws IOException{
823    // Start the executor service pools
824    this.service.startExecutorService(ExecutorType.MASTER_OPEN_REGION,
825       conf.getInt("hbase.master.executor.openregion.threads", 5));
826    this.service.startExecutorService(ExecutorType.MASTER_CLOSE_REGION,
827       conf.getInt("hbase.master.executor.closeregion.threads", 5));
828    this.service.startExecutorService(ExecutorType.MASTER_SERVER_OPERATIONS,
829       conf.getInt("hbase.master.executor.serverops.threads", 5));
830    this.service.startExecutorService(ExecutorType.MASTER_META_SERVER_OPERATIONS,
831       conf.getInt("hbase.master.executor.serverops.threads", 5));
832    this.service.startExecutorService(ExecutorType.M_LOG_REPLAY_OPS,
833       conf.getInt("hbase.master.executor.logreplayops.threads", 10));
834 
835    // We depend on there being only one instance of this executor running
836    // at a time.  To allow concurrency, we would need fencing of enable/disable of
837    // tables.
838    // Any time you change this maxThreads to > 1, please see the comment at
839    // AccessController#postCreateTableHandler
840    this.service.startExecutorService(ExecutorType.MASTER_TABLE_OPERATIONS, 1);
841 
842    // Start log cleaner thread
843    int cleanerInterval = conf.getInt("hbase.master.cleaner.interval", 60 * 1000);
844    this.logCleaner =
845       new LogCleaner(cleanerInterval,
846          this, conf, getMasterFileSystem().getFileSystem(),
847          getMasterFileSystem().getOldLogDir());
848    Threads.setDaemonThreadRunning(logCleaner.getThread(), getName() + ".oldLogCleaner");
849 
850    //start the hfile archive cleaner thread
851     Path archiveDir = HFileArchiveUtil.getArchivePath(conf);
852     this.hfileCleaner = new HFileCleaner(cleanerInterval, this, conf, getMasterFileSystem()
853         .getFileSystem(), archiveDir);
854     Threads.setDaemonThreadRunning(hfileCleaner.getThread(),
855       getName() + ".archivedHFileCleaner");
856 
857     serviceStarted = true;
858     if (LOG.isTraceEnabled()) {
859       LOG.trace("Started service threads");
860     }
861   }
862 
863   protected void stopServiceThreads() {
864     if (masterJettyServer != null) {
865       LOG.info("Stopping master jetty server");
866       try {
867         masterJettyServer.stop();
868       } catch (Exception e) {
869         LOG.error("Failed to stop master jetty server", e);
870       }
871     }
872     super.stopServiceThreads();
873     stopChores();
874     // Wait for all the remaining region servers to report in IFF we were
875     // running a cluster shutdown AND we were NOT aborting.
876     if (!isAborted() && this.serverManager != null &&
877         this.serverManager.isClusterShutdown()) {
878       this.serverManager.letRegionServersShutdown();
879     }
880     if (LOG.isDebugEnabled()) {
881       LOG.debug("Stopping service threads");
882     }
883     // Clean up and close up shop
884     if (this.logCleaner!= null) this.logCleaner.interrupt();
885     if (this.hfileCleaner != null) this.hfileCleaner.interrupt();
886     if (this.activeMasterManager != null) this.activeMasterManager.stop();
887     if (this.serverManager != null) this.serverManager.stop();
888     if (this.assignmentManager != null) this.assignmentManager.stop();
889     if (this.fileSystemManager != null) this.fileSystemManager.stop();
890     if (this.mpmHost != null) this.mpmHost.stop("server shutting down.");
891   }
892 
893   private void stopChores() {
894     if (this.balancerChore != null) {
895       this.balancerChore.interrupt();
896     }
897     if (this.clusterStatusChore != null) {
898       this.clusterStatusChore.interrupt();
899     }
900     if (this.catalogJanitorChore != null) {
901       this.catalogJanitorChore.interrupt();
902     }
903     if (this.clusterStatusPublisherChore != null){
904       clusterStatusPublisherChore.interrupt();
905     }
906     if (this.namespaceJanitorChore != null){
907       namespaceJanitorChore.interrupt();
908     }
909   }
910 
911   /**
912    * @return The remote side's InetAddress
913    * @throws UnknownHostException
914    */
915   InetAddress getRemoteInetAddress(final int port,
916       final long serverStartCode) throws UnknownHostException {
917     // Do it out here in its own little method so we can fake an address when
918     // mocking up in tests.
919     InetAddress ia = RpcServer.getRemoteIp();
920 
921     // The call could be from the local regionserver,
922     // in which case, there is no remote address.
923     if (ia == null && serverStartCode == startcode) {
924       InetSocketAddress isa = rpcServices.getSocketAddress();
925       if (isa != null && isa.getPort() == port) {
926         ia = isa.getAddress();
927       }
928     }
929     return ia;
930   }
931 
932   /**
933    * @return Maximum time we should run balancer for
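   *         (for example, with no "hbase.balancer.max.balancing" configured and the default
   *         "hbase.balancer.period" of 300000 ms, the cutoff works out to 5 minutes per run)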
934    */
935   private int getBalancerCutoffTime() {
936     int balancerCutoffTime =
937       getConfiguration().getInt("hbase.balancer.max.balancing", -1);
938     if (balancerCutoffTime == -1) {
939       // No time period set so create one
940       int balancerPeriod =
941         getConfiguration().getInt("hbase.balancer.period", 300000);
942       balancerCutoffTime = balancerPeriod;
943       // If nonsense period, set it to balancerPeriod
944       if (balancerCutoffTime <= 0) balancerCutoffTime = balancerPeriod;
945     }
946     return balancerCutoffTime;
947   }
948 
949   public boolean balance() throws IOException {
950     // if master not initialized, don't run balancer.
951     if (!this.initialized) {
952       LOG.debug("Master has not been initialized, don't run balancer.");
953       return false;
954     }
955     // Do this call outside of synchronized block.
956     int maximumBalanceTime = getBalancerCutoffTime();
957     boolean balancerRan;
958     synchronized (this.balancer) {
959       // If the balancer switch is not on, don't run the balancer.
960       if (!this.loadBalancerTracker.isBalancerOn()) return false;
961       // Only allow one balance run at a time.
962       if (this.assignmentManager.getRegionStates().isRegionsInTransition()) {
963         Map<String, RegionState> regionsInTransition =
964           this.assignmentManager.getRegionStates().getRegionsInTransition();
965         LOG.debug("Not running balancer because " + regionsInTransition.size() +
966           " region(s) in transition: " + org.apache.commons.lang.StringUtils.
967             abbreviate(regionsInTransition.toString(), 256));
968         return false;
969       }
970       if (this.serverManager.areDeadServersInProgress()) {
971         LOG.debug("Not running balancer because processing dead regionserver(s): " +
972           this.serverManager.getDeadServers());
973         return false;
974       }
975 
976       if (this.cpHost != null) {
977         try {
978           if (this.cpHost.preBalance()) {
979             LOG.debug("Coprocessor bypassing balancer request");
980             return false;
981           }
982         } catch (IOException ioe) {
983           LOG.error("Error invoking master coprocessor preBalance()", ioe);
984           return false;
985         }
986       }
987 
988       Map<TableName, Map<ServerName, List<HRegionInfo>>> assignmentsByTable =
989         this.assignmentManager.getRegionStates().getAssignmentsByTable();
990 
991       List<RegionPlan> plans = new ArrayList<RegionPlan>();
992       //Give the balancer the current cluster state.
993       this.balancer.setClusterStatus(getClusterStatus());
994       for (Map<ServerName, List<HRegionInfo>> assignments : assignmentsByTable.values()) {
995         List<RegionPlan> partialPlans = this.balancer.balanceCluster(assignments);
996         if (partialPlans != null) plans.addAll(partialPlans);
997       }
998       long cutoffTime = System.currentTimeMillis() + maximumBalanceTime;
999       int rpCount = 0;  // number of RegionPlans balanced so far
1000       long totalRegPlanExecTime = 0;
1001       balancerRan = plans != null;
1002       if (plans != null && !plans.isEmpty()) {
1003         for (RegionPlan plan: plans) {
1004           LOG.info("balance " + plan);
1005           long balStartTime = System.currentTimeMillis();
1006           //TODO: bulk assign
1007           this.assignmentManager.balance(plan);
1008           totalRegPlanExecTime += System.currentTimeMillis()-balStartTime;
1009           rpCount++;
1010           if (rpCount < plans.size() &&
1011               // if performing next balance exceeds cutoff time, exit the loop
1012               (System.currentTimeMillis() + (totalRegPlanExecTime / rpCount)) > cutoffTime) {
1013             // TODO: After balance, there should not be a cutoff time (keeping it as a safety net for now)
1014             LOG.debug("No more balancing till next balance run; maximumBalanceTime=" +
1015               maximumBalanceTime);
1016             break;
1017           }
1018         }
1019       }
1020       if (this.cpHost != null) {
1021         try {
1022           this.cpHost.postBalance(rpCount < plans.size() ? plans.subList(0, rpCount) : plans);
1023         } catch (IOException ioe) {
1024           // balancing already succeeded so don't change the result
1025           LOG.error("Error invoking master coprocessor postBalance()", ioe);
1026         }
1027       }
1028     }
1029     return balancerRan;
1030   }
1031 
1032   /**
1033    * @return Client info for use as prefix on an audit log string; who did an action
1034    */
1035   String getClientIdAuditPrefix() {
1036     return "Client=" + RequestContext.getRequestUserName() + "/" +
1037       RequestContext.get().getRemoteAddress();
1038   }
1039 
1040   /**
1041    * Switch for the background CatalogJanitor thread.
1042    * Used for testing.  The thread will continue to run.  It will just be a noop
1043    * if disabled.
1044    * @param b If false, the catalog janitor won't do anything.
1045    */
1046   public void setCatalogJanitorEnabled(final boolean b) {
1047     this.catalogJanitorChore.setEnabled(b);
1048   }
1049 
1050   @Override
1051   public void dispatchMergingRegions(final HRegionInfo region_a,
1052       final HRegionInfo region_b, final boolean forcible) throws IOException {
1053     checkInitialized();
1054     this.service.submit(new DispatchMergingRegionHandler(this,
1055         this.catalogJanitorChore, region_a, region_b, forcible));
1056   }
1057 
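  // Hedged note on the client path: HBaseAdmin#move(encodedRegionName, destServerName) is the
  // usual entry point that reaches this method through MasterRpcServices; passing a null/empty
  // destination lets the balancer pick a random target server, as handled below.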
1058   void move(final byte[] encodedRegionName,
1059       final byte[] destServerName) throws HBaseIOException {
1060     RegionState regionState = assignmentManager.getRegionStates().
1061       getRegionState(Bytes.toString(encodedRegionName));
1062     if (regionState == null) {
1063       throw new UnknownRegionException(Bytes.toStringBinary(encodedRegionName));
1064     }
1065 
1066     HRegionInfo hri = regionState.getRegion();
1067     ServerName dest;
1068     if (destServerName == null || destServerName.length == 0) {
1069       LOG.info("Passed destination servername is null/empty so " +
1070         "choosing a server at random");
1071       final List<ServerName> destServers = this.serverManager.createDestinationServersList(
1072         regionState.getServerName());
1073       dest = balancer.randomAssignment(hri, destServers);
1074     } else {
1075       dest = ServerName.valueOf(Bytes.toString(destServerName));
1076       if (dest.equals(regionState.getServerName())) {
1077         LOG.debug("Skipping move of region " + hri.getRegionNameAsString()
1078           + " because region already assigned to the same server " + dest + ".");
1079         return;
1080       }
1081     }
1082 
1083     // Now we can do the move
1084     RegionPlan rp = new RegionPlan(hri, regionState.getServerName(), dest);
1085 
1086     try {
1087       checkInitialized();
1088       if (this.cpHost != null) {
1089         if (this.cpHost.preMove(hri, rp.getSource(), rp.getDestination())) {
1090           return;
1091         }
1092       }
1093       LOG.info(getClientIdAuditPrefix() + " move " + rp + ", running balancer");
1094       this.assignmentManager.balance(rp);
1095       if (this.cpHost != null) {
1096         this.cpHost.postMove(hri, rp.getSource(), rp.getDestination());
1097       }
1098     } catch (IOException ioe) {
1099       if (ioe instanceof HBaseIOException) {
1100         throw (HBaseIOException)ioe;
1101       }
1102       throw new HBaseIOException(ioe);
1103     }
1104   }
1105 
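  // Hedged note on the client path: HBaseAdmin#createTable(HTableDescriptor, byte[][]) reaches
  // this method through MasterRpcServices; the CreateTableHandler submitted below then creates
  // the regions asynchronously on the executor service.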
1106   @Override
1107   public void createTable(HTableDescriptor hTableDescriptor,
1108       byte [][] splitKeys) throws IOException {
1109     if (isStopped()) {
1110       throw new MasterNotRunningException();
1111     }
1112 
1113     String namespace = hTableDescriptor.getTableName().getNamespaceAsString();
1114     getNamespaceDescriptor(namespace); // ensure namespace exists
1115 
1116     HRegionInfo[] newRegions = getHRegionInfos(hTableDescriptor, splitKeys);
1117     checkInitialized();
1118     sanityCheckTableDescriptor(hTableDescriptor);
1119     if (cpHost != null) {
1120       cpHost.preCreateTable(hTableDescriptor, newRegions);
1121     }
1122     LOG.info(getClientIdAuditPrefix() + " create " + hTableDescriptor);
1123     this.service.submit(new CreateTableHandler(this,
1124       this.fileSystemManager, hTableDescriptor, conf,
1125       newRegions, this).prepare());
1126     if (cpHost != null) {
1127       cpHost.postCreateTable(hTableDescriptor, newRegions);
1128     }
1129 
1130   }
1131 
1132   /**
1133    * Checks whether the table conforms to some sane limits, and configured
1134    * values (compression, etc) work. Throws an exception if something is wrong.
1135    * @throws IOException
1136    */
1137   private void sanityCheckTableDescriptor(final HTableDescriptor htd) throws IOException {
1138     final String CONF_KEY = "hbase.table.sanity.checks";
1139     if (!conf.getBoolean(CONF_KEY, true)) {
1140       return;
1141     }
1142     String tableVal = htd.getConfigurationValue(CONF_KEY);
1143     if (tableVal != null && !Boolean.valueOf(tableVal)) {
1144       return;
1145     }
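    // A hedged example of the per-table override read above, assuming the
    // HTableDescriptor#setConfiguration API of this code line:
    //   htd.setConfiguration("hbase.table.sanity.checks", "false");
    // would skip all of the checks below for that one table.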
1146 
1147     // check max file size
1148     long maxFileSizeLowerLimit = 2 * 1024 * 1024L; // 2M is the default lower limit
1149     long maxFileSize = htd.getMaxFileSize();
1150     if (maxFileSize < 0) {
1151       maxFileSize = conf.getLong(HConstants.HREGION_MAX_FILESIZE, maxFileSizeLowerLimit);
1152     }
1153     if (maxFileSize < conf.getLong("hbase.hregion.max.filesize.limit", maxFileSizeLowerLimit)) {
1154       throw new DoNotRetryIOException("MAX_FILESIZE for table descriptor or "
1155         + "\"hbase.hregion.max.filesize\" (" + maxFileSize
1156         + ") is too small, which might cause over-splitting into an unmanageable 
1157         + "number of regions. Set " + CONF_KEY + " to false at conf or table descriptor "
1158           + "if you want to bypass sanity checks");
1159     }
1160 
1161     // check flush size
1162     long flushSizeLowerLimit = 1024 * 1024L; // 1M is the default lower limit
1163     long flushSize = htd.getMemStoreFlushSize();
1164     if (flushSize < 0) {
1165       flushSize = conf.getLong(HConstants.HREGION_MEMSTORE_FLUSH_SIZE, flushSizeLowerLimit);
1166     }
1167     if (flushSize < conf.getLong("hbase.hregion.memstore.flush.size.limit", flushSizeLowerLimit)) {
1168       throw new DoNotRetryIOException("MEMSTORE_FLUSHSIZE for table descriptor or "
1169           + "\"hbase.hregion.memstore.flush.size\" ("+flushSize+") is too small, which might cause"
1170           + " very frequent flushing. Set " + CONF_KEY + " to false at conf or table descriptor "
1171           + "if you want to bypass sanity checks");
1172     }
1173 
1174     // check split policy class can be loaded
1175     try {
1176       RegionSplitPolicy.getSplitPolicyClass(htd, conf);
1177     } catch (Exception ex) {
1178       throw new DoNotRetryIOException(ex);
1179     }
1180 
1181     // check compression can be loaded
1182     checkCompression(htd);
1183 
1184     // check that we have at least 1 CF
1185     if (htd.getColumnFamilies().length == 0) {
1186       throw new DoNotRetryIOException("Table should have at least one column family. "
1187           + "Set "+CONF_KEY+" at conf or table descriptor if you want to bypass sanity checks");
1188     }
1189 
1190     for (HColumnDescriptor hcd : htd.getColumnFamilies()) {
1191       if (hcd.getTimeToLive() <= 0) {
1192         throw new DoNotRetryIOException("TTL for column family " + hcd.getNameAsString()
1193           + "  must be positive. Set " + CONF_KEY + " to false at conf or table descriptor "
1194           + "if you want to bypass sanity checks");
1195       }
1196 
1197       // check blockSize
1198       if (hcd.getBlocksize() < 1024 || hcd.getBlocksize() > 16 * 1024 * 1024) {
1199         throw new DoNotRetryIOException("Block size for column family " + hcd.getNameAsString()
1200           + "  must be between 1K and 16MB. Set "+CONF_KEY+" to false at conf or table descriptor "
1201           + "if you want to bypass sanity checks");
1202       }
1203 
1204       // check versions
1205       if (hcd.getMinVersions() < 0) {
1206         throw new DoNotRetryIOException("Min versions for column family " + hcd.getNameAsString()
1207           + "  must be non-negative. Set " + CONF_KEY + " to false at conf or table descriptor "
1208           + "if you want to bypass sanity checks");
1209       }
1210       // max versions already being checked
1211 
1212       // check replication scope
1213       if (hcd.getScope() < 0) {
1214         throw new DoNotRetryIOException("Replication scope for column family "
1215           + hcd.getNameAsString() + "  must be non-negative. Set " + CONF_KEY + " to false at conf "
1216           + "or table descriptor if you want to bypass sanity checks");
1217       }
1218 
1219       // TODO: should we check coprocessors and encryption ?
1220     }
1221   }
1222 
1223   private void startActiveMasterManager() throws KeeperException {
1224     String backupZNode = ZKUtil.joinZNode(
1225       zooKeeper.backupMasterAddressesZNode, serverName.toString());
1226     /*
1227     * Add a ZNode for ourselves in the backup master directory since we
1228     * may not become the active master. If we do not, we want the actual active
1229     * master to know we are a backup master, so that it won't assign
1230     * regions to us if so configured.
1231     *
1232     * If we become the active master later, ActiveMasterManager will delete
1233     * this node explicitly.  If we crash before then, ZooKeeper will delete
1234     * this node for us since it is ephemeral.
1235     */
1236     LOG.info("Adding ZNode for " + backupZNode + " in backup master directory");
1237     MasterAddressTracker.setMasterAddress(zooKeeper, backupZNode, serverName);
1238 
1239     activeMasterManager = new ActiveMasterManager(zooKeeper, serverName, this);
1240     // Start a thread to try to become the active master, so we won't block here
1241     Threads.setDaemonThreadRunning(new Thread(new Runnable() {
1242       public void run() {
1243         int timeout = conf.getInt(HConstants.ZK_SESSION_TIMEOUT,
1244           HConstants.DEFAULT_ZK_SESSION_TIMEOUT);
1245         // If we're a backup master, stall until the primary writes its address
1246         if (conf.getBoolean(HConstants.MASTER_TYPE_BACKUP,
1247             HConstants.DEFAULT_MASTER_TYPE_BACKUP)) {
1248           LOG.debug("HMaster started in backup mode. "
1249             + "Stalling until master znode is written.");
1250           // This will only be a minute or so while the cluster starts up,
1251           // so don't worry about setting watches on the parent znode
1252           while (!activeMasterManager.hasActiveMaster()) {
1253             LOG.debug("Waiting for master address ZNode to be written "
1254               + "(Also watching cluster state node)");
1255             Threads.sleep(timeout);
1256           }
1257         }
1258         MonitoredTask status = TaskMonitor.get().createStatus("Master startup");
1259         status.setDescription("Master startup");
1260         try {
1261           if (activeMasterManager.blockUntilBecomingActiveMaster(timeout, status)) {
1262             finishActiveMasterInitialization(status);
1263           }
1264         } catch (Throwable t) {
1265           status.setStatus("Failed to become active: " + t.getMessage());
1266           LOG.fatal("Failed to become active master", t);
1267           // HBASE-5680: Likely hadoop23 vs hadoop 20.x/1.x incompatibility
1268           if (t instanceof NoClassDefFoundError &&
1269               t.getMessage().contains("org/apache/hadoop/hdfs/protocol/FSConstants$SafeModeAction")) {
1270             // improved error message for this special case
1271             abort("HBase is having a problem with its Hadoop jars.  You may need to "
1272               + "recompile HBase against Hadoop version "
1273               +  org.apache.hadoop.util.VersionInfo.getVersion()
1274               + " or change your hadoop jars to start properly", t);
1275           } else {
1276             abort("Unhandled exception. Starting shutdown.", t);
1277           }
1278         } finally {
1279           status.cleanup();
1280         }
1281       }
1282     }, "ActiveMasterManager"));
1283   }
1284 
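       /**
        * Verify that the compression codecs configured for each column family of the
        * table can actually be loaded. Skipped when this.masterCheckCompression is false.
        */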
1285   private void checkCompression(final HTableDescriptor htd)
1286   throws IOException {
1287     if (!this.masterCheckCompression) return;
1288     for (HColumnDescriptor hcd : htd.getColumnFamilies()) {
1289       checkCompression(hcd);
1290     }
1291   }
1292 
1293   private void checkCompression(final HColumnDescriptor hcd)
1294   throws IOException {
1295     if (!this.masterCheckCompression) return;
1296     CompressionTest.testCompression(hcd.getCompression());
1297     CompressionTest.testCompression(hcd.getCompactionCompression());
1298   }
1299 
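       /**
        * Build the initial region boundaries for a new table. With no split keys a
        * single region spanning the whole key space is returned; otherwise n split
        * keys yield n + 1 regions, with null start/end keys marking the open-ended
        * first and last regions.
        */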
1300   private HRegionInfo[] getHRegionInfos(HTableDescriptor hTableDescriptor,
1301     byte[][] splitKeys) {
1302     HRegionInfo[] hRegionInfos = null;
1303     if (splitKeys == null || splitKeys.length == 0) {
1304       hRegionInfos = new HRegionInfo[]{
1305           new HRegionInfo(hTableDescriptor.getTableName(), null, null)};
1306     } else {
1307       int numRegions = splitKeys.length + 1;
1308       hRegionInfos = new HRegionInfo[numRegions];
1309       byte[] startKey = null;
1310       byte[] endKey = null;
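           // Walk the split keys in order: each region ends at splitKeys[i] and the
           // next one starts there; the first start key and the last end key stay
           // null (open-ended).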
1311       for (int i = 0; i < numRegions; i++) {
1312         endKey = (i == splitKeys.length) ? null : splitKeys[i];
1313         hRegionInfos[i] =
1314             new HRegionInfo(hTableDescriptor.getTableName(), startKey, endKey);
1315         startKey = endKey;
1316       }
1317     }
1318     return hRegionInfos;
1319   }
1320 
1321   private static boolean isCatalogTable(final TableName tableName) {
1322     return tableName.equals(TableName.META_TABLE_NAME);
1323   }
1324 
1325   @Override
1326   public void deleteTable(final TableName tableName) throws IOException {
1327     checkInitialized();
1328     if (cpHost != null) {
1329       cpHost.preDeleteTable(tableName);
1330     }
1331     LOG.info(getClientIdAuditPrefix() + " delete " + tableName);
1332     this.service.submit(new DeleteTableHandler(tableName, this, this).prepare());
1333     if (cpHost != null) {
1334       cpHost.postDeleteTable(tableName);
1335     }
1336   }
1337 
1338   @Override
1339   public void truncateTable(TableName tableName, boolean preserveSplits) throws IOException {
1340     checkInitialized();
1341     if (cpHost != null) {
1342       cpHost.preTruncateTable(tableName);
1343     }
1344     LOG.info(getClientIdAuditPrefix() + " truncate " + tableName);
1345     TruncateTableHandler handler = new TruncateTableHandler(tableName, this, this, preserveSplits);
1346     handler.prepare();
1347     handler.process();
1348     if (cpHost != null) {
1349       cpHost.postTruncateTable(tableName);
1350     }
1351   }
1352 
1353   @Override
1354   public void addColumn(final TableName tableName, final HColumnDescriptor column)
1355       throws IOException {
1356     checkInitialized();
1357     if (cpHost != null) {
1358       if (cpHost.preAddColumn(tableName, column)) {
1359         return;
1360       }
1361     }
1362     //TODO: we should process this (and some others) in an executor
1363     new TableAddFamilyHandler(tableName, column, this, this).prepare().process();
1364     if (cpHost != null) {
1365       cpHost.postAddColumn(tableName, column);
1366     }
1367   }
1368 
1369   @Override
1370   public void modifyColumn(TableName tableName, HColumnDescriptor descriptor)
1371       throws IOException {
1372     checkInitialized();
1373     checkCompression(descriptor);
1374     if (cpHost != null) {
1375       if (cpHost.preModifyColumn(tableName, descriptor)) {
1376         return;
1377       }
1378     }
1379     LOG.info(getClientIdAuditPrefix() + " modify " + descriptor);
1380     new TableModifyFamilyHandler(tableName, descriptor, this, this)
1381       .prepare().process();
1382     if (cpHost != null) {
1383       cpHost.postModifyColumn(tableName, descriptor);
1384     }
1385   }
1386 
1387   @Override
1388   public void deleteColumn(final TableName tableName, final byte[] columnName)
1389       throws IOException {
1390     checkInitialized();
1391     if (cpHost != null) {
1392       if (cpHost.preDeleteColumn(tableName, columnName)) {
1393         return;
1394       }
1395     }
1396     LOG.info(getClientIdAuditPrefix() + " delete " + Bytes.toString(columnName));
1397     new TableDeleteFamilyHandler(tableName, columnName, this, this).prepare().process();
1398     if (cpHost != null) {
1399       cpHost.postDeleteColumn(tableName, columnName);
1400     }
1401   }
1402 
1403   @Override
1404   public void enableTable(final TableName tableName) throws IOException {
1405     checkInitialized();
1406     if (cpHost != null) {
1407       cpHost.preEnableTable(tableName);
1408     }
1409     LOG.info(getClientIdAuditPrefix() + " enable " + tableName);
1410     this.service.submit(new EnableTableHandler(this, tableName,
1411       catalogTracker, assignmentManager, tableLockManager, false).prepare());
1412     if (cpHost != null) {
1413       cpHost.postEnableTable(tableName);
1414     }
1415   }
1416 
1417   @Override
1418   public void disableTable(final TableName tableName) throws IOException {
1419     checkInitialized();
1420     if (cpHost != null) {
1421       cpHost.preDisableTable(tableName);
1422     }
1423     LOG.info(getClientIdAuditPrefix() + " disable " + tableName);
1424     this.service.submit(new DisableTableHandler(this, tableName,
1425       catalogTracker, assignmentManager, tableLockManager, false).prepare());
1426     if (cpHost != null) {
1427       cpHost.postDisableTable(tableName);
1428     }
1429   }
1430 
1431   /**
1432    * Return the region and current deployment for the region containing
1433    * the given row. If the region cannot be found, returns null. If it
1434    * is found, but not currently deployed, the second element of the pair
1435    * may be null.
1436    */
1437   Pair<HRegionInfo, ServerName> getTableRegionForRow(
1438       final TableName tableName, final byte [] rowKey)
1439   throws IOException {
1440     final AtomicReference<Pair<HRegionInfo, ServerName>> result =
1441       new AtomicReference<Pair<HRegionInfo, ServerName>>(null);
1442 
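         // Scan hbase:meta starting at the requested row and remember the first
         // region (and its server) that belongs to the target table.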
1443     MetaScannerVisitor visitor =
1444       new MetaScannerVisitorBase() {
1445         @Override
1446         public boolean processRow(Result data) throws IOException {
1447           if (data == null || data.size() <= 0) {
1448             return true;
1449           }
1450           Pair<HRegionInfo, ServerName> pair = HRegionInfo.getHRegionInfoAndServerName(data);
1451           if (pair == null) {
1452             return false;
1453           }
1454           if (!pair.getFirst().getTable().equals(tableName)) {
1455             return false;
1456           }
1457           result.set(pair);
1458           return true;
1459         }
1460     };
1461 
1462     MetaScanner.metaScan(conf, visitor, tableName, rowKey, 1);
1463     return result.get();
1464   }
1465 
1466   @Override
1467   public void modifyTable(final TableName tableName, final HTableDescriptor descriptor)
1468       throws IOException {
1469     checkInitialized();
1470     sanityCheckTableDescriptor(descriptor);
1471     if (cpHost != null) {
1472       cpHost.preModifyTable(tableName, descriptor);
1473     }
1474     LOG.info(getClientIdAuditPrefix() + " modify " + tableName);
1475     new ModifyTableHandler(tableName, descriptor, this, this).prepare().process();
1476     if (cpHost != null) {
1477       cpHost.postModifyTable(tableName, descriptor);
1478     }
1479   }
1480 
1481   @Override
1482   public void checkTableModifiable(final TableName tableName)
1483       throws IOException, TableNotFoundException, TableNotDisabledException {
1484     if (isCatalogTable(tableName)) {
1485       throw new IOException("Can't modify catalog tables");
1486     }
1487     if (!MetaReader.tableExists(getCatalogTracker(), tableName)) {
1488       throw new TableNotFoundException(tableName);
1489     }
1490     if (!getAssignmentManager().getTableStateManager().
1491         isTableState(tableName, ZooKeeperProtos.Table.State.DISABLED)) {
1492       throw new TableNotDisabledException(tableName);
1493     }
1494   }
1495 
1496   /**
1497    * @return the current cluster status (online/dead servers, backup masters, regions in transition, balancer state)
1498    */
1499   public ClusterStatus getClusterStatus() throws InterruptedIOException {
1500     // Build Set of backup masters from ZK nodes
1501     List<String> backupMasterStrings;
1502     try {
1503       backupMasterStrings = ZKUtil.listChildrenNoWatch(this.zooKeeper,
1504         this.zooKeeper.backupMasterAddressesZNode);
1505     } catch (KeeperException e) {
1506       LOG.warn(this.zooKeeper.prefix("Unable to list backup servers"), e);
1507       backupMasterStrings = new ArrayList<String>(0);
1508     }
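         // Resolve each backup master znode to a ServerName; entries that cannot be
         // read or parsed are logged and skipped.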
1509     List<ServerName> backupMasters = new ArrayList<ServerName>(
1510                                           backupMasterStrings.size());
1511     for (String s: backupMasterStrings) {
1512       try {
1513         byte [] bytes;
1514         try {
1515           bytes = ZKUtil.getData(this.zooKeeper, ZKUtil.joinZNode(
1516               this.zooKeeper.backupMasterAddressesZNode, s));
1517         } catch (InterruptedException e) {
1518           throw new InterruptedIOException();
1519         }
1520         if (bytes != null) {
1521           ServerName sn;
1522           try {
1523             sn = ServerName.parseFrom(bytes);
1524           } catch (DeserializationException e) {
1525             LOG.warn("Failed to parse, skipping registration of backup server", e);
1526             continue;
1527           }
1528           backupMasters.add(sn);
1529         }
1530       } catch (KeeperException e) {
1531         LOG.warn(this.zooKeeper.prefix("Unable to get information about " +
1532                  "backup servers"), e);
1533       }
1534     }
1535     Collections.sort(backupMasters, new Comparator<ServerName>() {
1536       @Override
1537       public int compare(ServerName s1, ServerName s2) {
1538         return s1.getServerName().compareTo(s2.getServerName());
1539       }});
1540 
1541     String clusterId = fileSystemManager != null ?
1542       fileSystemManager.getClusterId().toString() : null;
1543     Map<String, RegionState> regionsInTransition = assignmentManager != null ?
1544       assignmentManager.getRegionStates().getRegionsInTransition() : null;
1545     String[] coprocessors = cpHost != null ? getMasterCoprocessors() : null;
1546     boolean balancerOn = loadBalancerTracker != null ?
1547       loadBalancerTracker.isBalancerOn() : false;
1548     Map<ServerName, ServerLoad> onlineServers = null;
1549     Set<ServerName> deadServers = null;
1550     if (serverManager != null) {
1551       deadServers = serverManager.getDeadServers().copyServerNames();
1552       onlineServers = serverManager.getOnlineServers();
1553     }
1554     return new ClusterStatus(VersionInfo.getVersion(), clusterId,
1555       onlineServers, deadServers, serverName, backupMasters,
1556       regionsInTransition, coprocessors, balancerOn);
1557   }
1558 
1559   /**
1560    * The set of loaded coprocessors is stored in a static set. Since it's
1561    * statically allocated, it does not require that HMaster's cpHost be
1562    * initialized prior to accessing it.
1563    * @return a String representation of the set of names of the loaded
1564    * coprocessors.
1565    */
1566   public static String getLoadedCoprocessors() {
1567     return CoprocessorHost.getLoadedCoprocessors().toString();
1568   }
1569 
1570   /**
1571    * @return timestamp in millis when HMaster was started.
1572    */
1573   public long getMasterStartTime() {
1574     return startcode;
1575   }
1576 
1577   /**
1578    * @return timestamp in millis when HMaster became the active master.
1579    */
1580   public long getMasterActiveTime() {
1581     return masterActiveTime;
1582   }
1583 
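       /**
        * @return the info port the given region server published in ZooKeeper, or the
        *   info port from this master's configuration if none was published.
        */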
1584   public int getRegionServerInfoPort(final ServerName sn) {
1585     RegionServerInfo info = this.regionServerTracker.getRegionServerInfo(sn);
1586     if (info == null || info.getInfoPort() == 0) {
1587       return conf.getInt(HConstants.REGIONSERVER_INFO_PORT,
1588         HConstants.DEFAULT_REGIONSERVER_INFOPORT);
1589     }
1590     return info.getInfoPort();
1591   }
1592 
1593   /**
1594    * @return array of the loaded coprocessors' simple class names.
1595    */
1596   public String[] getMasterCoprocessors() {
1597     Set<String> masterCoprocessors = getMasterCoprocessorHost().getCoprocessors();
1598     return masterCoprocessors.toArray(new String[masterCoprocessors.size()]);
1599   }
1600 
1601   @Override
1602   public void abort(final String msg, final Throwable t) {
1603     if (cpHost != null) {
1604       // HBASE-4014: dump a list of loaded coprocessors.
1605       LOG.fatal("Master server abort: loaded coprocessors are: " +
1606           getLoadedCoprocessors());
1607     }
1608     if (t != null) LOG.fatal(msg, t);
1609     stop(msg);
1610   }
1611 
1612   @Override
1613   public ZooKeeperWatcher getZooKeeper() {
1614     return zooKeeper;
1615   }
1616 
1617   @Override
1618   public MasterCoprocessorHost getMasterCoprocessorHost() {
1619     return cpHost;
1620   }
1621 
1622   @Override
1623   public ServerName getServerName() {
1624     return this.serverName;
1625   }
1626 
1627   @Override
1628   public AssignmentManager getAssignmentManager() {
1629     return this.assignmentManager;
1630   }
1631 
1632   public MemoryBoundedLogMessageBuffer getRegionServerFatalLogBuffer() {
1633     return rsFatals;
1634   }
1635 
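       /**
        * Shut down the cluster: notify coprocessors via preShutdown, stop the
        * assignment manager, mark the cluster as down in ZooKeeper and have the
        * server manager shut down the cluster.
        */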
1636   public void shutdown() {
1637     if (cpHost != null) {
1638       try {
1639         cpHost.preShutdown();
1640       } catch (IOException ioe) {
1641         LOG.error("Error calling master coprocessor preShutdown()", ioe);
1642       }
1643     }
1644     if (this.assignmentManager != null) {
1645       this.assignmentManager.shutdown();
1646     }
1647     try {
1648       if (this.clusterStatusTracker != null){
1649         this.clusterStatusTracker.setClusterDown();
1650         if (this.serverManager != null) {
1651           this.serverManager.shutdownCluster();
1652         }
1653       }
1654     } catch (KeeperException e) {
1655       LOG.error("ZooKeeper exception trying to set cluster as down in ZK", e);
1656     }
1657   }
1658 
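       /**
        * Stop this master only, without shutting down the cluster: notify
        * coprocessors via preStopMaster, then stop this server.
        */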
1659   public void stopMaster() {
1660     if (cpHost != null) {
1661       try {
1662         cpHost.preStopMaster();
1663       } catch (IOException ioe) {
1664         LOG.error("Error calling master coprocessor preStopMaster()", ioe);
1665       }
1666     }
1667     stop("Stopped by " + Thread.currentThread().getName());
1668   }
1669 
1670   void checkServiceStarted() throws ServerNotRunningYetException {
1671     if (!serviceStarted) {
1672       throw new ServerNotRunningYetException("Server is not running yet");
1673     }
1674   }
1675 
1676   void checkInitialized() throws PleaseHoldException, ServerNotRunningYetException {
1677     checkServiceStarted();
1678     if (!this.initialized) {
1679       throw new PleaseHoldException("Master is initializing");
1680     }
1681   }
1682 
1683   void checkNamespaceManagerReady() throws IOException {
1684     checkInitialized();
1685     if (tableNamespaceManager == null ||
1686         !tableNamespaceManager.isTableAvailableAndInitialized()) {
1687       throw new IOException("Table Namespace Manager not ready yet, try again later");
1688     }
1689   }
1690   /**
1691    * Report whether this master is currently the active master or not.
1692    * If not active master, we are parked on ZK waiting to become active.
1693    *
1694    * This method is used for testing.
1695    *
1696    * @return true if active master, false if not.
1697    */
1698   public boolean isActiveMaster() {
1699     return isActiveMaster;
1700   }
1701 
1702   /**
1703    * Report whether this master has completed its initialization and is
1704    * ready.  If ready, the master is also the active master.  A standby master
1705    * is never ready.
1706    *
1707    * This method is used for testing.
1708    *
1709    * @return true if master is ready to go, false if not.
1710    */
1711   @Override
1712   public boolean isInitialized() {
1713     return initialized;
1714   }
1715 
1716   /**
1717    * serverShutdownHandlerEnabled is set to false before completing
1718    * assignMeta to prevent processing of ServerShutdownHandler.
1719    * @return true if assignMeta has completed.
1720    */
1721   @Override
1722   public boolean isServerShutdownHandlerEnabled() {
1723     return this.serverShutdownHandlerEnabled;
1724   }
1725 
1726   /**
1727    * Report whether this master has started initialization and is about to do meta region assignment.
1728    * @return true if master is in initialization and about to assign hbase:meta regions
1729    */
1730   public boolean isInitializationStartsMetaRegionAssignment() {
1731     return this.initializationBeforeMetaAssignment;
1732   }
1733 
1734   public void assignRegion(HRegionInfo hri) {
1735     assignmentManager.assign(hri, true);
1736   }
1737 
1738   /**
1739    * Compute the average load across all region servers.
1740    * Currently, this uses a very naive computation - just uses the number of
1741    * regions being served, ignoring stats about number of requests.
1742    * @return the average load
1743    */
1744   public double getAverageLoad() {
1745     if (this.assignmentManager == null) {
1746       return 0;
1747     }
1748 
1749     RegionStates regionStates = this.assignmentManager.getRegionStates();
1750     if (regionStates == null) {
1751       return 0;
1752     }
1753     return regionStates.getAverageLoad();
1754   }
1755 
1756   @Override
1757   public boolean registerService(Service instance) {
1758     /*
1759      * No stacking of instances is allowed for a single service name
1760      */
1761     Descriptors.ServiceDescriptor serviceDesc = instance.getDescriptorForType();
1762     if (coprocessorServiceHandlers.containsKey(serviceDesc.getFullName())) {
1763       LOG.error("Coprocessor service "+serviceDesc.getFullName()+
1764           " already registered, rejecting request from "+instance
1765       );
1766       return false;
1767     }
1768 
1769     coprocessorServiceHandlers.put(serviceDesc.getFullName(), instance);
1770     if (LOG.isDebugEnabled()) {
1771       LOG.debug("Registered master coprocessor service: service="+serviceDesc.getFullName());
1772     }
1773     return true;
1774   }
1775 
1776   /**
1777    * Utility for constructing an instance of the passed HMaster class.
1778    * @param masterClass the HMaster class (or subclass) to instantiate
1779    * @param conf the configuration to pass to the constructor
1780    * @return HMaster instance.
1781    */
1782   public static HMaster constructMaster(Class<? extends HMaster> masterClass,
1783       final Configuration conf, final CoordinatedStateManager cp)  {
1784     try {
1785       Constructor<? extends HMaster> c =
1786         masterClass.getConstructor(Configuration.class, CoordinatedStateManager.class);
1787       return c.newInstance(conf, cp);
1788     } catch (InvocationTargetException ite) {
1789       Throwable target = ite.getTargetException() != null?
1790         ite.getTargetException(): ite;
1791       if (target.getCause() != null) target = target.getCause();
1792       throw new RuntimeException("Failed construction of Master: " +
1793         masterClass.toString(), target);
1794     } catch (Exception e) {
1795       throw new RuntimeException("Failed construction of Master: " +
1796         masterClass.toString() + ((e.getCause() != null)?
1797           e.getCause().getMessage(): ""), e);
1798     }
1799   }
1800 
1801   /**
1802    * @see org.apache.hadoop.hbase.master.HMasterCommandLine
1803    */
1804   public static void main(String [] args) {
1805     VersionInfo.logVersion();
1806     new HMasterCommandLine(HMaster.class).doMain(args);
1807   }
1808 
1809   public HFileCleaner getHFileCleaner() {
1810     return this.hfileCleaner;
1811   }
1812 
1813   /**
1814    * Exposed for TESTING!
1815    * @return the underlying snapshot manager
1816    */
1817   public SnapshotManager getSnapshotManagerForTesting() {
1818     return this.snapshotManager;
1819   }
1820 
1821   @Override
1822   public void createNamespace(NamespaceDescriptor descriptor) throws IOException {
1823     TableName.isLegalNamespaceName(Bytes.toBytes(descriptor.getName()));
1824     checkNamespaceManagerReady();
1825     if (cpHost != null) {
1826       if (cpHost.preCreateNamespace(descriptor)) {
1827         return;
1828       }
1829     }
1830     LOG.info(getClientIdAuditPrefix() + " creating " + descriptor);
1831     tableNamespaceManager.create(descriptor);
1832     if (cpHost != null) {
1833       cpHost.postCreateNamespace(descriptor);
1834     }
1835   }
1836 
1837   @Override
1838   public void modifyNamespace(NamespaceDescriptor descriptor) throws IOException {
1839     TableName.isLegalNamespaceName(Bytes.toBytes(descriptor.getName()));
1840     checkNamespaceManagerReady();
1841     if (cpHost != null) {
1842       if (cpHost.preModifyNamespace(descriptor)) {
1843         return;
1844       }
1845     }
1846     LOG.info(getClientIdAuditPrefix() + " modify " + descriptor);
1847     tableNamespaceManager.update(descriptor);
1848     if (cpHost != null) {
1849       cpHost.postModifyNamespace(descriptor);
1850     }
1851   }
1852 
1853   @Override
1854   public void deleteNamespace(String name) throws IOException {
1855     checkNamespaceManagerReady();
1856     if (cpHost != null) {
1857       if (cpHost.preDeleteNamespace(name)) {
1858         return;
1859       }
1860     }
1861     LOG.info(getClientIdAuditPrefix() + " delete " + name);
1862     tableNamespaceManager.remove(name);
1863     if (cpHost != null) {
1864       cpHost.postDeleteNamespace(name);
1865     }
1866   }
1867 
1868   @Override
1869   public NamespaceDescriptor getNamespaceDescriptor(String name) throws IOException {
1870     checkNamespaceManagerReady();
1871     NamespaceDescriptor nsd = tableNamespaceManager.get(name);
1872     if (nsd == null) {
1873       throw new NamespaceNotFoundException(name);
1874     }
1875     return nsd;
1876   }
1877 
1878   @Override
1879   public List<NamespaceDescriptor> listNamespaceDescriptors() throws IOException {
1880     checkNamespaceManagerReady();
1881     return Lists.newArrayList(tableNamespaceManager.list());
1882   }
1883 
1884   @Override
1885   public List<HTableDescriptor> listTableDescriptorsByNamespace(String name) throws IOException {
1886     getNamespaceDescriptor(name); // check that namespace exists
1887     return Lists.newArrayList(tableDescriptors.getByNamespace(name).values());
1888   }
1889 
1890   @Override
1891   public List<TableName> listTableNamesByNamespace(String name) throws IOException {
1892     List<TableName> tableNames = Lists.newArrayList();
1893     getNamespaceDescriptor(name); // check that namespace exists
1894     for (HTableDescriptor descriptor: tableDescriptors.getByNamespace(name).values()) {
1895       tableNames.add(descriptor.getTableName());
1896     }
1897     return tableNames;
1898   }
1899 }