View Javadoc

1   /**
2    *
3    * Licensed to the Apache Software Foundation (ASF) under one
4    * or more contributor license agreements.  See the NOTICE file
5    * distributed with this work for additional information
6    * regarding copyright ownership.  The ASF licenses this file
7    * to you under the Apache License, Version 2.0 (the
8    * "License"); you may not use this file except in compliance
9    * with the License.  You may obtain a copy of the License at
10   *
11   *     http://www.apache.org/licenses/LICENSE-2.0
12   *
13   * Unless required by applicable law or agreed to in writing, software
14   * distributed under the License is distributed on an "AS IS" BASIS,
15   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16   * See the License for the specific language governing permissions and
17   * limitations under the License.
18   */
19  package org.apache.hadoop.hbase.master;
20  
21  import java.io.IOException;
22  import java.io.InterruptedIOException;
23  import java.lang.reflect.Constructor;
24  import java.lang.reflect.InvocationTargetException;
25  import java.net.InetAddress;
26  import java.net.InetSocketAddress;
27  import java.net.UnknownHostException;
28  import java.util.ArrayList;
29  import java.util.Collections;
30  import java.util.Comparator;
31  import java.util.HashSet;
32  import java.util.List;
33  import java.util.Map;
34  import java.util.Set;
35  import java.util.concurrent.atomic.AtomicReference;
36  
37  import javax.servlet.ServletException;
38  import javax.servlet.http.HttpServlet;
39  import javax.servlet.http.HttpServletRequest;
40  import javax.servlet.http.HttpServletResponse;
41  
42  import org.apache.commons.logging.Log;
43  import org.apache.commons.logging.LogFactory;
44  import org.apache.hadoop.classification.InterfaceAudience;
45  import org.apache.hadoop.conf.Configuration;
46  import org.apache.hadoop.fs.Path;
47  import org.apache.hadoop.hbase.ClusterStatus;
48  import org.apache.hadoop.hbase.CoordinatedStateException;
49  import org.apache.hadoop.hbase.DoNotRetryIOException;
50  import org.apache.hadoop.hbase.HBaseIOException;
51  import org.apache.hadoop.hbase.HColumnDescriptor;
52  import org.apache.hadoop.hbase.HConstants;
53  import org.apache.hadoop.hbase.HRegionInfo;
54  import org.apache.hadoop.hbase.HTableDescriptor;
55  import org.apache.hadoop.hbase.MasterNotRunningException;
56  import org.apache.hadoop.hbase.NamespaceDescriptor;
57  import org.apache.hadoop.hbase.NamespaceNotFoundException;
58  import org.apache.hadoop.hbase.PleaseHoldException;
59  import org.apache.hadoop.hbase.Server;
60  import org.apache.hadoop.hbase.ServerLoad;
61  import org.apache.hadoop.hbase.ServerName;
62  import org.apache.hadoop.hbase.TableDescriptors;
63  import org.apache.hadoop.hbase.TableName;
64  import org.apache.hadoop.hbase.TableNotDisabledException;
65  import org.apache.hadoop.hbase.TableNotFoundException;
66  import org.apache.hadoop.hbase.UnknownRegionException;
67  import org.apache.hadoop.hbase.MetaMigrationConvertingToPB;
68  import org.apache.hadoop.hbase.MetaTableAccessor;
69  import org.apache.hadoop.hbase.client.MetaScanner;
70  import org.apache.hadoop.hbase.client.MetaScanner.MetaScannerVisitor;
71  import org.apache.hadoop.hbase.client.MetaScanner.MetaScannerVisitorBase;
72  import org.apache.hadoop.hbase.client.Result;
73  import org.apache.hadoop.hbase.CoordinatedStateManager;
74  import org.apache.hadoop.hbase.coprocessor.CoprocessorHost;
75  import org.apache.hadoop.hbase.exceptions.DeserializationException;
76  import org.apache.hadoop.hbase.executor.ExecutorType;
77  import org.apache.hadoop.hbase.ipc.RequestContext;
78  import org.apache.hadoop.hbase.ipc.RpcServer;
79  import org.apache.hadoop.hbase.ipc.ServerNotRunningYetException;
80  import org.apache.hadoop.hbase.master.MasterRpcServices.BalanceSwitchMode;
81  import org.apache.hadoop.hbase.master.RegionState.State;
82  import org.apache.hadoop.hbase.master.balancer.BalancerChore;
83  import org.apache.hadoop.hbase.master.balancer.ClusterStatusChore;
84  import org.apache.hadoop.hbase.master.balancer.LoadBalancerFactory;
85  import org.apache.hadoop.hbase.master.cleaner.HFileCleaner;
86  import org.apache.hadoop.hbase.master.cleaner.LogCleaner;
87  import org.apache.hadoop.hbase.master.handler.CreateTableHandler;
88  import org.apache.hadoop.hbase.master.handler.DeleteTableHandler;
89  import org.apache.hadoop.hbase.master.handler.DisableTableHandler;
90  import org.apache.hadoop.hbase.master.handler.DispatchMergingRegionHandler;
91  import org.apache.hadoop.hbase.master.handler.EnableTableHandler;
92  import org.apache.hadoop.hbase.master.handler.ModifyTableHandler;
93  import org.apache.hadoop.hbase.master.handler.TableAddFamilyHandler;
94  import org.apache.hadoop.hbase.master.handler.TableDeleteFamilyHandler;
95  import org.apache.hadoop.hbase.master.handler.TableModifyFamilyHandler;
96  import org.apache.hadoop.hbase.master.handler.TruncateTableHandler;
97  import org.apache.hadoop.hbase.master.snapshot.SnapshotManager;
98  import org.apache.hadoop.hbase.monitoring.MemoryBoundedLogMessageBuffer;
99  import org.apache.hadoop.hbase.monitoring.MonitoredTask;
100 import org.apache.hadoop.hbase.monitoring.TaskMonitor;
101 import org.apache.hadoop.hbase.procedure.MasterProcedureManagerHost;
102 import org.apache.hadoop.hbase.procedure.flush.MasterFlushTableProcedureManager;
103 import org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.RegionServerInfo;
104 import org.apache.hadoop.hbase.protobuf.generated.ZooKeeperProtos.SplitLogTask.RecoveryMode;
105 import org.apache.hadoop.hbase.protobuf.generated.ZooKeeperProtos;
106 import org.apache.hadoop.hbase.regionserver.HRegionServer;
107 import org.apache.hadoop.hbase.regionserver.RSRpcServices;
108 import org.apache.hadoop.hbase.regionserver.RegionSplitPolicy;
109 import org.apache.hadoop.hbase.replication.regionserver.Replication;
110 import org.apache.hadoop.hbase.security.UserProvider;
111 import org.apache.hadoop.hbase.util.Bytes;
112 import org.apache.hadoop.hbase.util.CompressionTest;
113 import org.apache.hadoop.hbase.util.FSUtils;
114 import org.apache.hadoop.hbase.util.HFileArchiveUtil;
115 import org.apache.hadoop.hbase.util.Pair;
116 import org.apache.hadoop.hbase.util.Threads;
117 import org.apache.hadoop.hbase.util.VersionInfo;
118 import org.apache.hadoop.hbase.zookeeper.DrainingServerTracker;
119 import org.apache.hadoop.hbase.zookeeper.LoadBalancerTracker;
120 import org.apache.hadoop.hbase.zookeeper.MasterAddressTracker;
121 import org.apache.hadoop.hbase.zookeeper.MetaTableLocator;
122 import org.apache.hadoop.hbase.zookeeper.RegionServerTracker;
123 import org.apache.hadoop.hbase.zookeeper.ZKClusterId;
124 import org.apache.hadoop.hbase.zookeeper.ZKUtil;
125 import org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher;
126 import org.apache.zookeeper.KeeperException;
127 import org.apache.zookeeper.Watcher;
128 import org.mortbay.jetty.Connector;
129 import org.mortbay.jetty.nio.SelectChannelConnector;
130 import org.mortbay.jetty.servlet.Context;
131 
132 import com.google.common.annotations.VisibleForTesting;
133 import com.google.common.collect.Lists;
134 import com.google.common.collect.Maps;
135 import com.google.protobuf.Descriptors;
136 import com.google.protobuf.Service;
137 
138 /**
139  * HMaster is the "master server" for HBase. An HBase cluster has one active
140  * master.  If many masters are started, all compete.  Whichever wins goes on to
141  * run the cluster.  All others park themselves in their constructor until
142  * master or cluster shutdown or until the active master loses its lease in
143  * zookeeper.  Thereafter, all running master jostle to take over master role.
144  *
145  * <p>The Master can be asked shutdown the cluster. See {@link #shutdown()}.  In
146  * this case it will tell all regionservers to go down and then wait on them
147  * all reporting in that they are down.  This master will then shut itself down.
148  *
149  * <p>You can also shutdown just this master.  Call {@link #stopMaster()}.
150  *
151  * @see Watcher
152  */
153 @InterfaceAudience.Private
154 @SuppressWarnings("deprecation")
155 public class HMaster extends HRegionServer implements MasterServices, Server {
156   private static final Log LOG = LogFactory.getLog(HMaster.class.getName());
157 
158   // MASTER is name of the webapp and the attribute name used stuffing this
159   //instance into web context.
160   public static final String MASTER = "master";
161 
162   // Manager and zk listener for master election
163   private ActiveMasterManager activeMasterManager;
164   // Region server tracker
165   RegionServerTracker regionServerTracker;
166   // Draining region server tracker
167   private DrainingServerTracker drainingServerTracker;
168   // Tracker for load balancer state
169   LoadBalancerTracker loadBalancerTracker;
170 
171   /** Namespace stuff */
172   private TableNamespaceManager tableNamespaceManager;
173   private NamespaceJanitor namespaceJanitorChore;
174 
175   // Metrics for the HMaster
176   final MetricsMaster metricsMaster;
177   // file system manager for the master FS operations
178   private MasterFileSystem fileSystemManager;
179 
180   // server manager to deal with region server info
181   volatile ServerManager serverManager;
182 
183   // manager of assignment nodes in zookeeper
184   AssignmentManager assignmentManager;
185 
186   // buffer for "fatal error" notices from region servers
187   // in the cluster. This is only used for assisting
188   // operations/debugging.
189   MemoryBoundedLogMessageBuffer rsFatals;
190 
191   // flag set after we become the active master (used for testing)
192   private volatile boolean isActiveMaster = false;
193 
194   // flag set after we complete initialization once active,
195   // it is not private since it's used in unit tests
196   volatile boolean initialized = false;
197 
198   // flag set after master services are started,
199   // initialization may have not completed yet.
200   volatile boolean serviceStarted = false;
201 
202   // flag set after we complete assignMeta.
203   private volatile boolean serverShutdownHandlerEnabled = false;
204 
205   LoadBalancer balancer;
206   private BalancerChore balancerChore;
207   private ClusterStatusChore clusterStatusChore;
208   private ClusterStatusPublisher clusterStatusPublisherChore = null;
209 
210   CatalogJanitor catalogJanitorChore;
211   private LogCleaner logCleaner;
212   private HFileCleaner hfileCleaner;
213 
214   MasterCoprocessorHost cpHost;
215 
216   // Time stamps for when a hmaster became active
217   private long masterActiveTime;
218 
219   //should we check the compression codec type at master side, default true, HBASE-6370
220   private final boolean masterCheckCompression;
221 
222   Map<String, Service> coprocessorServiceHandlers = Maps.newHashMap();
223 
224   // monitor for snapshot of hbase tables
225   SnapshotManager snapshotManager;
226   // monitor for distributed procedures
227   MasterProcedureManagerHost mpmHost;
228 
229   /** flag used in test cases in order to simulate RS failures during master initialization */
230   private volatile boolean initializationBeforeMetaAssignment = false;
231 
232   /** jetty server for master to redirect requests to regionserver infoServer */
233   private org.mortbay.jetty.Server masterJettyServer;
234 
235   public static class RedirectServlet extends HttpServlet {
236     private static final long serialVersionUID = 2894774810058302472L;
237     private static int regionServerInfoPort;
238 
239     @Override
240     public void doGet(HttpServletRequest request,
241         HttpServletResponse response) throws ServletException, IOException {
242       String redirectUrl = request.getScheme() + "://"
243         + request.getServerName() + ":" + regionServerInfoPort
244         + request.getRequestURI();
245       response.sendRedirect(redirectUrl);
246     }
247   }
248 
  /**
   * Initializes the HMaster. The steps are as follows:
   * <p>
   * <ol>
   * <li>Initialize the local HRegionServer
   * <li>Start the ActiveMasterManager.
   * </ol>
   * <p>
   * Remaining steps of initialization occur in
   * {@link #finishActiveMasterInitialization(MonitoredTask)} after
   * the master becomes the active one.
   *
   * @param conf cluster configuration; mutated here (replication decoration,
   *             task-attempt id) before the master starts using it
   * @param csm coordinated state manager handed through to the embedded
   *            HRegionServer via the super constructor
   * @throws InterruptedException
   * @throws KeeperException
   * @throws IOException
   */
  public HMaster(final Configuration conf, CoordinatedStateManager csm)
      throws IOException, KeeperException, InterruptedException {
    super(conf, csm);
    // Bounded in-memory buffer of "fatal error" reports from region servers;
    // surfaced for operator debugging, capped at 1MB by default.
    this.rsFatals = new MemoryBoundedLogMessageBuffer(
      conf.getLong("hbase.master.buffer.for.rs.fatals", 1*1024*1024));

    LOG.info("hbase.rootdir=" + FSUtils.getRootDir(this.conf) +
        ", hbase.cluster.distributed=" + this.conf.getBoolean("hbase.cluster.distributed", false));

    Replication.decorateMasterConfiguration(this.conf);

    // Hack! Maps DFSClient => Master for logs.  HDFS made this
    // config param for task trackers, but we can piggyback off of it.
    if (this.conf.get("mapreduce.task.attempt.id") == null) {
      this.conf.set("mapreduce.task.attempt.id", "hb_m_" + this.serverName.toString());
    }

    //should we check the compression codec type at master side, default true, HBASE-6370
    this.masterCheckCompression = conf.getBoolean("hbase.master.check.compression", true);

    this.metricsMaster = new MetricsMaster( new MetricsMasterWrapperImpl(this));

    // Do we publish the status?
    boolean shouldPublish = conf.getBoolean(HConstants.STATUS_PUBLISHED,
        HConstants.STATUS_PUBLISHED_DEFAULT);
    Class<? extends ClusterStatusPublisher.Publisher> publisherClass =
        conf.getClass(ClusterStatusPublisher.STATUS_PUBLISHER_CLASS,
            ClusterStatusPublisher.DEFAULT_STATUS_PUBLISHER_CLASS,
            ClusterStatusPublisher.Publisher.class);

    if (shouldPublish) {
      if (publisherClass == null) {
        // NOTE(review): this message prints the default publisher class rather
        // than the STATUS_PUBLISHER_CLASS config key name; confirm intent.
        LOG.warn(HConstants.STATUS_PUBLISHED + " is true, but " +
            ClusterStatusPublisher.DEFAULT_STATUS_PUBLISHER_CLASS +
            " is not set - not publishing status");
      } else {
        clusterStatusPublisherChore = new ClusterStatusPublisher(this, conf, publisherClass);
        Threads.setDaemonThreadRunning(clusterStatusPublisherChore.getThread());
      }
    }
    // Kick off the election for active master, then bring up the UI redirect.
    startActiveMasterManager();
    putUpJettyServer();
  }
308 
  /**
   * Start a small Jetty server on the configured master info port that
   * redirects all requests to the co-located region server's info server
   * (which serves the actual UI). No-op when redirecting is disabled, the
   * configured port is negative, or no info server was started.
   * @throws IOException if the redirecting Jetty server fails to start
   */
  private void putUpJettyServer() throws IOException {
    if (!conf.getBoolean("hbase.master.infoserver.redirect", true)) {
      return;
    }
    // The port the master UI was historically served on; requests arriving
    // here get redirected to the region server's info port.
    int infoPort = conf.getInt("hbase.master.info.port.orig",
      HConstants.DEFAULT_MASTER_INFOPORT);
    // -1 is for disabling info server, so no redirecting
    if (infoPort < 0 || infoServer == null) {
      return;
    }

    // Tell the servlet where to redirect to before any traffic can arrive.
    RedirectServlet.regionServerInfoPort = infoServer.getPort();
    masterJettyServer = new org.mortbay.jetty.Server();
    Connector connector = new SelectChannelConnector();
    connector.setHost(conf.get("hbase.master.info.bindAddress", "0.0.0.0"));
    connector.setPort(infoPort);
    masterJettyServer.addConnector(connector);
    masterJettyServer.setStopAtShutdown(true);
    // NO_SESSIONS: the redirect servlet is stateless, no session state needed.
    Context context = new Context(masterJettyServer, "/", Context.NO_SESSIONS);
    context.addServlet(RedirectServlet.class, "/*");
    try {
      masterJettyServer.start();
    } catch (Exception e) {
      // Wrap but keep the original cause for diagnosis.
      throw new IOException("Failed to start redirecting jetty server", e);
    }
  }
335 
336   /**
337    * For compatibility, if failed with regionserver credentials, try the master one
338    */
339   protected void login(UserProvider user, String host) throws IOException {
340     try {
341       super.login(user, host);
342     } catch (IOException ie) {
343       user.login("hbase.master.keytab.file",
344         "hbase.master.kerberos.principal", host);
345     }
346   }
347 
348   @VisibleForTesting
349   public MasterRpcServices getMasterRpcServices() {
350     return (MasterRpcServices)rpcServices;
351   }
352 
353   public boolean balanceSwitch(final boolean b) throws IOException {
354     return getMasterRpcServices().switchBalancer(b, BalanceSwitchMode.ASYNC);
355   }
356 
  // Name used for this process (webapp name and the web-context attribute
  // key); overrides the HRegionServer default.
  protected String getProcessName() {
    return MASTER;
  }
360 
  // The master (unlike a plain region server) is permitted to create the
  // base znodes in ZooKeeper.
  protected boolean canCreateBaseZNode() {
    return true;
  }
364 
  // The master is allowed to update table descriptors; region servers are not.
  protected boolean canUpdateTableDescriptor() {
    return true;
  }
368 
  /**
   * Factory hook: the master substitutes {@link MasterRpcServices} for the
   * plain region-server RPC services.
   * @return the master-specific RPC services implementation
   * @throws IOException if the RPC services cannot be constructed
   */
  protected RSRpcServices createRpcServices() throws IOException {
    return new MasterRpcServices(this);
  }
372 
  // Registers the master-status servlet and stashes this instance in the web
  // context (under the MASTER attribute key) before applying the common
  // region-server info-server configuration.
  protected void configureInfoServer() {
    infoServer.addServlet("master-status", "/master-status", MasterStatusServlet.class);
    infoServer.setAttribute(MASTER, this);
    super.configureInfoServer();
  }
378 
  // Master-specific debug-dump servlet (overrides the region server's choice).
  protected Class<? extends HttpServlet> getDumpServlet() {
    return MasterDumpServlet.class;
  }
382 
383   /**
384    * Emit the HMaster metrics, such as region in transition metrics.
385    * Surrounding in a try block just to be sure metrics doesn't abort HMaster.
386    */
387   protected void doMetrics() {
388     try {
389       if (assignmentManager != null) {
390         assignmentManager.updateRegionsInTransitionMetrics();
391       }
392     } catch (Throwable e) {
393       LOG.error("Couldn't update metrics: " + e.getMessage());
394     }
395   }
396 
  // Package-private accessor for the master metrics source (set in the
  // constructor, never null afterwards).
  MetricsMaster getMasterMetrics() {
    return metricsMaster;
  }
400 
  /**
   * Initialize all ZK based system trackers: load balancer tracker,
   * assignment manager, region server tracker, draining server tracker;
   * then mark the cluster up in ZK and initialize the snapshot/procedure
   * managers. Called from finishActiveMasterInitialization once this
   * master has won the election; ordering of the steps below matters.
   * @throws IOException
   * @throws InterruptedException
   * @throws KeeperException
   * @throws CoordinatedStateException
   */
  void initializeZKBasedSystemTrackers() throws IOException,
      InterruptedException, KeeperException, CoordinatedStateException {
    this.balancer = LoadBalancerFactory.getLoadBalancer(conf);
    this.loadBalancerTracker = new LoadBalancerTracker(zooKeeper, this);
    this.loadBalancerTracker.start();
    this.assignmentManager = new AssignmentManager(this, serverManager,
      this.balancer, this.service, this.metricsMaster,
      this.tableLockManager);
    // Registered FIRST so assignment events are seen before other listeners.
    zooKeeper.registerListenerFirst(assignmentManager);

    this.regionServerTracker = new RegionServerTracker(zooKeeper, this,
        this.serverManager);
    this.regionServerTracker.start();

    this.drainingServerTracker = new DrainingServerTracker(zooKeeper, this,
      this.serverManager);
    this.drainingServerTracker.start();

    // Set the cluster as up.  If new RSs, they'll be waiting on this before
    // going ahead with their startup.
    boolean wasUp = this.clusterStatusTracker.isClusterUp();
    if (!wasUp) this.clusterStatusTracker.setClusterUp();

    LOG.info("Server active/primary master=" + this.serverName +
        ", sessionid=0x" +
        Long.toHexString(this.zooKeeper.getRecoverableZooKeeper().getSessionId()) +
        ", setting cluster-up flag (Was=" + wasUp + ")");

    // create/initialize the snapshot manager and other procedure managers
    this.snapshotManager = new SnapshotManager();
    this.mpmHost = new MasterProcedureManagerHost();
    this.mpmHost.register(this.snapshotManager);
    this.mpmHost.register(new MasterFlushTableProcedureManager());
    this.mpmHost.loadProcedures(conf);
    this.mpmHost.initialize(this, this.metricsMaster);
  }
444 
  /**
   * Finish initialization of HMaster after becoming the primary master.
   *
   * <ol>
   * <li>Initialize master components - file system manager, server manager,
   *     assignment manager, region server tracker, etc</li>
   * <li>Start necessary service threads - balancer, catalog janior,
   *     executor services, etc</li>
   * <li>Set cluster as UP in ZooKeeper</li>
   * <li>Wait for RegionServers to check-in</li>
   * <li>Split logs and perform data recovery, if necessary</li>
   * <li>Ensure assignment of meta/namespace regions<li>
   * <li>Handle either fresh cluster start or master failover</li>
   * </ol>
   *
   * The sequencing below is load-bearing; do not reorder steps without
   * understanding the recovery implications noted inline.
   *
   * @param status monitored task used to report progress to the UI
   * @throws IOException
   * @throws InterruptedException
   * @throws KeeperException
   * @throws CoordinatedStateException
   */
  private void finishActiveMasterInitialization(MonitoredTask status)
      throws IOException, InterruptedException, KeeperException, CoordinatedStateException {

    isActiveMaster = true;

    /*
     * We are active master now... go initialize components we need to run.
     * Note, there may be dross in zk from previous runs; it'll get addressed
     * below after we determine if cluster startup or failover.
     */

    status.setStatus("Initializing Master file system");

    this.masterActiveTime = System.currentTimeMillis();
    // TODO: Do this using Dependency Injection, using PicoContainer, Guice or Spring.
    this.fileSystemManager = new MasterFileSystem(this, this);

    // publish cluster ID
    status.setStatus("Publishing Cluster ID in ZooKeeper");
    ZKClusterId.setClusterId(this.zooKeeper, fileSystemManager.getClusterId());
    this.serverManager = createServerManager(this, this);

    metaTableLocator = new MetaTableLocator();
    shortCircuitConnection = createShortCircuitConnection();

    // Invalidate all write locks held previously
    this.tableLockManager.reapWriteLocks();

    status.setStatus("Initializing ZK system trackers");
    initializeZKBasedSystemTrackers();

    // initialize master side coprocessors before we start handling requests
    status.setStatus("Initializing master coprocessors");
    this.cpHost = new MasterCoprocessorHost(this, this.conf);

    // start up all service threads.
    status.setStatus("Initializing master service threads");
    startServiceThreads();

    // Wake up this server to check in
    sleeper.skipSleepCycle();

    // Wait for region servers to report in
    this.serverManager.waitForRegionServers(status);
    // Check zk for region servers that are up but didn't register
    for (ServerName sn: this.regionServerTracker.getOnlineServers()) {
      // The isServerOnline check is opportunistic, correctness is handled inside
      if (!this.serverManager.isServerOnline(sn)
          && serverManager.checkAndRecordNewServer(sn, ServerLoad.EMPTY_SERVERLOAD)) {
        LOG.info("Registered server found up in zk but who has not yet reported in: " + sn);
      }
    }

    // get a list for previously failed RS which need log splitting work
    // we recover hbase:meta region servers inside master initialization and
    // handle other failed servers in SSH in order to start up master node ASAP
    Set<ServerName> previouslyFailedServers = this.fileSystemManager
        .getFailedServersFromLogFolders();

    // remove stale recovering regions from previous run
    this.fileSystemManager.removeStaleRecoveringRegionsFromZK(previouslyFailedServers);

    // log splitting for hbase:meta server
    ServerName oldMetaServerLocation = metaTableLocator.getMetaRegionLocation(this.getZooKeeper());
    if (oldMetaServerLocation != null && previouslyFailedServers.contains(oldMetaServerLocation)) {
      splitMetaLogBeforeAssignment(oldMetaServerLocation);
      // Note: we can't remove oldMetaServerLocation from previousFailedServers list because it
      // may also host user regions
    }
    Set<ServerName> previouslyFailedMetaRSs = getPreviouselyFailedMetaServersFromZK();
    // need to use union of previouslyFailedMetaRSs recorded in ZK and previouslyFailedServers
    // instead of previouslyFailedMetaRSs alone to address the following two situations:
    // 1) the chained failure situation(recovery failed multiple times in a row).
    // 2) master get killed right before it could delete the recovering hbase:meta from ZK while the
    // same server still has non-meta wals to be replayed so that
    // removeStaleRecoveringRegionsFromZK can't delete the stale hbase:meta region
    // Passing more servers into splitMetaLog is all right. If a server doesn't have hbase:meta wal,
    // there is no op for the server.
    previouslyFailedMetaRSs.addAll(previouslyFailedServers);

    this.initializationBeforeMetaAssignment = true;

    // Wait for regionserver to finish initialization.
    // Polls every 100ms rather than waiting indefinitely so a stop request
    // is noticed promptly.
    synchronized (online) {
      while (!isStopped() && !isOnline()) {
        online.wait(100);
      }
    }

    //initialize load balancer
    this.balancer.setClusterStatus(getClusterStatus());
    this.balancer.setMasterServices(this);
    this.balancer.initialize();

    // Check if master is shutting down because of some issue
    // in initializing the regionserver or the balancer.
    if(isStopped()) return;

    // Make sure meta assigned before proceeding.
    status.setStatus("Assigning Meta Region");
    assignMeta(status, previouslyFailedMetaRSs);
    // check if master is shutting down because above assignMeta could return even hbase:meta isn't
    // assigned when master is shutting down
    if(isStopped()) return;

    status.setStatus("Submitting log splitting work for previously failed region servers");
    // Master has recovered hbase:meta region server and we put
    // other failed region servers in a queue to be handled later by SSH
    for (ServerName tmpServer : previouslyFailedServers) {
      this.serverManager.processDeadServer(tmpServer, true);
    }

    // Update meta with new PB serialization if required. i.e migrate all HRI to PB serialization
    // in meta. This must happen before we assign all user regions or else the assignment will
    // fail.
    MetaMigrationConvertingToPB.updateMetaIfNecessary(this);

    // Fix up assignment manager status
    status.setStatus("Starting assignment manager");
    this.assignmentManager.joinCluster();

    //set cluster status again after user regions are assigned
    this.balancer.setClusterStatus(getClusterStatus());

    // Start balancer and meta catalog janitor after meta and regions have
    // been assigned.
    status.setStatus("Starting balancer and catalog janitor");
    this.clusterStatusChore = new ClusterStatusChore(this, balancer);
    Threads.setDaemonThreadRunning(clusterStatusChore.getThread());
    this.balancerChore = new BalancerChore(this);
    Threads.setDaemonThreadRunning(balancerChore.getThread());
    this.catalogJanitorChore = new CatalogJanitor(this, this);
    Threads.setDaemonThreadRunning(catalogJanitorChore.getThread());

    status.setStatus("Starting namespace manager");
    initNamespace();

    if (this.cpHost != null) {
      try {
        this.cpHost.preMasterInitialization();
      } catch (IOException e) {
        LOG.error("Coprocessor preMasterInitialization() hook failed", e);
      }
    }

    status.markComplete("Initialization successful");
    LOG.info("Master has completed initialization");
    initialized = true;
    // clear the dead servers with same host name and port of online server because we are not
    // removing dead server with same hostname and port of rs which is trying to check in before
    // master initialization. See HBASE-5916.
    this.serverManager.clearDeadServersWithSameHostNameAndPortOfOnlineServer();

    if (this.cpHost != null) {
      // don't let cp initialization errors kill the master
      try {
        this.cpHost.postStartMaster();
      } catch (IOException ioe) {
        LOG.error("Coprocessor postStartMaster() hook failed", ioe);
      }
    }
  }
627 
  /**
   * Useful for testing purpose also where we have
   * master restart scenarios.
   */
  protected void startCatalogJanitorChore() {
    // Assumes catalogJanitorChore was already constructed (see
    // finishActiveMasterInitialization); NPE otherwise.
    Threads.setDaemonThreadRunning(catalogJanitorChore.getThread());
  }
635 
  /**
   * Useful for testing purpose also where we have
   * master restart scenarios.
   */
  protected void startNamespaceJanitorChore() {
    // NOTE(review): namespaceJanitorChore is never assigned in this file's
    // visible portion -- confirm it is set elsewhere before this is called,
    // else this throws NPE.
    Threads.setDaemonThreadRunning(namespaceJanitorChore.getThread());
  }
643 
  /**
   * Create a {@link ServerManager} instance.
   * @param master the server abstraction handed to the manager
   * @param services master services the manager calls back into
   * @return An instance of {@link ServerManager}
   * @throws org.apache.hadoop.hbase.ZooKeeperConnectionException
   * @throws IOException
   */
  ServerManager createServerManager(final Server master,
      final MasterServices services)
  throws IOException {
    // We put this out here in a method so can do a Mockito.spy and stub it out
    // w/ a mocked up ServerManager.
    return new ServerManager(master, services);
  }
659 
  /**
   * Check <code>hbase:meta</code> is assigned. If not, assign it.
   * Handles log splitting for a dead previous meta host and decides whether
   * the server-shutdown handler must wait for meta to come online.
   * @param status MonitoredTask used to report progress
   * @param previouslyFailedMetaRSs servers that previously hosted meta and
   *        failed; may be added to here when the current meta host must be
   *        recovered
   * @throws InterruptedException
   * @throws IOException
   * @throws KeeperException
   */
  void assignMeta(MonitoredTask status, Set<ServerName> previouslyFailedMetaRSs)
      throws InterruptedException, IOException, KeeperException {
    // Work on meta region
    // "assigned" counts assignments initiated here (0 when meta was already
    // verified in place); it controls enableServerShutdownHandler below.
    int assigned = 0;
    long timeout = this.conf.getLong("hbase.catalog.verification.timeout", 1000);
    status.setStatus("Assigning hbase:meta region");

    RegionStates regionStates = assignmentManager.getRegionStates();
    regionStates.createRegionState(HRegionInfo.FIRST_META_REGIONINFO);
    boolean rit = this.assignmentManager
      .processRegionInTransitionAndBlockUntilAssigned(HRegionInfo.FIRST_META_REGIONINFO);
    boolean metaRegionLocation = metaTableLocator.verifyMetaRegionLocation(
      this.getShortCircuitConnection(), this.getZooKeeper(), timeout);
    ServerName currentMetaServer = metaTableLocator.getMetaRegionLocation(this.getZooKeeper());
    if (!metaRegionLocation) {
      // Meta location is not verified. It should be in transition, or offline.
      // We will wait for it to be assigned in enableSSHandWaitForMeta below.
      assigned++;
      if (!rit) {
        // Assign meta since not already in transition
        if (currentMetaServer != null) {
          // If the meta server is not known to be dead or online,
          // just split the meta log, and don't expire it since this
          // could be a full cluster restart. Otherwise, we will think
          // this is a failover and lose previous region locations.
          // If it is really a failover case, AM will find out in rebuilding
          // user regions. Otherwise, we are good since all logs are split
          // or known to be replayed before user regions are assigned.
          if (serverManager.isServerOnline(currentMetaServer)) {
            LOG.info("Forcing expire of " + currentMetaServer);
            serverManager.expireServer(currentMetaServer);
          }
          splitMetaLogBeforeAssignment(currentMetaServer);
          previouslyFailedMetaRSs.add(currentMetaServer);
        }
        assignmentManager.assignMeta();
      }
    } else {
      // Region already assigned. We didn't assign it. Add to in-memory state.
      regionStates.updateRegionState(
        HRegionInfo.FIRST_META_REGIONINFO, State.OPEN, currentMetaServer);
      this.assignmentManager.regionOnline(
        HRegionInfo.FIRST_META_REGIONINFO, currentMetaServer);
    }

    enableMeta(TableName.META_TABLE_NAME);

    if ((RecoveryMode.LOG_REPLAY == this.getMasterFileSystem().getLogRecoveryMode())
        && (!previouslyFailedMetaRSs.isEmpty())) {
      // replay WAL edits mode need new hbase:meta RS is assigned firstly
      status.setStatus("replaying log for Meta Region");
      this.fileSystemManager.splitMetaLog(previouslyFailedMetaRSs);
    }

    // Make sure a hbase:meta location is set. We need to enable SSH here since
    // if the meta region server is died at this time, we need it to be re-assigned
    // by SSH so that system tables can be assigned.
    // No need to wait for meta is assigned = 0 when meta is just verified.
    enableServerShutdownHandler(assigned != 0);

    LOG.info("hbase:meta assigned=" + assigned + ", rit=" + rit +
      ", location=" + metaTableLocator.getMetaRegionLocation(this.getZooKeeper()));
    status.setStatus("META assigned.");
  }
732 
733   void initNamespace() throws IOException {
734     //create namespace manager
735     tableNamespaceManager = new TableNamespaceManager(this);
736     tableNamespaceManager.start();
737   }
738 
739   boolean isCatalogJanitorEnabled() {
740     return catalogJanitorChore != null ?
741       catalogJanitorChore.getEnabled() : false;
742   }
743 
744   private void splitMetaLogBeforeAssignment(ServerName currentMetaServer) throws IOException {
745     if (RecoveryMode.LOG_REPLAY == this.getMasterFileSystem().getLogRecoveryMode()) {
746       // In log replay mode, we mark hbase:meta region as recovering in ZK
747       Set<HRegionInfo> regions = new HashSet<HRegionInfo>();
748       regions.add(HRegionInfo.FIRST_META_REGIONINFO);
749       this.fileSystemManager.prepareLogReplay(currentMetaServer, regions);
750     } else {
751       // In recovered.edits mode: create recovered edits file for hbase:meta server
752       this.fileSystemManager.splitMetaLog(currentMetaServer);
753     }
754   }
755 
  /**
   * Turns on the ServerShutdownHandler (if not already on), expiring any
   * queued dead servers, and optionally blocks until hbase:meta has a
   * location and its assignment transition has completed.
   * <p>SSH must be enabled BEFORE waiting: if the server meta is being
   * assigned to dies mid-assignment, only SSH can re-assign it, and without
   * it the wait below would never return.
   * @param waitForMeta if true, block until meta is fully assigned
   * @throws IOException on meta-location lookup failure
   * @throws InterruptedException if interrupted while waiting
   */
  private void enableServerShutdownHandler(
      final boolean waitForMeta) throws IOException, InterruptedException {
    // If ServerShutdownHandler is disabled, we enable it and expire those dead
    // but not expired servers. This is required so that if meta is assigning to
    // a server which dies after assignMeta starts assignment,
    // SSH can re-assign it. Otherwise, we will be
    // stuck here waiting forever if waitForMeta is specified.
    if (!serverShutdownHandlerEnabled) {
      serverShutdownHandlerEnabled = true;
      this.serverManager.processQueuedDeadServers();
    }

    if (waitForMeta) {
      metaTableLocator.waitMetaRegionLocation(this.getZooKeeper());
      // Above check waits for general meta availability but this does not
      // guarantee that the transition has completed
      this.assignmentManager.waitForAssignment(HRegionInfo.FIRST_META_REGIONINFO);
    }
  }
775 
776   private void enableMeta(TableName metaTableName) {
777     if (!this.assignmentManager.getTableStateManager().isTableState(metaTableName,
778         ZooKeeperProtos.Table.State.ENABLED)) {
779       this.assignmentManager.setEnabledTable(metaTableName);
780     }
781   }
782 
783   /**
784    * This function returns a set of region server names under hbase:meta recovering region ZK node
785    * @return Set of meta server names which were recorded in ZK
786    * @throws KeeperException
787    */
788   private Set<ServerName> getPreviouselyFailedMetaServersFromZK() throws KeeperException {
789     Set<ServerName> result = new HashSet<ServerName>();
790     String metaRecoveringZNode = ZKUtil.joinZNode(zooKeeper.recoveringRegionsZNode,
791       HRegionInfo.FIRST_META_REGIONINFO.getEncodedName());
792     List<String> regionFailedServers = ZKUtil.listChildrenNoWatch(zooKeeper, metaRecoveringZNode);
793     if (regionFailedServers == null) return result;
794 
795     for(String failedServer : regionFailedServers) {
796       ServerName server = ServerName.parseServerName(failedServer);
797       result.add(server);
798     }
799     return result;
800   }
801 
  /** @return the table descriptor store used by this master. */
  @Override
  public TableDescriptors getTableDescriptors() {
    return this.tableDescriptors;
  }

  /** @return the manager tracking live and dead region servers. */
  @Override
  public ServerManager getServerManager() {
    return this.serverManager;
  }

  /** @return the master's filesystem abstraction (root dir, WALs, archives). */
  @Override
  public MasterFileSystem getMasterFileSystem() {
    return this.fileSystemManager;
  }
816 
817   /*
818    * Start up all services. If any of these threads gets an unhandled exception
819    * then they just die with a logged message.  This should be fine because
820    * in general, we do not expect the master to get such unhandled exceptions
821    *  as OOMEs; it should be lightly loaded. See what HRegionServer does if
822    *  need to install an unexpected exception handler.
823    */
824   private void startServiceThreads() throws IOException{
825    // Start the executor service pools
826    this.service.startExecutorService(ExecutorType.MASTER_OPEN_REGION,
827       conf.getInt("hbase.master.executor.openregion.threads", 5));
828    this.service.startExecutorService(ExecutorType.MASTER_CLOSE_REGION,
829       conf.getInt("hbase.master.executor.closeregion.threads", 5));
830    this.service.startExecutorService(ExecutorType.MASTER_SERVER_OPERATIONS,
831       conf.getInt("hbase.master.executor.serverops.threads", 5));
832    this.service.startExecutorService(ExecutorType.MASTER_META_SERVER_OPERATIONS,
833       conf.getInt("hbase.master.executor.serverops.threads", 5));
834    this.service.startExecutorService(ExecutorType.M_LOG_REPLAY_OPS,
835       conf.getInt("hbase.master.executor.logreplayops.threads", 10));
836 
837    // We depend on there being only one instance of this executor running
838    // at a time.  To do concurrency, would need fencing of enable/disable of
839    // tables.
840    // Any time changing this maxThreads to > 1, pls see the comment at
841    // AccessController#postCreateTableHandler
842    this.service.startExecutorService(ExecutorType.MASTER_TABLE_OPERATIONS, 1);
843 
844    // Start log cleaner thread
845    int cleanerInterval = conf.getInt("hbase.master.cleaner.interval", 60 * 1000);
846    this.logCleaner =
847       new LogCleaner(cleanerInterval,
848          this, conf, getMasterFileSystem().getFileSystem(),
849          getMasterFileSystem().getOldLogDir());
850          Threads.setDaemonThreadRunning(logCleaner.getThread(), getName() + ".oldLogCleaner");
851 
852    //start the hfile archive cleaner thread
853     Path archiveDir = HFileArchiveUtil.getArchivePath(conf);
854     this.hfileCleaner = new HFileCleaner(cleanerInterval, this, conf, getMasterFileSystem()
855         .getFileSystem(), archiveDir);
856     Threads.setDaemonThreadRunning(hfileCleaner.getThread(),
857       getName() + ".archivedHFileCleaner");
858 
859     serviceStarted = true;
860     if (LOG.isTraceEnabled()) {
861       LOG.trace("Started service threads");
862     }
863   }
864 
  /**
   * Stops the embedded jetty info server, chores, cleaner threads and the
   * master's managers. Called on shutdown or abort; order matters — chores
   * stop first, then (for a clean cluster shutdown) region servers are given
   * a chance to report in before the managers are torn down.
   */
  protected void stopServiceThreads() {
    if (masterJettyServer != null) {
      LOG.info("Stopping master jetty server");
      try {
        masterJettyServer.stop();
      } catch (Exception e) {
        // Best-effort: log and continue tearing the rest down.
        LOG.error("Failed to stop master jetty server", e);
      }
    }
    super.stopServiceThreads();
    stopChores();
    // Wait for all the remaining region servers to report in IFF we were
    // running a cluster shutdown AND we were NOT aborting.
    if (!isAborted() && this.serverManager != null &&
        this.serverManager.isClusterShutdown()) {
      this.serverManager.letRegionServersShutdown();
    }
    if (LOG.isDebugEnabled()) {
      LOG.debug("Stopping service threads");
    }
    // Clean up and close up shop. Null checks: fields may never have been
    // initialized if the master aborted during startup.
    if (this.logCleaner!= null) this.logCleaner.interrupt();
    if (this.hfileCleaner != null) this.hfileCleaner.interrupt();
    if (this.activeMasterManager != null) this.activeMasterManager.stop();
    if (this.serverManager != null) this.serverManager.stop();
    if (this.assignmentManager != null) this.assignmentManager.stop();
    if (this.fileSystemManager != null) this.fileSystemManager.stop();
    if (this.mpmHost != null) this.mpmHost.stop("server shutting down.");
  }
894 
895   private void stopChores() {
896     if (this.balancerChore != null) {
897       this.balancerChore.interrupt();
898     }
899     if (this.clusterStatusChore != null) {
900       this.clusterStatusChore.interrupt();
901     }
902     if (this.catalogJanitorChore != null) {
903       this.catalogJanitorChore.interrupt();
904     }
905     if (this.clusterStatusPublisherChore != null){
906       clusterStatusPublisherChore.interrupt();
907     }
908     if (this.namespaceJanitorChore != null){
909       namespaceJanitorChore.interrupt();
910     }
911   }
912 
913   /**
914    * @return Get remote side's InetAddress
915    * @throws UnknownHostException
916    */
917   InetAddress getRemoteInetAddress(final int port,
918       final long serverStartCode) throws UnknownHostException {
919     // Do it out here in its own little method so can fake an address when
920     // mocking up in tests.
921     InetAddress ia = RpcServer.getRemoteIp();
922 
923     // The call could be from the local regionserver,
924     // in which case, there is no remote address.
925     if (ia == null && serverStartCode == startcode) {
926       InetSocketAddress isa = rpcServices.getSocketAddress();
927       if (isa != null && isa.getPort() == port) {
928         ia = isa.getAddress();
929       }
930     }
931     return ia;
932   }
933 
934   /**
935    * @return Maximum time we should run balancer for
936    */
937   private int getBalancerCutoffTime() {
938     int balancerCutoffTime =
939       getConfiguration().getInt("hbase.balancer.max.balancing", -1);
940     if (balancerCutoffTime == -1) {
941       // No time period set so create one
942       int balancerPeriod =
943         getConfiguration().getInt("hbase.balancer.period", 300000);
944       balancerCutoffTime = balancerPeriod;
945       // If nonsense period, set it to balancerPeriod
946       if (balancerCutoffTime <= 0) balancerCutoffTime = balancerPeriod;
947     }
948     return balancerCutoffTime;
949   }
950 
  /**
   * Runs one load-balancer pass: asks the balancer for region plans per table
   * and executes them, stopping early if the projected time for the next plan
   * would exceed the configured cutoff.
   * @return false when balancing is skipped (master not initialized, balancer
   *         switched off, regions in transition, dead servers being processed,
   *         or a coprocessor vetoed the run); true otherwise — including when
   *         no plans were generated, i.e. the cluster was already balanced
   * @throws IOException if fetching cluster state fails
   */
  public boolean balance() throws IOException {
    // if master not initialized, don't run balancer.
    if (!this.initialized) {
      LOG.debug("Master has not been initialized, don't run balancer.");
      return false;
    }
    // Do this call outside of synchronized block.
    int maximumBalanceTime = getBalancerCutoffTime();
    synchronized (this.balancer) {
      // If balance not true, don't run balancer.
      if (!this.loadBalancerTracker.isBalancerOn()) return false;
      // Only allow one balance run at at time.
      if (this.assignmentManager.getRegionStates().isRegionsInTransition()) {
        Map<String, RegionState> regionsInTransition =
          this.assignmentManager.getRegionStates().getRegionsInTransition();
        LOG.debug("Not running balancer because " + regionsInTransition.size() +
          " region(s) in transition: " + org.apache.commons.lang.StringUtils.
            abbreviate(regionsInTransition.toString(), 256));
        return false;
      }
      if (this.serverManager.areDeadServersInProgress()) {
        LOG.debug("Not running balancer because processing dead regionserver(s): " +
          this.serverManager.getDeadServers());
        return false;
      }

      // Coprocessor pre-hook may veto the whole run.
      if (this.cpHost != null) {
        try {
          if (this.cpHost.preBalance()) {
            LOG.debug("Coprocessor bypassing balancer request");
            return false;
          }
        } catch (IOException ioe) {
          LOG.error("Error invoking master coprocessor preBalance()", ioe);
          return false;
        }
      }

      Map<TableName, Map<ServerName, List<HRegionInfo>>> assignmentsByTable =
        this.assignmentManager.getRegionStates().getAssignmentsByTable();

      List<RegionPlan> plans = new ArrayList<RegionPlan>();
      //Give the balancer the current cluster state.
      this.balancer.setClusterStatus(getClusterStatus());
      // Plans are computed independently per table, then executed together.
      for (Map<ServerName, List<HRegionInfo>> assignments : assignmentsByTable.values()) {
        List<RegionPlan> partialPlans = this.balancer.balanceCluster(assignments);
        if (partialPlans != null) plans.addAll(partialPlans);
      }
      long cutoffTime = System.currentTimeMillis() + maximumBalanceTime;
      int rpCount = 0;  // number of RegionPlans balanced so far
      long totalRegPlanExecTime = 0;
      if (plans != null && !plans.isEmpty()) {
        for (RegionPlan plan: plans) {
          LOG.info("balance " + plan);
          long balStartTime = System.currentTimeMillis();
          //TODO: bulk assign
          this.assignmentManager.balance(plan);
          totalRegPlanExecTime += System.currentTimeMillis()-balStartTime;
          rpCount++;
          // Estimate the next plan's cost as the running average so far.
          if (rpCount < plans.size() &&
              // if performing next balance exceeds cutoff time, exit the loop
              (System.currentTimeMillis() + (totalRegPlanExecTime / rpCount)) > cutoffTime) {
            //TODO: After balance, there should not be a cutoff time (keeping it as a security net for now)
            LOG.debug("No more balancing till next balance run; maximumBalanceTime=" +
              maximumBalanceTime);
            break;
          }
        }
      }
      if (this.cpHost != null) {
        try {
          // Report only the plans actually executed when the run was cut off.
          this.cpHost.postBalance(rpCount < plans.size() ? plans.subList(0, rpCount) : plans);
        } catch (IOException ioe) {
          // balancing already succeeded so don't change the result
          LOG.error("Error invoking master coprocessor postBalance()", ioe);
        }
      }
    }
    // If LoadBalancer did not generate any plans, it means the cluster is already balanced.
    // Return true indicating a success.
    return true;
  }
1033 
1034   /**
1035    * @return Client info for use as prefix on an audit log string; who did an action
1036    */
1037   String getClientIdAuditPrefix() {
1038     return "Client=" + RequestContext.getRequestUserName() + "/" +
1039       RequestContext.get().getRemoteAddress();
1040   }
1041 
1042   /**
1043    * Switch for the background CatalogJanitor thread.
1044    * Used for testing.  The thread will continue to run.  It will just be a noop
1045    * if disabled.
1046    * @param b If false, the catalog janitor won't do anything.
1047    */
1048   public void setCatalogJanitorEnabled(final boolean b) {
1049     this.catalogJanitorChore.setEnabled(b);
1050   }
1051 
1052   @Override
1053   public void dispatchMergingRegions(final HRegionInfo region_a,
1054       final HRegionInfo region_b, final boolean forcible) throws IOException {
1055     checkInitialized();
1056     this.service.submit(new DispatchMergingRegionHandler(this,
1057         this.catalogJanitorChore, region_a, region_b, forcible));
1058   }
1059 
  /**
   * Moves the region with the given encoded name to the supplied destination
   * server, or to a randomly chosen one when no destination is given. A move
   * to the region's current server is silently skipped.
   * @param encodedRegionName encoded name identifying the region to move
   * @param destServerName destination server name as bytes; null or empty
   *        picks a random server via the balancer
   * @throws HBaseIOException if the region is unknown or the move fails
   */
  void move(final byte[] encodedRegionName,
      final byte[] destServerName) throws HBaseIOException {
    RegionState regionState = assignmentManager.getRegionStates().
      getRegionState(Bytes.toString(encodedRegionName));
    if (regionState == null) {
      throw new UnknownRegionException(Bytes.toStringBinary(encodedRegionName));
    }

    HRegionInfo hri = regionState.getRegion();
    ServerName dest;
    if (destServerName == null || destServerName.length == 0) {
      LOG.info("Passed destination servername is null/empty so " +
        "choosing a server at random");
      // Exclude the current host so the random pick is an actual move.
      final List<ServerName> destServers = this.serverManager.createDestinationServersList(
        regionState.getServerName());
      dest = balancer.randomAssignment(hri, destServers);
    } else {
      dest = ServerName.valueOf(Bytes.toString(destServerName));
      if (dest.equals(regionState.getServerName())) {
        LOG.debug("Skipping move of region " + hri.getRegionNameAsString()
          + " because region already assigned to the same server " + dest + ".");
        return;
      }
    }

    // Now we can do the move
    RegionPlan rp = new RegionPlan(hri, regionState.getServerName(), dest);

    try {
      checkInitialized();
      // Coprocessor pre-hook may veto the move.
      if (this.cpHost != null) {
        if (this.cpHost.preMove(hri, rp.getSource(), rp.getDestination())) {
          return;
        }
      }
      LOG.info(getClientIdAuditPrefix() + " move " + rp + ", running balancer");
      this.assignmentManager.balance(rp);
      if (this.cpHost != null) {
        this.cpHost.postMove(hri, rp.getSource(), rp.getDestination());
      }
    } catch (IOException ioe) {
      // Re-throw HBaseIOException as-is; wrap any other IOException.
      if (ioe instanceof HBaseIOException) {
        throw (HBaseIOException)ioe;
      }
      throw new HBaseIOException(ioe);
    }
  }
1107 
  /**
   * Creates the table described by the descriptor, pre-split at the given
   * keys. Submits a CreateTableHandler, so creation completes asynchronously.
   * @param hTableDescriptor schema of the table to create
   * @param splitKeys split points for pre-splitting; null/empty for one region
   * @throws IOException if the master is stopped or not initialized, the
   *         namespace is missing, sanity checks fail, or a coprocessor throws
   */
  @Override
  public void createTable(HTableDescriptor hTableDescriptor,
      byte [][] splitKeys) throws IOException {
    if (isStopped()) {
      throw new MasterNotRunningException();
    }

    String namespace = hTableDescriptor.getTableName().getNamespaceAsString();
    getNamespaceDescriptor(namespace); // ensure namespace exists

    HRegionInfo[] newRegions = getHRegionInfos(hTableDescriptor, splitKeys);
    // NOTE(review): the namespace lookup above runs before checkInitialized();
    // confirm that ordering is intentional.
    checkInitialized();
    sanityCheckTableDescriptor(hTableDescriptor);
    if (cpHost != null) {
      cpHost.preCreateTable(hTableDescriptor, newRegions);
    }
    LOG.info(getClientIdAuditPrefix() + " create " + hTableDescriptor);
    this.service.submit(new CreateTableHandler(this,
      this.fileSystemManager, hTableDescriptor, conf,
      newRegions, this).prepare());
    if (cpHost != null) {
      cpHost.postCreateTable(hTableDescriptor, newRegions);
    }

  }
1133 
1134   /**
1135    * Checks whether the table conforms to some sane limits, and configured
1136    * values (compression, etc) work. Throws an exception if something is wrong.
1137    * @throws IOException
1138    */
1139   private void sanityCheckTableDescriptor(final HTableDescriptor htd) throws IOException {
1140     final String CONF_KEY = "hbase.table.sanity.checks";
1141     if (!conf.getBoolean(CONF_KEY, true)) {
1142       return;
1143     }
1144     String tableVal = htd.getConfigurationValue(CONF_KEY);
1145     if (tableVal != null && !Boolean.valueOf(tableVal)) {
1146       return;
1147     }
1148 
1149     // check max file size
1150     long maxFileSizeLowerLimit = 2 * 1024 * 1024L; // 2M is the default lower limit
1151     long maxFileSize = htd.getMaxFileSize();
1152     if (maxFileSize < 0) {
1153       maxFileSize = conf.getLong(HConstants.HREGION_MAX_FILESIZE, maxFileSizeLowerLimit);
1154     }
1155     if (maxFileSize < conf.getLong("hbase.hregion.max.filesize.limit", maxFileSizeLowerLimit)) {
1156       throw new DoNotRetryIOException("MAX_FILESIZE for table descriptor or "
1157         + "\"hbase.hregion.max.filesize\" (" + maxFileSize
1158         + ") is too small, which might cause over splitting into unmanageable "
1159         + "number of regions. Set " + CONF_KEY + " to false at conf or table descriptor "
1160           + "if you want to bypass sanity checks");
1161     }
1162 
1163     // check flush size
1164     long flushSizeLowerLimit = 1024 * 1024L; // 1M is the default lower limit
1165     long flushSize = htd.getMemStoreFlushSize();
1166     if (flushSize < 0) {
1167       flushSize = conf.getLong(HConstants.HREGION_MEMSTORE_FLUSH_SIZE, flushSizeLowerLimit);
1168     }
1169     if (flushSize < conf.getLong("hbase.hregion.memstore.flush.size.limit", flushSizeLowerLimit)) {
1170       throw new DoNotRetryIOException("MEMSTORE_FLUSHSIZE for table descriptor or "
1171           + "\"hbase.hregion.memstore.flush.size\" ("+flushSize+") is too small, which might cause"
1172           + " very frequent flushing. Set " + CONF_KEY + " to false at conf or table descriptor "
1173           + "if you want to bypass sanity checks");
1174     }
1175 
1176     // check split policy class can be loaded
1177     try {
1178       RegionSplitPolicy.getSplitPolicyClass(htd, conf);
1179     } catch (Exception ex) {
1180       throw new DoNotRetryIOException(ex);
1181     }
1182 
1183     // check compression can be loaded
1184     checkCompression(htd);
1185 
1186     // check that we have at least 1 CF
1187     if (htd.getColumnFamilies().length == 0) {
1188       throw new DoNotRetryIOException("Table should have at least one column family "
1189           + "Set "+CONF_KEY+" at conf or table descriptor if you want to bypass sanity checks");
1190     }
1191 
1192     for (HColumnDescriptor hcd : htd.getColumnFamilies()) {
1193       if (hcd.getTimeToLive() <= 0) {
1194         throw new DoNotRetryIOException("TTL for column family " + hcd.getNameAsString()
1195           + "  must be positive. Set " + CONF_KEY + " to false at conf or table descriptor "
1196           + "if you want to bypass sanity checks");
1197       }
1198 
1199       // check blockSize
1200       if (hcd.getBlocksize() < 1024 || hcd.getBlocksize() > 16 * 1024 * 1024) {
1201         throw new DoNotRetryIOException("Block size for column family " + hcd.getNameAsString()
1202           + "  must be between 1K and 16MB Set "+CONF_KEY+" to false at conf or table descriptor "
1203           + "if you want to bypass sanity checks");
1204       }
1205 
1206       // check versions
1207       if (hcd.getMinVersions() < 0) {
1208         throw new DoNotRetryIOException("Min versions for column family " + hcd.getNameAsString()
1209           + "  must be positive. Set " + CONF_KEY + " to false at conf or table descriptor "
1210           + "if you want to bypass sanity checks");
1211       }
1212       // max versions already being checked
1213 
1214       // check replication scope
1215       if (hcd.getScope() < 0) {
1216         throw new DoNotRetryIOException("Replication scope for column family "
1217           + hcd.getNameAsString() + "  must be positive. Set " + CONF_KEY + " to false at conf "
1218           + "or table descriptor if you want to bypass sanity checks");
1219       }
1220 
1221       // TODO: should we check coprocessors and encryption ?
1222     }
1223   }
1224 
  /**
   * Registers this master as a backup in ZooKeeper, then spawns the daemon
   * thread that competes to become the active master; on winning the race the
   * thread runs finishActiveMasterInitialization.
   * @throws KeeperException on failure registering the backup master znode
   */
  private void startActiveMasterManager() throws KeeperException {
    String backupZNode = ZKUtil.joinZNode(
      zooKeeper.backupMasterAddressesZNode, serverName.toString());
    /*
    * Add a ZNode for ourselves in the backup master directory since we
    * may not become the active master. If so, we want the actual active
    * master to know we are backup masters, so that it won't assign
    * regions to us if so configured.
    *
    * If we become the active master later, ActiveMasterManager will delete
    * this node explicitly.  If we crash before then, ZooKeeper will delete
    * this node for us since it is ephemeral.
    */
    LOG.info("Adding ZNode for " + backupZNode + " in backup master directory");
    MasterAddressTracker.setMasterAddress(zooKeeper, backupZNode, serverName);

    activeMasterManager = new ActiveMasterManager(zooKeeper, serverName, this);
    // Start a thread to try to become the active master, so we won't block here
    Threads.setDaemonThreadRunning(new Thread(new Runnable() {
      public void run() {
        int timeout = conf.getInt(HConstants.ZK_SESSION_TIMEOUT,
          HConstants.DEFAULT_ZK_SESSION_TIMEOUT);
        // If we're a backup master, stall until a primary to writes his address
        if (conf.getBoolean(HConstants.MASTER_TYPE_BACKUP,
            HConstants.DEFAULT_MASTER_TYPE_BACKUP)) {
          LOG.debug("HMaster started in backup mode. "
            + "Stalling until master znode is written.");
          // This will only be a minute or so while the cluster starts up,
          // so don't worry about setting watches on the parent znode
          while (!activeMasterManager.hasActiveMaster()) {
            LOG.debug("Waiting for master address ZNode to be written "
              + "(Also watching cluster state node)");
            Threads.sleep(timeout);
          }
        }
        MonitoredTask status = TaskMonitor.get().createStatus("Master startup");
        status.setDescription("Master startup");
        try {
          // Blocks until we win the active-master race (or the master stops).
          if (activeMasterManager.blockUntilBecomingActiveMaster(timeout, status)) {
            finishActiveMasterInitialization(status);
          }
        } catch (Throwable t) {
          status.setStatus("Failed to become active: " + t.getMessage());
          LOG.fatal("Failed to become active master", t);
          // HBASE-5680: Likely hadoop23 vs hadoop 20.x/1.x incompatibility
          if (t instanceof NoClassDefFoundError &&
              t.getMessage().contains("org/apache/hadoop/hdfs/protocol/FSConstants$SafeModeAction")) {
            // improved error message for this special case
            abort("HBase is having a problem with its Hadoop jars.  You may need to "
              + "recompile HBase against Hadoop version "
              +  org.apache.hadoop.util.VersionInfo.getVersion()
              + " or change your hadoop jars to start properly", t);
          } else {
            abort("Unhandled exception. Starting shutdown.", t);
          }
        } finally {
          status.cleanup();
        }
      }
    }, "ActiveMasterManager"));
  }
1286 
1287   private void checkCompression(final HTableDescriptor htd)
1288   throws IOException {
1289     if (!this.masterCheckCompression) return;
1290     for (HColumnDescriptor hcd : htd.getColumnFamilies()) {
1291       checkCompression(hcd);
1292     }
1293   }
1294 
1295   private void checkCompression(final HColumnDescriptor hcd)
1296   throws IOException {
1297     if (!this.masterCheckCompression) return;
1298     CompressionTest.testCompression(hcd.getCompression());
1299     CompressionTest.testCompression(hcd.getCompactionCompression());
1300   }
1301 
1302   private HRegionInfo[] getHRegionInfos(HTableDescriptor hTableDescriptor,
1303     byte[][] splitKeys) {
1304     long regionId = System.currentTimeMillis();
1305     HRegionInfo[] hRegionInfos = null;
1306     if (splitKeys == null || splitKeys.length == 0) {
1307       hRegionInfos = new HRegionInfo[]{new HRegionInfo(hTableDescriptor.getTableName(), null, null,
1308                 false, regionId)};
1309     } else {
1310       int numRegions = splitKeys.length + 1;
1311       hRegionInfos = new HRegionInfo[numRegions];
1312       byte[] startKey = null;
1313       byte[] endKey = null;
1314       for (int i = 0; i < numRegions; i++) {
1315         endKey = (i == splitKeys.length) ? null : splitKeys[i];
1316         hRegionInfos[i] =
1317              new HRegionInfo(hTableDescriptor.getTableName(), startKey, endKey,
1318                  false, regionId);
1319         startKey = endKey;
1320       }
1321     }
1322     return hRegionInfos;
1323   }
1324 
  /** @return true if the given table is the hbase:meta catalog table. */
  private static boolean isCatalogTable(final TableName tableName) {
    return tableName.equals(TableName.META_TABLE_NAME);
  }
1328 
1329   @Override
1330   public void deleteTable(final TableName tableName) throws IOException {
1331     checkInitialized();
1332     if (cpHost != null) {
1333       cpHost.preDeleteTable(tableName);
1334     }
1335     LOG.info(getClientIdAuditPrefix() + " delete " + tableName);
1336     this.service.submit(new DeleteTableHandler(tableName, this, this).prepare());
1337     if (cpHost != null) {
1338       cpHost.postDeleteTable(tableName);
1339     }
1340   }
1341 
1342   @Override
1343   public void truncateTable(TableName tableName, boolean preserveSplits) throws IOException {
1344     checkInitialized();
1345     if (cpHost != null) {
1346       cpHost.preTruncateTable(tableName);
1347     }
1348     LOG.info(getClientIdAuditPrefix() + " truncate " + tableName);
1349     TruncateTableHandler handler = new TruncateTableHandler(tableName, this, this, preserveSplits);
1350     handler.prepare();
1351     handler.process();
1352     if (cpHost != null) {
1353       cpHost.postTruncateTable(tableName);
1354     }
1355   }
1356 
1357   @Override
1358   public void addColumn(final TableName tableName, final HColumnDescriptor column)
1359       throws IOException {
1360     checkInitialized();
1361     if (cpHost != null) {
1362       if (cpHost.preAddColumn(tableName, column)) {
1363         return;
1364       }
1365     }
1366     //TODO: we should process this (and some others) in an executor
1367     new TableAddFamilyHandler(tableName, column, this, this).prepare().process();
1368     if (cpHost != null) {
1369       cpHost.postAddColumn(tableName, column);
1370     }
1371   }
1372 
1373   @Override
1374   public void modifyColumn(TableName tableName, HColumnDescriptor descriptor)
1375       throws IOException {
1376     checkInitialized();
1377     checkCompression(descriptor);
1378     if (cpHost != null) {
1379       if (cpHost.preModifyColumn(tableName, descriptor)) {
1380         return;
1381       }
1382     }
1383     LOG.info(getClientIdAuditPrefix() + " modify " + descriptor);
1384     new TableModifyFamilyHandler(tableName, descriptor, this, this)
1385       .prepare().process();
1386     if (cpHost != null) {
1387       cpHost.postModifyColumn(tableName, descriptor);
1388     }
1389   }
1390 
1391   @Override
1392   public void deleteColumn(final TableName tableName, final byte[] columnName)
1393       throws IOException {
1394     checkInitialized();
1395     if (cpHost != null) {
1396       if (cpHost.preDeleteColumn(tableName, columnName)) {
1397         return;
1398       }
1399     }
1400     LOG.info(getClientIdAuditPrefix() + " delete " + Bytes.toString(columnName));
1401     new TableDeleteFamilyHandler(tableName, columnName, this, this).prepare().process();
1402     if (cpHost != null) {
1403       cpHost.postDeleteColumn(tableName, columnName);
1404     }
1405   }
1406 
1407   @Override
1408   public void enableTable(final TableName tableName) throws IOException {
1409     checkInitialized();
1410     if (cpHost != null) {
1411       cpHost.preEnableTable(tableName);
1412     }
1413     LOG.info(getClientIdAuditPrefix() + " enable " + tableName);
1414     this.service.submit(new EnableTableHandler(this, tableName,
1415       assignmentManager, tableLockManager, false).prepare());
1416     if (cpHost != null) {
1417       cpHost.postEnableTable(tableName);
1418    }
1419   }
1420 
1421   @Override
1422   public void disableTable(final TableName tableName) throws IOException {
1423     checkInitialized();
1424     if (cpHost != null) {
1425       cpHost.preDisableTable(tableName);
1426     }
1427     LOG.info(getClientIdAuditPrefix() + " disable " + tableName);
1428     this.service.submit(new DisableTableHandler(this, tableName,
1429       assignmentManager, tableLockManager, false).prepare());
1430     if (cpHost != null) {
1431       cpHost.postDisableTable(tableName);
1432     }
1433   }
1434 
1435   /**
1436    * Return the region and current deployment for the region containing
1437    * the given row. If the region cannot be found, returns null. If it
1438    * is found, but not currently deployed, the second element of the pair
1439    * may be null.
1440    */
1441   Pair<HRegionInfo, ServerName> getTableRegionForRow(
1442       final TableName tableName, final byte [] rowKey)
1443   throws IOException {
1444     final AtomicReference<Pair<HRegionInfo, ServerName>> result =
1445       new AtomicReference<Pair<HRegionInfo, ServerName>>(null);
1446 
1447     MetaScannerVisitor visitor =
1448       new MetaScannerVisitorBase() {
1449         @Override
1450         public boolean processRow(Result data) throws IOException {
1451           if (data == null || data.size() <= 0) {
1452             return true;
1453           }
1454           Pair<HRegionInfo, ServerName> pair = HRegionInfo.getHRegionInfoAndServerName(data);
1455           if (pair == null) {
1456             return false;
1457           }
1458           if (!pair.getFirst().getTable().equals(tableName)) {
1459             return false;
1460           }
1461           result.set(pair);
1462           return true;
1463         }
1464     };
1465 
1466     MetaScanner.metaScan(conf, visitor, tableName, rowKey, 1);
1467     return result.get();
1468   }
1469 
1470   @Override
1471   public void modifyTable(final TableName tableName, final HTableDescriptor descriptor)
1472       throws IOException {
1473     checkInitialized();
1474     sanityCheckTableDescriptor(descriptor);
1475     if (cpHost != null) {
1476       cpHost.preModifyTable(tableName, descriptor);
1477     }
1478     LOG.info(getClientIdAuditPrefix() + " modify " + tableName);
1479     new ModifyTableHandler(tableName, descriptor, this, this).prepare().process();
1480     if (cpHost != null) {
1481       cpHost.postModifyTable(tableName, descriptor);
1482     }
1483   }
1484 
1485   @Override
1486   public void checkTableModifiable(final TableName tableName)
1487       throws IOException, TableNotFoundException, TableNotDisabledException {
1488     if (isCatalogTable(tableName)) {
1489       throw new IOException("Can't modify catalog tables");
1490     }
1491     if (!MetaTableAccessor.tableExists(getShortCircuitConnection(), tableName)) {
1492       throw new TableNotFoundException(tableName);
1493     }
1494     if (!getAssignmentManager().getTableStateManager().
1495         isTableState(tableName, ZooKeeperProtos.Table.State.DISABLED)) {
1496       throw new TableNotDisabledException(tableName);
1497     }
1498   }
1499 
1500   /**
1501    * @return cluster status
1502    */
1503   public ClusterStatus getClusterStatus() throws InterruptedIOException {
1504     // Build Set of backup masters from ZK nodes
1505     List<String> backupMasterStrings;
1506     try {
1507       backupMasterStrings = ZKUtil.listChildrenNoWatch(this.zooKeeper,
1508         this.zooKeeper.backupMasterAddressesZNode);
1509     } catch (KeeperException e) {
1510       LOG.warn(this.zooKeeper.prefix("Unable to list backup servers"), e);
1511       backupMasterStrings = new ArrayList<String>(0);
1512     }
1513     List<ServerName> backupMasters = new ArrayList<ServerName>(
1514                                           backupMasterStrings.size());
1515     for (String s: backupMasterStrings) {
1516       try {
1517         byte [] bytes;
1518         try {
1519           bytes = ZKUtil.getData(this.zooKeeper, ZKUtil.joinZNode(
1520               this.zooKeeper.backupMasterAddressesZNode, s));
1521         } catch (InterruptedException e) {
1522           throw new InterruptedIOException();
1523         }
1524         if (bytes != null) {
1525           ServerName sn;
1526           try {
1527             sn = ServerName.parseFrom(bytes);
1528           } catch (DeserializationException e) {
1529             LOG.warn("Failed parse, skipping registering backup server", e);
1530             continue;
1531           }
1532           backupMasters.add(sn);
1533         }
1534       } catch (KeeperException e) {
1535         LOG.warn(this.zooKeeper.prefix("Unable to get information about " +
1536                  "backup servers"), e);
1537       }
1538     }
1539     Collections.sort(backupMasters, new Comparator<ServerName>() {
1540       @Override
1541       public int compare(ServerName s1, ServerName s2) {
1542         return s1.getServerName().compareTo(s2.getServerName());
1543       }});
1544 
1545     String clusterId = fileSystemManager != null ?
1546       fileSystemManager.getClusterId().toString() : null;
1547     Map<String, RegionState> regionsInTransition = assignmentManager != null ?
1548       assignmentManager.getRegionStates().getRegionsInTransition() : null;
1549     String[] coprocessors = cpHost != null ? getMasterCoprocessors() : null;
1550     boolean balancerOn = loadBalancerTracker != null ?
1551       loadBalancerTracker.isBalancerOn() : false;
1552     Map<ServerName, ServerLoad> onlineServers = null;
1553     Set<ServerName> deadServers = null;
1554     if (serverManager != null) {
1555       deadServers = serverManager.getDeadServers().copyServerNames();
1556       onlineServers = serverManager.getOnlineServers();
1557     }
1558     return new ClusterStatus(VersionInfo.getVersion(), clusterId,
1559       onlineServers, deadServers, serverName, backupMasters,
1560       regionsInTransition, coprocessors, balancerOn);
1561   }
1562 
1563   /**
1564    * The set of loaded coprocessors is stored in a static set. Since it's
1565    * statically allocated, it does not require that HMaster's cpHost be
1566    * initialized prior to accessing it.
1567    * @return a String representation of the set of names of the loaded
1568    * coprocessors.
1569    */
1570   public static String getLoadedCoprocessors() {
1571     return CoprocessorHost.getLoadedCoprocessors().toString();
1572   }
1573 
1574   /**
1575    * @return timestamp in millis when HMaster was started.
1576    */
1577   public long getMasterStartTime() {
1578     return startcode;
1579   }
1580 
1581   /**
1582    * @return timestamp in millis when HMaster became the active master.
1583    */
1584   public long getMasterActiveTime() {
1585     return masterActiveTime;
1586   }
1587 
1588   public int getRegionServerInfoPort(final ServerName sn) {
1589     RegionServerInfo info = this.regionServerTracker.getRegionServerInfo(sn);
1590     if (info == null || info.getInfoPort() == 0) {
1591       return conf.getInt(HConstants.REGIONSERVER_INFO_PORT,
1592         HConstants.DEFAULT_REGIONSERVER_INFOPORT);
1593     }
1594     return info.getInfoPort();
1595   }
1596 
1597   /**
1598    * @return array of coprocessor SimpleNames.
1599    */
1600   public String[] getMasterCoprocessors() {
1601     Set<String> masterCoprocessors = getMasterCoprocessorHost().getCoprocessors();
1602     return masterCoprocessors.toArray(new String[masterCoprocessors.size()]);
1603   }
1604 
1605   @Override
1606   public void abort(final String msg, final Throwable t) {
1607     if (cpHost != null) {
1608       // HBASE-4014: dump a list of loaded coprocessors.
1609       LOG.fatal("Master server abort: loaded coprocessors are: " +
1610           getLoadedCoprocessors());
1611     }
1612     if (t != null) LOG.fatal(msg, t);
1613     stop(msg);
1614   }
1615 
  /** @return the ZooKeeper watcher this master uses. */
  @Override
  public ZooKeeperWatcher getZooKeeper() {
    return zooKeeper;
  }
1620 
  /** @return the master coprocessor host; null until coprocessors are loaded. */
  @Override
  public MasterCoprocessorHost getMasterCoprocessorHost() {
    return cpHost;
  }
1625 
  /** @return the ServerName identifying this master process. */
  @Override
  public ServerName getServerName() {
    return this.serverName;
  }
1630 
  /** @return this master's assignment manager. */
  @Override
  public AssignmentManager getAssignmentManager() {
    return this.assignmentManager;
  }
1635 
  /** @return buffer of fatal log messages reported by region servers. */
  public MemoryBoundedLogMessageBuffer getRegionServerFatalLogBuffer() {
    return rsFatals;
  }
1639 
  /**
   * Shut the whole cluster down: notify coprocessors, stop the assignment
   * manager, mark the cluster down in ZooKeeper, and tell the server manager
   * to shut down the cluster.  Each step is best-effort; failures are logged
   * and the remaining steps still run where possible.
   */
  public void shutdown() {
    if (cpHost != null) {
      try {
        cpHost.preShutdown();
      } catch (IOException ioe) {
        // Coprocessor failure must not block cluster shutdown.
        LOG.error("Error call master coprocessor preShutdown()", ioe);
      }
    }
    if (this.assignmentManager != null) {
      this.assignmentManager.shutdown();
    }
    try {
      if (this.clusterStatusTracker != null){
        // Flip the cluster-up znode first so region servers see the cluster
        // going down, then ask the server manager to shut the cluster down.
        this.clusterStatusTracker.setClusterDown();
        if (this.serverManager != null) {
          this.serverManager.shutdownCluster();
        }
      }
    } catch (KeeperException e) {
      LOG.error("ZooKeeper exception trying to set cluster as down in ZK", e);
    }
  }
1662 
  /**
   * Stop this master only (the cluster keeps running).  Coprocessors are
   * notified first; their failures are logged but do not prevent the stop.
   */
  public void stopMaster() {
    if (cpHost != null) {
      try {
        cpHost.preStopMaster();
      } catch (IOException ioe) {
        LOG.error("Error call master coprocessor preStopMaster()", ioe);
      }
    }
    stop("Stopped by " + Thread.currentThread().getName());
  }
1673 
1674   void checkServiceStarted() throws ServerNotRunningYetException {
1675     if (!serviceStarted) {
1676       throw new ServerNotRunningYetException("Server is not running yet");
1677     }
1678   }
1679 
1680   void checkInitialized() throws PleaseHoldException, ServerNotRunningYetException {
1681     checkServiceStarted();
1682     if (!this.initialized) {
1683       throw new PleaseHoldException("Master is initializing");
1684     }
1685   }
1686 
1687   void checkNamespaceManagerReady() throws IOException {
1688     checkInitialized();
1689     if (tableNamespaceManager == null ||
1690         !tableNamespaceManager.isTableAvailableAndInitialized()) {
1691       throw new IOException("Table Namespace Manager not ready yet, try again later");
1692     }
1693   }
1694   /**
1695    * Report whether this master is currently the active master or not.
1696    * If not active master, we are parked on ZK waiting to become active.
1697    *
1698    * This method is used for testing.
1699    *
1700    * @return true if active master, false if not.
1701    */
1702   public boolean isActiveMaster() {
1703     return isActiveMaster;
1704   }
1705 
1706   /**
1707    * Report whether this master has completed with its initialization and is
1708    * ready.  If ready, the master is also the active master.  A standby master
1709    * is never ready.
1710    *
1711    * This method is used for testing.
1712    *
1713    * @return true if master is ready to go, false if not.
1714    */
1715   @Override
1716   public boolean isInitialized() {
1717     return initialized;
1718   }
1719 
1720   /**
1721    * ServerShutdownHandlerEnabled is set false before completing
1722    * assignMeta to prevent processing of ServerShutdownHandler.
1723    * @return true if assignMeta has completed;
1724    */
1725   @Override
1726   public boolean isServerShutdownHandlerEnabled() {
1727     return this.serverShutdownHandlerEnabled;
1728   }
1729 
1730   /**
1731    * Report whether this master has started initialization and is about to do meta region assignment
1732    * @return true if master is in initialization & about to assign hbase:meta regions
1733    */
1734   public boolean isInitializationStartsMetaRegionAssignment() {
1735     return this.initializationBeforeMetaAssignment;
1736   }
1737 
  /**
   * Assign the given region via the assignment manager.
   * @param hri region to assign
   */
  public void assignRegion(HRegionInfo hri) {
    // NOTE(review): the boolean presumably forces/sets a new assignment —
    // confirm against AssignmentManager.assign(HRegionInfo, boolean).
    assignmentManager.assign(hri, true);
  }
1741 
1742   /**
1743    * Compute the average load across all region servers.
1744    * Currently, this uses a very naive computation - just uses the number of
1745    * regions being served, ignoring stats about number of requests.
1746    * @return the average load
1747    */
1748   public double getAverageLoad() {
1749     if (this.assignmentManager == null) {
1750       return 0;
1751     }
1752 
1753     RegionStates regionStates = this.assignmentManager.getRegionStates();
1754     if (regionStates == null) {
1755       return 0;
1756     }
1757     return regionStates.getAverageLoad();
1758   }
1759 
1760   @Override
1761   public boolean registerService(Service instance) {
1762     /*
1763      * No stacking of instances is allowed for a single service name
1764      */
1765     Descriptors.ServiceDescriptor serviceDesc = instance.getDescriptorForType();
1766     if (coprocessorServiceHandlers.containsKey(serviceDesc.getFullName())) {
1767       LOG.error("Coprocessor service "+serviceDesc.getFullName()+
1768           " already registered, rejecting request from "+instance
1769       );
1770       return false;
1771     }
1772 
1773     coprocessorServiceHandlers.put(serviceDesc.getFullName(), instance);
1774     if (LOG.isDebugEnabled()) {
1775       LOG.debug("Registered master coprocessor service: service="+serviceDesc.getFullName());
1776     }
1777     return true;
1778   }
1779 
1780   /**
1781    * Utility for constructing an instance of the passed HMaster class.
1782    * @param masterClass
1783    * @param conf
1784    * @return HMaster instance.
1785    */
1786   public static HMaster constructMaster(Class<? extends HMaster> masterClass,
1787       final Configuration conf, final CoordinatedStateManager cp)  {
1788     try {
1789       Constructor<? extends HMaster> c =
1790         masterClass.getConstructor(Configuration.class, CoordinatedStateManager.class);
1791       return c.newInstance(conf, cp);
1792     } catch (InvocationTargetException ite) {
1793       Throwable target = ite.getTargetException() != null?
1794         ite.getTargetException(): ite;
1795       if (target.getCause() != null) target = target.getCause();
1796       throw new RuntimeException("Failed construction of Master: " +
1797         masterClass.toString(), target);
1798     } catch (Exception e) {
1799       throw new RuntimeException("Failed construction of Master: " +
1800         masterClass.toString() + ((e.getCause() != null)?
1801           e.getCause().getMessage(): ""), e);
1802     }
1803   }
1804 
1805   /**
1806    * @see org.apache.hadoop.hbase.master.HMasterCommandLine
1807    */
1808   public static void main(String [] args) {
1809     VersionInfo.logVersion();
1810     new HMasterCommandLine(HMaster.class).doMain(args);
1811   }
1812 
  /** @return the HFile cleaner chore used by this master. */
  public HFileCleaner getHFileCleaner() {
    return this.hfileCleaner;
  }
1816 
1817   /**
1818    * Exposed for TESTING!
1819    * @return the underlying snapshot manager
1820    */
1821   public SnapshotManager getSnapshotManagerForTesting() {
1822     return this.snapshotManager;
1823   }
1824 
1825   @Override
1826   public void createNamespace(NamespaceDescriptor descriptor) throws IOException {
1827     TableName.isLegalNamespaceName(Bytes.toBytes(descriptor.getName()));
1828     checkNamespaceManagerReady();
1829     if (cpHost != null) {
1830       if (cpHost.preCreateNamespace(descriptor)) {
1831         return;
1832       }
1833     }
1834     LOG.info(getClientIdAuditPrefix() + " creating " + descriptor);
1835     tableNamespaceManager.create(descriptor);
1836     if (cpHost != null) {
1837       cpHost.postCreateNamespace(descriptor);
1838     }
1839   }
1840 
1841   @Override
1842   public void modifyNamespace(NamespaceDescriptor descriptor) throws IOException {
1843     TableName.isLegalNamespaceName(Bytes.toBytes(descriptor.getName()));
1844     checkNamespaceManagerReady();
1845     if (cpHost != null) {
1846       if (cpHost.preModifyNamespace(descriptor)) {
1847         return;
1848       }
1849     }
1850     LOG.info(getClientIdAuditPrefix() + " modify " + descriptor);
1851     tableNamespaceManager.update(descriptor);
1852     if (cpHost != null) {
1853       cpHost.postModifyNamespace(descriptor);
1854     }
1855   }
1856 
1857   @Override
1858   public void deleteNamespace(String name) throws IOException {
1859     checkNamespaceManagerReady();
1860     if (cpHost != null) {
1861       if (cpHost.preDeleteNamespace(name)) {
1862         return;
1863       }
1864     }
1865     LOG.info(getClientIdAuditPrefix() + " delete " + name);
1866     tableNamespaceManager.remove(name);
1867     if (cpHost != null) {
1868       cpHost.postDeleteNamespace(name);
1869     }
1870   }
1871 
1872   @Override
1873   public NamespaceDescriptor getNamespaceDescriptor(String name) throws IOException {
1874     checkNamespaceManagerReady();
1875     NamespaceDescriptor nsd = tableNamespaceManager.get(name);
1876     if (nsd == null) {
1877       throw new NamespaceNotFoundException(name);
1878     }
1879     return nsd;
1880   }
1881 
1882   @Override
1883   public List<NamespaceDescriptor> listNamespaceDescriptors() throws IOException {
1884     checkNamespaceManagerReady();
1885     return Lists.newArrayList(tableNamespaceManager.list());
1886   }
1887 
1888   @Override
1889   public List<HTableDescriptor> listTableDescriptorsByNamespace(String name) throws IOException {
1890     getNamespaceDescriptor(name); // check that namespace exists
1891     return Lists.newArrayList(tableDescriptors.getByNamespace(name).values());
1892   }
1893 
1894   @Override
1895   public List<TableName> listTableNamesByNamespace(String name) throws IOException {
1896     List<TableName> tableNames = Lists.newArrayList();
1897     getNamespaceDescriptor(name); // check that namespace exists
1898     for (HTableDescriptor descriptor: tableDescriptors.getByNamespace(name).values()) {
1899       tableNames.add(descriptor.getTableName());
1900     }
1901     return tableNames;
1902   }
1903 }