View Javadoc

1   /**
2    *
3    * Licensed to the Apache Software Foundation (ASF) under one
4    * or more contributor license agreements.  See the NOTICE file
5    * distributed with this work for additional information
6    * regarding copyright ownership.  The ASF licenses this file
7    * to you under the Apache License, Version 2.0 (the
8    * "License"); you may not use this file except in compliance
9    * with the License.  You may obtain a copy of the License at
10   *
11   *     http://www.apache.org/licenses/LICENSE-2.0
12   *
13   * Unless required by applicable law or agreed to in writing, software
14   * distributed under the License is distributed on an "AS IS" BASIS,
15   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16   * See the License for the specific language governing permissions and
17   * limitations under the License.
18   */
19  package org.apache.hadoop.hbase.master;
20  
21  import java.io.IOException;
22  import java.io.InterruptedIOException;
23  import java.lang.reflect.Constructor;
24  import java.lang.reflect.InvocationTargetException;
25  import java.net.InetAddress;
26  import java.net.InetSocketAddress;
27  import java.net.UnknownHostException;
28  import java.util.ArrayList;
29  import java.util.Arrays;
30  import java.util.Collections;
31  import java.util.Comparator;
32  import java.util.HashSet;
33  import java.util.List;
34  import java.util.Map;
35  import java.util.Set;
36  import java.util.concurrent.atomic.AtomicReference;
37  
38  import javax.servlet.ServletException;
39  import javax.servlet.http.HttpServlet;
40  import javax.servlet.http.HttpServletRequest;
41  import javax.servlet.http.HttpServletResponse;
42  
43  import org.apache.commons.logging.Log;
44  import org.apache.commons.logging.LogFactory;
45  import org.apache.hadoop.conf.Configuration;
46  import org.apache.hadoop.fs.Path;
47  import org.apache.hadoop.hbase.ClusterStatus;
48  import org.apache.hadoop.hbase.CoordinatedStateException;
49  import org.apache.hadoop.hbase.CoordinatedStateManager;
50  import org.apache.hadoop.hbase.DoNotRetryIOException;
51  import org.apache.hadoop.hbase.HBaseIOException;
52  import org.apache.hadoop.hbase.HColumnDescriptor;
53  import org.apache.hadoop.hbase.HConstants;
54  import org.apache.hadoop.hbase.HRegionInfo;
55  import org.apache.hadoop.hbase.HTableDescriptor;
56  import org.apache.hadoop.hbase.MasterNotRunningException;
57  import org.apache.hadoop.hbase.MetaTableAccessor;
58  import org.apache.hadoop.hbase.NamespaceDescriptor;
59  import org.apache.hadoop.hbase.NamespaceNotFoundException;
60  import org.apache.hadoop.hbase.PleaseHoldException;
61  import org.apache.hadoop.hbase.Server;
62  import org.apache.hadoop.hbase.ServerLoad;
63  import org.apache.hadoop.hbase.ServerName;
64  import org.apache.hadoop.hbase.TableDescriptors;
65  import org.apache.hadoop.hbase.TableName;
66  import org.apache.hadoop.hbase.TableNotDisabledException;
67  import org.apache.hadoop.hbase.TableNotFoundException;
68  import org.apache.hadoop.hbase.UnknownRegionException;
69  import org.apache.hadoop.hbase.classification.InterfaceAudience;
70  import org.apache.hadoop.hbase.client.MetaScanner;
71  import org.apache.hadoop.hbase.client.MetaScanner.MetaScannerVisitor;
72  import org.apache.hadoop.hbase.client.MetaScanner.MetaScannerVisitorBase;
73  import org.apache.hadoop.hbase.client.Result;
74  import org.apache.hadoop.hbase.client.TableState;
75  import org.apache.hadoop.hbase.coprocessor.CoprocessorHost;
76  import org.apache.hadoop.hbase.exceptions.DeserializationException;
77  import org.apache.hadoop.hbase.executor.ExecutorType;
78  import org.apache.hadoop.hbase.ipc.RequestContext;
79  import org.apache.hadoop.hbase.ipc.RpcServer;
80  import org.apache.hadoop.hbase.ipc.ServerNotRunningYetException;
81  import org.apache.hadoop.hbase.master.MasterRpcServices.BalanceSwitchMode;
82  import org.apache.hadoop.hbase.master.balancer.BalancerChore;
83  import org.apache.hadoop.hbase.master.balancer.BaseLoadBalancer;
84  import org.apache.hadoop.hbase.master.balancer.ClusterStatusChore;
85  import org.apache.hadoop.hbase.master.balancer.LoadBalancerFactory;
86  import org.apache.hadoop.hbase.master.cleaner.HFileCleaner;
87  import org.apache.hadoop.hbase.master.cleaner.LogCleaner;
88  import org.apache.hadoop.hbase.master.handler.CreateTableHandler;
89  import org.apache.hadoop.hbase.master.handler.DeleteTableHandler;
90  import org.apache.hadoop.hbase.master.handler.DisableTableHandler;
91  import org.apache.hadoop.hbase.master.handler.DispatchMergingRegionHandler;
92  import org.apache.hadoop.hbase.master.handler.EnableTableHandler;
93  import org.apache.hadoop.hbase.master.handler.ModifyTableHandler;
94  import org.apache.hadoop.hbase.master.handler.TableAddFamilyHandler;
95  import org.apache.hadoop.hbase.master.handler.TableDeleteFamilyHandler;
96  import org.apache.hadoop.hbase.master.handler.TableModifyFamilyHandler;
97  import org.apache.hadoop.hbase.master.handler.TruncateTableHandler;
98  import org.apache.hadoop.hbase.master.snapshot.SnapshotManager;
99  import org.apache.hadoop.hbase.monitoring.MemoryBoundedLogMessageBuffer;
100 import org.apache.hadoop.hbase.monitoring.MonitoredTask;
101 import org.apache.hadoop.hbase.monitoring.TaskMonitor;
102 import org.apache.hadoop.hbase.procedure.MasterProcedureManagerHost;
103 import org.apache.hadoop.hbase.procedure.flush.MasterFlushTableProcedureManager;
104 import org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.RegionServerInfo;
105 import org.apache.hadoop.hbase.protobuf.generated.ZooKeeperProtos.SplitLogTask.RecoveryMode;
106 import org.apache.hadoop.hbase.quotas.MasterQuotaManager;
107 import org.apache.hadoop.hbase.regionserver.HRegionServer;
108 import org.apache.hadoop.hbase.regionserver.RSRpcServices;
109 import org.apache.hadoop.hbase.regionserver.RegionSplitPolicy;
110 import org.apache.hadoop.hbase.replication.regionserver.Replication;
111 import org.apache.hadoop.hbase.security.UserProvider;
112 import org.apache.hadoop.hbase.util.Addressing;
113 import org.apache.hadoop.hbase.util.Bytes;
114 import org.apache.hadoop.hbase.util.CompressionTest;
115 import org.apache.hadoop.hbase.util.FSUtils;
116 import org.apache.hadoop.hbase.util.HFileArchiveUtil;
117 import org.apache.hadoop.hbase.util.Pair;
118 import org.apache.hadoop.hbase.util.Threads;
119 import org.apache.hadoop.hbase.util.VersionInfo;
120 import org.apache.hadoop.hbase.util.ZKDataMigrator;
121 import org.apache.hadoop.hbase.zookeeper.DrainingServerTracker;
122 import org.apache.hadoop.hbase.zookeeper.LoadBalancerTracker;
123 import org.apache.hadoop.hbase.zookeeper.MasterAddressTracker;
124 import org.apache.hadoop.hbase.zookeeper.MetaTableLocator;
125 import org.apache.hadoop.hbase.zookeeper.RegionServerTracker;
126 import org.apache.hadoop.hbase.zookeeper.ZKClusterId;
127 import org.apache.hadoop.hbase.zookeeper.ZKUtil;
128 import org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher;
129 import org.apache.zookeeper.KeeperException;
130 import org.apache.zookeeper.Watcher;
131 import org.mortbay.jetty.Connector;
132 import org.mortbay.jetty.nio.SelectChannelConnector;
133 import org.mortbay.jetty.servlet.Context;
134 
135 import com.google.common.annotations.VisibleForTesting;
136 import com.google.common.collect.Lists;
137 import com.google.common.collect.Maps;
138 import com.google.protobuf.Descriptors;
139 import com.google.protobuf.Service;
140 
141 /**
142  * HMaster is the "master server" for HBase. An HBase cluster has one active
143  * master.  If many masters are started, all compete.  Whichever wins goes on to
144  * run the cluster.  All others park themselves in their constructor until
145  * master or cluster shutdown or until the active master loses its lease in
146  * zookeeper.  Thereafter, all running master jostle to take over master role.
147  *
148  * <p>The Master can be asked shutdown the cluster. See {@link #shutdown()}.  In
149  * this case it will tell all regionservers to go down and then wait on them
150  * all reporting in that they are down.  This master will then shut itself down.
151  *
152  * <p>You can also shutdown just this master.  Call {@link #stopMaster()}.
153  *
154  * @see Watcher
155  */
156 @InterfaceAudience.Private
157 @SuppressWarnings("deprecation")
158 public class HMaster extends HRegionServer implements MasterServices, Server {
159   private static final Log LOG = LogFactory.getLog(HMaster.class.getName());
160 
161   // MASTER is name of the webapp and the attribute name used stuffing this
162   //instance into web context.
163   public static final String MASTER = "master";
164 
165   // Manager and zk listener for master election
166   private ActiveMasterManager activeMasterManager;
167   // Region server tracker
168   RegionServerTracker regionServerTracker;
169   // Draining region server tracker
170   private DrainingServerTracker drainingServerTracker;
171   // Tracker for load balancer state
172   LoadBalancerTracker loadBalancerTracker;
173 
174   /** Namespace stuff */
175   private TableNamespaceManager tableNamespaceManager;
176 
177   // Metrics for the HMaster
178   final MetricsMaster metricsMaster;
179   // file system manager for the master FS operations
180   private MasterFileSystem fileSystemManager;
181 
182   // server manager to deal with region server info
183   volatile ServerManager serverManager;
184 
185   // manager of assignment nodes in zookeeper
186   AssignmentManager assignmentManager;
187 
188   // buffer for "fatal error" notices from region servers
189   // in the cluster. This is only used for assisting
190   // operations/debugging.
191   MemoryBoundedLogMessageBuffer rsFatals;
192 
193   // flag set after we become the active master (used for testing)
194   private volatile boolean isActiveMaster = false;
195 
196   // flag set after we complete initialization once active,
197   // it is not private since it's used in unit tests
198   volatile boolean initialized = false;
199 
200   // flag set after master services are started,
201   // initialization may have not completed yet.
202   volatile boolean serviceStarted = false;
203 
204   // flag set after we complete assignMeta.
205   private volatile boolean serverShutdownHandlerEnabled = false;
206 
207   LoadBalancer balancer;
208   private BalancerChore balancerChore;
209   private ClusterStatusChore clusterStatusChore;
210   private ClusterStatusPublisher clusterStatusPublisherChore = null;
211 
212   CatalogJanitor catalogJanitorChore;
213   private LogCleaner logCleaner;
214   private HFileCleaner hfileCleaner;
215 
216   MasterCoprocessorHost cpHost;
217 
218   // Time stamps for when a hmaster became active
219   private long masterActiveTime;
220 
221   //should we check the compression codec type at master side, default true, HBASE-6370
222   private final boolean masterCheckCompression;
223 
224   Map<String, Service> coprocessorServiceHandlers = Maps.newHashMap();
225 
226   // monitor for snapshot of hbase tables
227   SnapshotManager snapshotManager;
228   // monitor for distributed procedures
229   MasterProcedureManagerHost mpmHost;
230 
231   private MasterQuotaManager quotaManager;
232 
233   // handle table states
234   private TableStateManager tableStateManager;
235 
236   /** flag used in test cases in order to simulate RS failures during master initialization */
237   private volatile boolean initializationBeforeMetaAssignment = false;
238 
239   /** jetty server for master to redirect requests to regionserver infoServer */
240   private org.mortbay.jetty.Server masterJettyServer;
241 
242   public static class RedirectServlet extends HttpServlet {
243     private static final long serialVersionUID = 2894774810058302472L;
244     private static int regionServerInfoPort;
245 
246     @Override
247     public void doGet(HttpServletRequest request,
248         HttpServletResponse response) throws ServletException, IOException {
249       String redirectUrl = request.getScheme() + "://"
250         + request.getServerName() + ":" + regionServerInfoPort
251         + request.getRequestURI();
252       response.sendRedirect(redirectUrl);
253     }
254   }
255 
  /**
   * Initializes the HMaster. The steps are as follows:
   * <p>
   * <ol>
   * <li>Initialize the local HRegionServer
   * <li>Start the ActiveMasterManager.
   * </ol>
   * <p>
   * Remaining steps of initialization occur in
   * {@link #finishActiveMasterInitialization(MonitoredTask)} after
   * the master becomes the active one.
   *
   * @param conf cluster configuration
   * @param csm  coordinated state manager shared with the regionserver base class
   * @throws KeeperException
   * @throws IOException
   */
  public HMaster(final Configuration conf, CoordinatedStateManager csm)
      throws IOException, KeeperException {
    super(conf, csm);
    // Ring buffer of fatal errors reported by region servers, for the UI/debugging.
    this.rsFatals = new MemoryBoundedLogMessageBuffer(
      conf.getLong("hbase.master.buffer.for.rs.fatals", 1*1024*1024));

    LOG.info("hbase.rootdir=" + FSUtils.getRootDir(this.conf) +
        ", hbase.cluster.distributed=" + this.conf.getBoolean(HConstants.CLUSTER_DISTRIBUTED, false));

    // Add master-specific replication settings on top of the base configuration.
    Replication.decorateMasterConfiguration(this.conf);

    // Hack! Maps DFSClient => Master for logs.  HDFS made this
    // config param for task trackers, but we can piggyback off of it.
    if (this.conf.get("mapreduce.task.attempt.id") == null) {
      this.conf.set("mapreduce.task.attempt.id", "hb_m_" + this.serverName.toString());
    }

    //should we check the compression codec type at master side, default true, HBASE-6370
    this.masterCheckCompression = conf.getBoolean("hbase.master.check.compression", true);

    this.metricsMaster = new MetricsMaster( new MetricsMasterWrapperImpl(this));

    // Do we publish the status?
    boolean shouldPublish = conf.getBoolean(HConstants.STATUS_PUBLISHED,
        HConstants.STATUS_PUBLISHED_DEFAULT);
    Class<? extends ClusterStatusPublisher.Publisher> publisherClass =
        conf.getClass(ClusterStatusPublisher.STATUS_PUBLISHER_CLASS,
            ClusterStatusPublisher.DEFAULT_STATUS_PUBLISHER_CLASS,
            ClusterStatusPublisher.Publisher.class);

    if (shouldPublish) {
      if (publisherClass == null) {
        LOG.warn(HConstants.STATUS_PUBLISHED + " is true, but " +
            ClusterStatusPublisher.DEFAULT_STATUS_PUBLISHER_CLASS +
            " is not set - not publishing status");
      } else {
        clusterStatusPublisherChore = new ClusterStatusPublisher(this, conf, publisherClass);
        Threads.setDaemonThreadRunning(clusterStatusPublisherChore.getThread());
      }
    }
    // Kick off the master-election machinery, then the redirecting jetty server.
    startActiveMasterManager();
    putUpJettyServer();
  }
314 
  /**
   * Start a small Jetty server on the master info port that redirects all
   * requests to the local region server's info server. Does nothing when
   * redirecting is disabled, the info port is negative, or no info server
   * is running.
   * @throws IOException if the configured bind address does not belong to this
   *   host, or the jetty server fails to start
   */
  private void putUpJettyServer() throws IOException {
    if (!conf.getBoolean("hbase.master.infoserver.redirect", true)) {
      return;
    }
    int infoPort = conf.getInt("hbase.master.info.port.orig",
      HConstants.DEFAULT_MASTER_INFOPORT);
    // -1 is for disabling info server, so no redirecting
    if (infoPort < 0 || infoServer == null) {
      return;
    }
    String addr = conf.get("hbase.master.info.bindAddress", "0.0.0.0");
    if (!Addressing.isLocalAddress(InetAddress.getByName(addr))) {
      String msg =
          "Failed to start redirecting jetty server. Address " + addr
              + " does not belong to this host. Correct configuration parameter: "
              + "hbase.master.info.bindAddress";
      LOG.error(msg);
      throw new IOException(msg);
    }

    // Tell the redirect servlet which port the region server info server is on.
    RedirectServlet.regionServerInfoPort = infoServer.getPort();
    masterJettyServer = new org.mortbay.jetty.Server();
    Connector connector = new SelectChannelConnector();
    connector.setHost(addr);
    connector.setPort(infoPort);
    masterJettyServer.addConnector(connector);
    masterJettyServer.setStopAtShutdown(true);
    // Map every path ("/*") to the redirect servlet; no HTTP sessions needed.
    Context context = new Context(masterJettyServer, "/", Context.NO_SESSIONS);
    context.addServlet(RedirectServlet.class, "/*");
    try {
      masterJettyServer.start();
    } catch (Exception e) {
      // Jetty declares a bare Exception; rewrap to honor this method's contract.
      throw new IOException("Failed to start redirecting jetty server", e);
    }
  }
350 
351   /**
352    * For compatibility, if failed with regionserver credentials, try the master one
353    */
354   protected void login(UserProvider user, String host) throws IOException {
355     try {
356       super.login(user, host);
357     } catch (IOException ie) {
358       user.login("hbase.master.keytab.file",
359         "hbase.master.kerberos.principal", host);
360     }
361   }
362 
363   /**
364    * If configured to put regions on active master,
365    * wait till a backup master becomes active.
366    * Otherwise, loop till the server is stopped or aborted.
367    */
368   protected void waitForMasterActive(){
369     boolean tablesOnMaster = BaseLoadBalancer.tablesOnMaster(conf);
370     while (!(tablesOnMaster && isActiveMaster)
371         && !isStopped() && !isAborted()) {
372       sleeper.sleep();
373     }
374   }
375 
376   @VisibleForTesting
377   public MasterRpcServices getMasterRpcServices() {
378     return (MasterRpcServices)rpcServices;
379   }
380 
381   public boolean balanceSwitch(final boolean b) throws IOException {
382     return getMasterRpcServices().switchBalancer(b, BalanceSwitchMode.ASYNC);
383   }
384 
385   protected String getProcessName() {
386     return MASTER;
387   }
388 
389   protected boolean canCreateBaseZNode() {
390     return true;
391   }
392 
393   protected boolean canUpdateTableDescriptor() {
394     return true;
395   }
396 
397   protected RSRpcServices createRpcServices() throws IOException {
398     return new MasterRpcServices(this);
399   }
400 
401   protected void configureInfoServer() {
402     infoServer.addServlet("master-status", "/master-status", MasterStatusServlet.class);
403     infoServer.setAttribute(MASTER, this);
404     if (BaseLoadBalancer.tablesOnMaster(conf)) {
405       super.configureInfoServer();
406     }
407   }
408 
409   protected Class<? extends HttpServlet> getDumpServlet() {
410     return MasterDumpServlet.class;
411   }
412 
413   /**
414    * Emit the HMaster metrics, such as region in transition metrics.
415    * Surrounding in a try block just to be sure metrics doesn't abort HMaster.
416    */
417   protected void doMetrics() {
418     try {
419       if (assignmentManager != null) {
420         assignmentManager.updateRegionsInTransitionMetrics();
421       }
422     } catch (Throwable e) {
423       LOG.error("Couldn't update metrics: " + e.getMessage());
424     }
425   }
426 
427   MetricsMaster getMasterMetrics() {
428     return metricsMaster;
429   }
430 
  /**
   * Initialize all ZK based system trackers: load balancer tracker, assignment
   * manager, region server tracker, draining server tracker; mark the cluster
   * as up; set up the snapshot/procedure managers; and migrate any table state
   * left in ZooKeeper by older versions into the table state manager.
   * @throws IOException
   * @throws InterruptedException
   * @throws KeeperException
   * @throws CoordinatedStateException
   */
  void initializeZKBasedSystemTrackers() throws IOException,
      InterruptedException, KeeperException, CoordinatedStateException {
    // Balancer must exist first: the assignment manager takes it as an argument.
    this.balancer = LoadBalancerFactory.getLoadBalancer(conf);
    this.loadBalancerTracker = new LoadBalancerTracker(zooKeeper, this);
    this.loadBalancerTracker.start();
    this.assignmentManager = new AssignmentManager(this, serverManager,
      this.balancer, this.service, this.metricsMaster,
      this.tableLockManager, tableStateManager);

    this.regionServerTracker = new RegionServerTracker(zooKeeper, this,
        this.serverManager);
    this.regionServerTracker.start();

    this.drainingServerTracker = new DrainingServerTracker(zooKeeper, this,
      this.serverManager);
    this.drainingServerTracker.start();

    // Set the cluster as up.  If new RSs, they'll be waiting on this before
    // going ahead with their startup.
    boolean wasUp = this.clusterStatusTracker.isClusterUp();
    if (!wasUp) this.clusterStatusTracker.setClusterUp();

    LOG.info("Server active/primary master=" + this.serverName +
        ", sessionid=0x" +
        Long.toHexString(this.zooKeeper.getRecoverableZooKeeper().getSessionId()) +
        ", setting cluster-up flag (Was=" + wasUp + ")");

    // create/initialize the snapshot manager and other procedure managers
    this.snapshotManager = new SnapshotManager();
    this.mpmHost = new MasterProcedureManagerHost();
    this.mpmHost.register(this.snapshotManager);
    this.mpmHost.register(new MasterFlushTableProcedureManager());
    this.mpmHost.loadProcedures(conf);
    this.mpmHost.initialize(this, this.metricsMaster);

    // migrating existent table state from zk: copy each znode-recorded state
    // into the new table state manager, then drop the legacy znodes.
    for (Map.Entry<TableName, TableState.State> entry : ZKDataMigrator
        .queryForTableStates(getZooKeeper()).entrySet()) {
      LOG.info("Converting state from zk to new states:" + entry);
      tableStateManager.setTableState(entry.getKey(), entry.getValue());
    }
    ZKUtil.deleteChildrenRecursively(getZooKeeper(), getZooKeeper().tableZNode);
  }
481 
  /**
   * Finish initialization of HMaster after becoming the primary master.
   *
   * <ol>
   * <li>Initialize master components - file system manager, server manager,
   *     assignment manager, region server tracker, etc</li>
   * <li>Start necessary service threads - balancer, catalog janitor,
   *     executor services, etc</li>
   * <li>Set cluster as UP in ZooKeeper</li>
   * <li>Wait for RegionServers to check-in</li>
   * <li>Split logs and perform data recovery, if necessary</li>
   * <li>Ensure assignment of meta/namespace regions<li>
   * <li>Handle either fresh cluster start or master failover</li>
   * </ol>
   *
   * @param status monitored task used to surface progress in the UI
   * @throws IOException
   * @throws InterruptedException
   * @throws KeeperException
   * @throws CoordinatedStateException
   */
  private void finishActiveMasterInitialization(MonitoredTask status)
      throws IOException, InterruptedException, KeeperException, CoordinatedStateException {

    isActiveMaster = true;

    /*
     * We are active master now... go initialize components we need to run.
     * Note, there may be dross in zk from previous runs; it'll get addressed
     * below after we determine if cluster startup or failover.
     */

    status.setStatus("Initializing Master file system");

    this.masterActiveTime = System.currentTimeMillis();
    // TODO: Do this using Dependency Injection, using PicoContainer, Guice or Spring.
    this.fileSystemManager = new MasterFileSystem(this, this);

    // publish cluster ID
    status.setStatus("Publishing Cluster ID in ZooKeeper");
    ZKClusterId.setClusterId(this.zooKeeper, fileSystemManager.getClusterId());
    this.serverManager = createServerManager(this, this);

    // Lazily create the short-circuit connection/meta locator exactly once.
    synchronized (this) {
      if (shortCircuitConnection == null) {
        shortCircuitConnection = createShortCircuitConnection();
        metaTableLocator = new MetaTableLocator();
      }
    }

    // Invalidate all write locks held previously
    this.tableLockManager.reapWriteLocks();

    this.tableStateManager = new TableStateManager(this);
    this.tableStateManager.start();

    status.setStatus("Initializing ZK system trackers");
    initializeZKBasedSystemTrackers();

    // initialize master side coprocessors before we start handling requests
    status.setStatus("Initializing master coprocessors");
    this.cpHost = new MasterCoprocessorHost(this, this.conf);

    // start up all service threads.
    status.setStatus("Initializing master service threads");
    startServiceThreads();

    // Wake up this server to check in
    sleeper.skipSleepCycle();

    // Wait for region servers to report in
    this.serverManager.waitForRegionServers(status);
    // Check zk for region servers that are up but didn't register
    for (ServerName sn: this.regionServerTracker.getOnlineServers()) {
      // The isServerOnline check is opportunistic, correctness is handled inside
      if (!this.serverManager.isServerOnline(sn)
          && serverManager.checkAndRecordNewServer(sn, ServerLoad.EMPTY_SERVERLOAD)) {
        LOG.info("Registered server found up in zk but who has not yet reported in: " + sn);
      }
    }

    // get a list for previously failed RS which need log splitting work
    // we recover hbase:meta region servers inside master initialization and
    // handle other failed servers in SSH in order to start up master node ASAP
    Set<ServerName> previouslyFailedServers = this.fileSystemManager
        .getFailedServersFromLogFolders();

    // remove stale recovering regions from previous run
    this.fileSystemManager.removeStaleRecoveringRegionsFromZK(previouslyFailedServers);

    // log splitting for hbase:meta server
    ServerName oldMetaServerLocation = metaTableLocator.getMetaRegionLocation(this.getZooKeeper());
    if (oldMetaServerLocation != null && previouslyFailedServers.contains(oldMetaServerLocation)) {
      splitMetaLogBeforeAssignment(oldMetaServerLocation);
      // Note: we can't remove oldMetaServerLocation from previousFailedServers list because it
      // may also host user regions
    }
    Set<ServerName> previouslyFailedMetaRSs = getPreviouselyFailedMetaServersFromZK();
    // need to use union of previouslyFailedMetaRSs recorded in ZK and previouslyFailedServers
    // instead of previouslyFailedMetaRSs alone to address the following two situations:
    // 1) the chained failure situation(recovery failed multiple times in a row).
    // 2) master get killed right before it could delete the recovering hbase:meta from ZK while the
    // same server still has non-meta wals to be replayed so that
    // removeStaleRecoveringRegionsFromZK can't delete the stale hbase:meta region
    // Passing more servers into splitMetaLog is all right. If a server doesn't have hbase:meta wal,
    // there is no op for the server.
    previouslyFailedMetaRSs.addAll(previouslyFailedServers);

    // Flag read by tests to simulate RS failures at this exact point.
    this.initializationBeforeMetaAssignment = true;

    // Wait for regionserver to finish initialization.
    if (BaseLoadBalancer.tablesOnMaster(conf)) {
      waitForServerOnline();
    }

    //initialize load balancer
    this.balancer.setClusterStatus(getClusterStatus());
    this.balancer.setMasterServices(this);
    this.balancer.initialize();

    // Check if master is shutting down because of some issue
    // in initializing the regionserver or the balancer.
    if(isStopped()) return;

    // Make sure meta assigned before proceeding.
    status.setStatus("Assigning Meta Region");
    assignMeta(status, previouslyFailedMetaRSs);
    // check if master is shutting down because above assignMeta could return even hbase:meta isn't
    // assigned when master is shutting down
    if(isStopped()) return;

    status.setStatus("Submitting log splitting work for previously failed region servers");
    // Master has recovered hbase:meta region server and we put
    // other failed region servers in a queue to be handled later by SSH
    for (ServerName tmpServer : previouslyFailedServers) {
      this.serverManager.processDeadServer(tmpServer, true);
    }

    // Fix up assignment manager status
    status.setStatus("Starting assignment manager");
    this.assignmentManager.joinCluster();

    //set cluster status again after user regions are assigned
    this.balancer.setClusterStatus(getClusterStatus());

    // Start balancer and meta catalog janitor after meta and regions have
    // been assigned.
    status.setStatus("Starting balancer and catalog janitor");
    this.clusterStatusChore = new ClusterStatusChore(this, balancer);
    Threads.setDaemonThreadRunning(clusterStatusChore.getThread());
    this.balancerChore = new BalancerChore(this);
    Threads.setDaemonThreadRunning(balancerChore.getThread());
    this.catalogJanitorChore = new CatalogJanitor(this, this);
    Threads.setDaemonThreadRunning(catalogJanitorChore.getThread());

    status.setStatus("Starting namespace manager");
    initNamespace();

    status.setStatus("Starting quota manager");
    initQuotaManager();

    if (this.cpHost != null) {
      try {
        this.cpHost.preMasterInitialization();
      } catch (IOException e) {
        // Coprocessor failures here are logged but do not abort the master.
        LOG.error("Coprocessor preMasterInitialization() hook failed", e);
      }
    }

    status.markComplete("Initialization successful");
    LOG.info("Master has completed initialization");
    initialized = true;
    // clear the dead servers with same host name and port of online server because we are not
    // removing dead server with same hostname and port of rs which is trying to check in before
    // master initialization. See HBASE-5916.
    this.serverManager.clearDeadServersWithSameHostNameAndPortOfOnlineServer();

    if (this.cpHost != null) {
      // don't let cp initialization errors kill the master
      try {
        this.cpHost.postStartMaster();
      } catch (IOException ioe) {
        LOG.error("Coprocessor postStartMaster() hook failed", ioe);
      }
    }
  }
667 
668   /**
669    * Create a {@link ServerManager} instance.
670    * @param master
671    * @param services
672    * @return An instance of {@link ServerManager}
673    * @throws org.apache.hadoop.hbase.ZooKeeperConnectionException
674    * @throws IOException
675    */
676   ServerManager createServerManager(final Server master,
677       final MasterServices services)
678   throws IOException {
679     // We put this out here in a method so can do a Mockito.spy and stub it out
680     // w/ a mocked up ServerManager.
681     return new ServerManager(master, services);
682   }
683 
  /**
   * Check <code>hbase:meta</code> is assigned. If not, assign it. Splits the
   * meta WAL of a dead previous host before re-assigning, and finally decides
   * whether the server shutdown handler can be enabled.
   * @param status MonitoredTask used to report progress
   * @param previouslyFailedMetaRSs servers that previously hosted meta and
   *   failed; may be added to here if the current meta host is found dead
   * @throws InterruptedException
   * @throws IOException
   * @throws KeeperException
   */
  void assignMeta(MonitoredTask status, Set<ServerName> previouslyFailedMetaRSs)
      throws InterruptedException, IOException, KeeperException {
    // Work on meta region
    int assigned = 0;
    long timeout = this.conf.getLong("hbase.catalog.verification.timeout", 1000);
    status.setStatus("Assigning hbase:meta region");

    // Get current meta state from zk.
    RegionState metaState = MetaTableLocator.getMetaRegionState(getZooKeeper());

    RegionStates regionStates = assignmentManager.getRegionStates();
    regionStates.createRegionState(HRegionInfo.FIRST_META_REGIONINFO,
      metaState.getState(), metaState.getServerName(), null);

    // Meta needs (re)assignment when it is not open, or its recorded location
    // fails verification within the configured timeout.
    if (!metaState.isOpened() || !metaTableLocator.verifyMetaRegionLocation(
        this.getShortCircuitConnection(), this.getZooKeeper(), timeout)) {
      ServerName currentMetaServer = metaState.getServerName();
      if (serverManager.isServerOnline(currentMetaServer)) {
        // Host is alive: let the in-flight transition finish.
        LOG.info("Meta was in transition on " + currentMetaServer);
        assignmentManager.processRegionsInTransition(Arrays.asList(metaState));
      } else {
        if (currentMetaServer != null) {
          // Host is dead: recover its meta WAL before assigning elsewhere.
          splitMetaLogBeforeAssignment(currentMetaServer);
          regionStates.logSplit(HRegionInfo.FIRST_META_REGIONINFO);
          previouslyFailedMetaRSs.add(currentMetaServer);
        }
        LOG.info("Re-assigning hbase:meta, it was on " + currentMetaServer);
        assignmentManager.assignMeta();
      }
      assigned++;
    }

    enableMeta(TableName.META_TABLE_NAME);

    if ((RecoveryMode.LOG_REPLAY == this.getMasterFileSystem().getLogRecoveryMode())
        && (!previouslyFailedMetaRSs.isEmpty())) {
      // replay WAL edits mode need new hbase:meta RS is assigned firstly
      status.setStatus("replaying log for Meta Region");
      this.fileSystemManager.splitMetaLog(previouslyFailedMetaRSs);
    }

    // Make sure a hbase:meta location is set. We need to enable SSH here since
    // if the meta region server is died at this time, we need it to be re-assigned
    // by SSH so that system tables can be assigned.
    // No need to wait for meta is assigned = 0 when meta is just verified.
    enableServerShutdownHandler(assigned != 0);

    LOG.info("hbase:meta assigned=" + assigned + ", location="
      + metaTableLocator.getMetaRegionLocation(this.getZooKeeper()));
    status.setStatus("META assigned.");
  }
743 
744   void initNamespace() throws IOException {
745     //create namespace manager
746     tableNamespaceManager = new TableNamespaceManager(this);
747     tableNamespaceManager.start();
748   }
749 
750   void initQuotaManager() throws IOException {
751     quotaManager = new MasterQuotaManager(this);
752     quotaManager.start();
753   }
754 
755   boolean isCatalogJanitorEnabled() {
756     return catalogJanitorChore != null ?
757       catalogJanitorChore.getEnabled() : false;
758   }
759 
760   private void splitMetaLogBeforeAssignment(ServerName currentMetaServer) throws IOException {
761     if (RecoveryMode.LOG_REPLAY == this.getMasterFileSystem().getLogRecoveryMode()) {
762       // In log replay mode, we mark hbase:meta region as recovering in ZK
763       Set<HRegionInfo> regions = new HashSet<HRegionInfo>();
764       regions.add(HRegionInfo.FIRST_META_REGIONINFO);
765       this.fileSystemManager.prepareLogReplay(currentMetaServer, regions);
766     } else {
767       // In recovered.edits mode: create recovered edits file for hbase:meta server
768       this.fileSystemManager.splitMetaLog(currentMetaServer);
769     }
770   }
771 
772   private void enableServerShutdownHandler(
773       final boolean waitForMeta) throws IOException, InterruptedException {
774     // If ServerShutdownHandler is disabled, we enable it and expire those dead
775     // but not expired servers. This is required so that if meta is assigning to
776     // a server which dies after assignMeta starts assignment,
777     // SSH can re-assign it. Otherwise, we will be
778     // stuck here waiting forever if waitForMeta is specified.
779     if (!serverShutdownHandlerEnabled) {
780       serverShutdownHandlerEnabled = true;
781       this.serverManager.processQueuedDeadServers();
782     }
783 
784     if (waitForMeta) {
785       metaTableLocator.waitMetaRegionLocation(this.getZooKeeper());
786     }
787   }
788 
789   private void enableMeta(TableName metaTableName) {
790     if (!this.tableStateManager.isTableState(metaTableName,
791             TableState.State.ENABLED)) {
792       this.assignmentManager.setEnabledTable(metaTableName);
793     }
794   }
795 
796   /**
797    * This function returns a set of region server names under hbase:meta recovering region ZK node
798    * @return Set of meta server names which were recorded in ZK
799    * @throws KeeperException
800    */
801   private Set<ServerName> getPreviouselyFailedMetaServersFromZK() throws KeeperException {
802     Set<ServerName> result = new HashSet<ServerName>();
803     String metaRecoveringZNode = ZKUtil.joinZNode(zooKeeper.recoveringRegionsZNode,
804       HRegionInfo.FIRST_META_REGIONINFO.getEncodedName());
805     List<String> regionFailedServers = ZKUtil.listChildrenNoWatch(zooKeeper, metaRecoveringZNode);
806     if (regionFailedServers == null) return result;
807 
808     for(String failedServer : regionFailedServers) {
809       ServerName server = ServerName.parseServerName(failedServer);
810       result.add(server);
811     }
812     return result;
813   }
814 
815   @Override
816   public TableDescriptors getTableDescriptors() {
817     return this.tableDescriptors;
818   }
819 
820   @Override
821   public ServerManager getServerManager() {
822     return this.serverManager;
823   }
824 
825   @Override
826   public MasterFileSystem getMasterFileSystem() {
827     return this.fileSystemManager;
828   }
829 
830   @Override
831   public TableStateManager getTableStateManager() {
832     return tableStateManager;
833   }
834 
835   /*
836    * Start up all services. If any of these threads gets an unhandled exception
837    * then they just die with a logged message.  This should be fine because
838    * in general, we do not expect the master to get such unhandled exceptions
839    *  as OOMEs; it should be lightly loaded. See what HRegionServer does if
840    *  need to install an unexpected exception handler.
841    */
842   private void startServiceThreads() throws IOException{
843    // Start the executor service pools
844    this.service.startExecutorService(ExecutorType.MASTER_OPEN_REGION,
845       conf.getInt("hbase.master.executor.openregion.threads", 5));
846    this.service.startExecutorService(ExecutorType.MASTER_CLOSE_REGION,
847       conf.getInt("hbase.master.executor.closeregion.threads", 5));
848    this.service.startExecutorService(ExecutorType.MASTER_SERVER_OPERATIONS,
849       conf.getInt("hbase.master.executor.serverops.threads", 5));
850    this.service.startExecutorService(ExecutorType.MASTER_META_SERVER_OPERATIONS,
851       conf.getInt("hbase.master.executor.serverops.threads", 5));
852    this.service.startExecutorService(ExecutorType.M_LOG_REPLAY_OPS,
853       conf.getInt("hbase.master.executor.logreplayops.threads", 10));
854 
855    // We depend on there being only one instance of this executor running
856    // at a time.  To do concurrency, would need fencing of enable/disable of
857    // tables.
858    // Any time changing this maxThreads to > 1, pls see the comment at
859    // AccessController#postCreateTableHandler
860    this.service.startExecutorService(ExecutorType.MASTER_TABLE_OPERATIONS, 1);
861 
862    // Start log cleaner thread
863    int cleanerInterval = conf.getInt("hbase.master.cleaner.interval", 60 * 1000);
864    this.logCleaner =
865       new LogCleaner(cleanerInterval,
866          this, conf, getMasterFileSystem().getFileSystem(),
867          getMasterFileSystem().getOldLogDir());
868          Threads.setDaemonThreadRunning(logCleaner.getThread(),
869            getServerName().toShortString() + ".oldLogCleaner");
870 
871    //start the hfile archive cleaner thread
872     Path archiveDir = HFileArchiveUtil.getArchivePath(conf);
873     this.hfileCleaner = new HFileCleaner(cleanerInterval, this, conf, getMasterFileSystem()
874         .getFileSystem(), archiveDir);
875     Threads.setDaemonThreadRunning(hfileCleaner.getThread(),
876       getServerName().toShortString() + ".archivedHFileCleaner");
877 
878     serviceStarted = true;
879     if (LOG.isTraceEnabled()) {
880       LOG.trace("Started service threads");
881     }
882   }
883 
884   protected void stopServiceThreads() {
885     if (masterJettyServer != null) {
886       LOG.info("Stopping master jetty server");
887       try {
888         masterJettyServer.stop();
889       } catch (Exception e) {
890         LOG.error("Failed to stop master jetty server", e);
891       }
892     }
893     super.stopServiceThreads();
894     stopChores();
895     // Wait for all the remaining region servers to report in IFF we were
896     // running a cluster shutdown AND we were NOT aborting.
897     if (!isAborted() && this.serverManager != null &&
898         this.serverManager.isClusterShutdown()) {
899       this.serverManager.letRegionServersShutdown();
900     }
901     if (LOG.isDebugEnabled()) {
902       LOG.debug("Stopping service threads");
903     }
904     // Clean up and close up shop
905     if (this.logCleaner!= null) this.logCleaner.interrupt();
906     if (this.hfileCleaner != null) this.hfileCleaner.interrupt();
907     if (this.quotaManager != null) this.quotaManager.stop();
908     if (this.activeMasterManager != null) this.activeMasterManager.stop();
909     if (this.serverManager != null) this.serverManager.stop();
910     if (this.assignmentManager != null) this.assignmentManager.stop();
911     if (this.fileSystemManager != null) this.fileSystemManager.stop();
912     if (this.mpmHost != null) this.mpmHost.stop("server shutting down.");
913   }
914 
915   private void stopChores() {
916     if (this.balancerChore != null) {
917       this.balancerChore.interrupt();
918     }
919     if (this.clusterStatusChore != null) {
920       this.clusterStatusChore.interrupt();
921     }
922     if (this.catalogJanitorChore != null) {
923       this.catalogJanitorChore.interrupt();
924     }
925     if (this.clusterStatusPublisherChore != null){
926       clusterStatusPublisherChore.interrupt();
927     }
928   }
929 
930   /**
931    * @return Get remote side's InetAddress
932    * @throws UnknownHostException
933    */
934   InetAddress getRemoteInetAddress(final int port,
935       final long serverStartCode) throws UnknownHostException {
936     // Do it out here in its own little method so can fake an address when
937     // mocking up in tests.
938     InetAddress ia = RpcServer.getRemoteIp();
939 
940     // The call could be from the local regionserver,
941     // in which case, there is no remote address.
942     if (ia == null && serverStartCode == startcode) {
943       InetSocketAddress isa = rpcServices.getSocketAddress();
944       if (isa != null && isa.getPort() == port) {
945         ia = isa.getAddress();
946       }
947     }
948     return ia;
949   }
950 
951   /**
952    * @return Maximum time we should run balancer for
953    */
954   private int getBalancerCutoffTime() {
955     int balancerCutoffTime =
956       getConfiguration().getInt("hbase.balancer.max.balancing", -1);
957     if (balancerCutoffTime == -1) {
958       // No time period set so create one
959       int balancerPeriod =
960         getConfiguration().getInt("hbase.balancer.period", 300000);
961       balancerCutoffTime = balancerPeriod;
962       // If nonsense period, set it to balancerPeriod
963       if (balancerCutoffTime <= 0) balancerCutoffTime = balancerPeriod;
964     }
965     return balancerCutoffTime;
966   }
967 
  /**
   * Run the load balancer once: compute region plans per table and execute
   * them, bounded by the configured maximum balancing time.
   * <p>
   * Balancing is skipped (returning false) when the master is not yet
   * initialized, the balancer switch is off, regions are in transition, dead
   * servers are still being processed, or a coprocessor bypasses the run.
   * @return true when balancing ran (even if no plans were produced —
   *   the cluster is then already balanced); false when it was skipped
   * @throws IOException
   */
  public boolean balance() throws IOException {
    // if master not initialized, don't run balancer.
    if (!this.initialized) {
      LOG.debug("Master has not been initialized, don't run balancer.");
      return false;
    }
    // Do this call outside of synchronized block.
    int maximumBalanceTime = getBalancerCutoffTime();
    // Only allow one balance run at a time; synchronize on the balancer.
    synchronized (this.balancer) {
      // If balance not true, don't run balancer.
      if (!this.loadBalancerTracker.isBalancerOn()) return false;
      // Only allow one balance run at at time.
      if (this.assignmentManager.getRegionStates().isRegionsInTransition()) {
        Map<String, RegionState> regionsInTransition =
          this.assignmentManager.getRegionStates().getRegionsInTransition();
        LOG.debug("Not running balancer because " + regionsInTransition.size() +
          " region(s) in transition: " + org.apache.commons.lang.StringUtils.
            abbreviate(regionsInTransition.toString(), 256));
        return false;
      }
      if (this.serverManager.areDeadServersInProgress()) {
        LOG.debug("Not running balancer because processing dead regionserver(s): " +
          this.serverManager.getDeadServers());
        return false;
      }

      // preBalance returning true means a coprocessor vetoed this run.
      if (this.cpHost != null) {
        try {
          if (this.cpHost.preBalance()) {
            LOG.debug("Coprocessor bypassing balancer request");
            return false;
          }
        } catch (IOException ioe) {
          LOG.error("Error invoking master coprocessor preBalance()", ioe);
          return false;
        }
      }

      Map<TableName, Map<ServerName, List<HRegionInfo>>> assignmentsByTable =
        this.assignmentManager.getRegionStates().getAssignmentsByTable();

      List<RegionPlan> plans = new ArrayList<RegionPlan>();
      //Give the balancer the current cluster state.
      this.balancer.setClusterStatus(getClusterStatus());
      // Plans are computed per table and concatenated.
      for (Map<ServerName, List<HRegionInfo>> assignments : assignmentsByTable.values()) {
        List<RegionPlan> partialPlans = this.balancer.balanceCluster(assignments);
        if (partialPlans != null) plans.addAll(partialPlans);
      }
      long cutoffTime = System.currentTimeMillis() + maximumBalanceTime;
      int rpCount = 0;  // number of RegionPlans balanced so far
      long totalRegPlanExecTime = 0;
      // NOTE: plans can never be null here; the null check is defensive only.
      if (plans != null && !plans.isEmpty()) {
        for (RegionPlan plan: plans) {
          LOG.info("balance " + plan);
          long balStartTime = System.currentTimeMillis();
          //TODO: bulk assign
          this.assignmentManager.balance(plan);
          totalRegPlanExecTime += System.currentTimeMillis()-balStartTime;
          rpCount++;
          // Stop early if executing one more plan (estimated from the average
          // plan execution time so far) would overrun the cutoff.
          if (rpCount < plans.size() &&
              // if performing next balance exceeds cutoff time, exit the loop
              (System.currentTimeMillis() + (totalRegPlanExecTime / rpCount)) > cutoffTime) {
            //TODO: After balance, there should not be a cutoff time (keeping it as a security net for now)
            LOG.debug("No more balancing till next balance run; maximumBalanceTime=" +
              maximumBalanceTime);
            break;
          }
        }
      }
      if (this.cpHost != null) {
        try {
          this.cpHost.postBalance(rpCount < plans.size() ? plans.subList(0, rpCount) : plans);
        } catch (IOException ioe) {
          // balancing already succeeded so don't change the result
          LOG.error("Error invoking master coprocessor postBalance()", ioe);
        }
      }
    }
    // If LoadBalancer did not generate any plans, it means the cluster is already balanced.
    // Return true indicating a success.
    return true;
  }
1050 
1051   /**
1052    * @return Client info for use as prefix on an audit log string; who did an action
1053    */
1054   String getClientIdAuditPrefix() {
1055     return "Client=" + RequestContext.getRequestUserName() + "/" +
1056       RequestContext.get().getRemoteAddress();
1057   }
1058 
1059   /**
1060    * Switch for the background CatalogJanitor thread.
1061    * Used for testing.  The thread will continue to run.  It will just be a noop
1062    * if disabled.
1063    * @param b If false, the catalog janitor won't do anything.
1064    */
1065   public void setCatalogJanitorEnabled(final boolean b) {
1066     this.catalogJanitorChore.setEnabled(b);
1067   }
1068 
1069   @Override
1070   public void dispatchMergingRegions(final HRegionInfo region_a,
1071       final HRegionInfo region_b, final boolean forcible) throws IOException {
1072     checkInitialized();
1073     this.service.submit(new DispatchMergingRegionHandler(this,
1074         this.catalogJanitorChore, region_a, region_b, forcible));
1075   }
1076 
  /**
   * Move a region to a new region server, running coprocessor pre/post hooks
   * around the move. With a null/empty destination the balancer picks a
   * server at random (refusing to place user regions on the master itself).
   * A move to the region's current server is silently skipped.
   * @param encodedRegionName encoded name identifying the region to move
   * @param destServerName destination server; null or empty to pick at random
   * @throws HBaseIOException if the region is unknown or the move fails
   */
  void move(final byte[] encodedRegionName,
      final byte[] destServerName) throws HBaseIOException {
    RegionState regionState = assignmentManager.getRegionStates().
      getRegionState(Bytes.toString(encodedRegionName));
    if (regionState == null) {
      throw new UnknownRegionException(Bytes.toStringBinary(encodedRegionName));
    }

    HRegionInfo hri = regionState.getRegion();
    ServerName dest;
    if (destServerName == null || destServerName.length == 0) {
      LOG.info("Passed destination servername is null/empty so " +
        "choosing a server at random");
      // Exclude the region's current host from the candidate list.
      final List<ServerName> destServers = this.serverManager.createDestinationServersList(
        regionState.getServerName());
      dest = balancer.randomAssignment(hri, destServers);
      if (dest == null) {
        LOG.debug("Unable to determine a plan to assign " + hri);
        return;
      }
    } else {
      dest = ServerName.valueOf(Bytes.toString(destServerName));
      if (dest.equals(serverName) && balancer instanceof BaseLoadBalancer
          && !((BaseLoadBalancer)balancer).shouldBeOnMaster(hri)) {
        // To avoid unnecessary region moving later by balancer. Don't put user
        // regions on master. Regions on master could be put on other region
        // server intentionally by test however.
        LOG.debug("Skipping move of region " + hri.getRegionNameAsString()
          + " to avoid unnecessary region moving later by load balancer,"
          + " because it should not be on master");
        return;
      }
    }

    if (dest.equals(regionState.getServerName())) {
      LOG.debug("Skipping move of region " + hri.getRegionNameAsString()
        + " because region already assigned to the same server " + dest + ".");
      return;
    }

    // Now we can do the move
    RegionPlan rp = new RegionPlan(hri, regionState.getServerName(), dest);

    try {
      checkInitialized();
      if (this.cpHost != null) {
        // preMove returning true means a coprocessor vetoed the move.
        if (this.cpHost.preMove(hri, rp.getSource(), rp.getDestination())) {
          return;
        }
      }
      LOG.info(getClientIdAuditPrefix() + " move " + rp + ", running balancer");
      this.assignmentManager.balance(rp);
      if (this.cpHost != null) {
        this.cpHost.postMove(hri, rp.getSource(), rp.getDestination());
      }
    } catch (IOException ioe) {
      // Preserve HBaseIOException subtypes for callers; wrap everything else.
      if (ioe instanceof HBaseIOException) {
        throw (HBaseIOException)ioe;
      }
      throw new HBaseIOException(ioe);
    }
  }
1139 
  /**
   * Create a table: verify the namespace exists, sanity-check the descriptor,
   * run coprocessor hooks, and submit a {@link CreateTableHandler} to the
   * executor service.
   * @param hTableDescriptor descriptor of the table to create
   * @param splitKeys initial split points; null or empty for a single region
   * @throws IOException
   */
  @Override
  public void createTable(HTableDescriptor hTableDescriptor,
      byte [][] splitKeys) throws IOException {
    if (isStopped()) {
      throw new MasterNotRunningException();
    }

    String namespace = hTableDescriptor.getTableName().getNamespaceAsString();
    getNamespaceDescriptor(namespace); // ensure namespace exists

    HRegionInfo[] newRegions = getHRegionInfos(hTableDescriptor, splitKeys);
    // NOTE(review): initialization is checked only after the namespace lookup
    // and region computation above — presumably intentional; confirm.
    checkInitialized();
    sanityCheckTableDescriptor(hTableDescriptor);
    if (cpHost != null) {
      cpHost.preCreateTable(hTableDescriptor, newRegions);
    }
    LOG.info(getClientIdAuditPrefix() + " create " + hTableDescriptor);
    this.service.submit(new CreateTableHandler(this,
      this.fileSystemManager, hTableDescriptor, conf,
      newRegions, this).prepare());
    if (cpHost != null) {
      cpHost.postCreateTable(hTableDescriptor, newRegions);
    }

  }
1165 
1166   /**
1167    * Checks whether the table conforms to some sane limits, and configured
1168    * values (compression, etc) work. Throws an exception if something is wrong.
1169    * @throws IOException
1170    */
1171   private void sanityCheckTableDescriptor(final HTableDescriptor htd) throws IOException {
1172     final String CONF_KEY = "hbase.table.sanity.checks";
1173     if (!conf.getBoolean(CONF_KEY, true)) {
1174       return;
1175     }
1176     String tableVal = htd.getConfigurationValue(CONF_KEY);
1177     if (tableVal != null && !Boolean.valueOf(tableVal)) {
1178       return;
1179     }
1180 
1181     // check max file size
1182     long maxFileSizeLowerLimit = 2 * 1024 * 1024L; // 2M is the default lower limit
1183     long maxFileSize = htd.getMaxFileSize();
1184     if (maxFileSize < 0) {
1185       maxFileSize = conf.getLong(HConstants.HREGION_MAX_FILESIZE, maxFileSizeLowerLimit);
1186     }
1187     if (maxFileSize < conf.getLong("hbase.hregion.max.filesize.limit", maxFileSizeLowerLimit)) {
1188       throw new DoNotRetryIOException("MAX_FILESIZE for table descriptor or "
1189         + "\"hbase.hregion.max.filesize\" (" + maxFileSize
1190         + ") is too small, which might cause over splitting into unmanageable "
1191         + "number of regions. Set " + CONF_KEY + " to false at conf or table descriptor "
1192           + "if you want to bypass sanity checks");
1193     }
1194 
1195     // check flush size
1196     long flushSizeLowerLimit = 1024 * 1024L; // 1M is the default lower limit
1197     long flushSize = htd.getMemStoreFlushSize();
1198     if (flushSize < 0) {
1199       flushSize = conf.getLong(HConstants.HREGION_MEMSTORE_FLUSH_SIZE, flushSizeLowerLimit);
1200     }
1201     if (flushSize < conf.getLong("hbase.hregion.memstore.flush.size.limit", flushSizeLowerLimit)) {
1202       throw new DoNotRetryIOException("MEMSTORE_FLUSHSIZE for table descriptor or "
1203           + "\"hbase.hregion.memstore.flush.size\" ("+flushSize+") is too small, which might cause"
1204           + " very frequent flushing. Set " + CONF_KEY + " to false at conf or table descriptor "
1205           + "if you want to bypass sanity checks");
1206     }
1207 
1208     // check split policy class can be loaded
1209     try {
1210       RegionSplitPolicy.getSplitPolicyClass(htd, conf);
1211     } catch (Exception ex) {
1212       throw new DoNotRetryIOException(ex);
1213     }
1214 
1215     // check compression can be loaded
1216     checkCompression(htd);
1217 
1218     // check that we have at least 1 CF
1219     if (htd.getColumnFamilies().length == 0) {
1220       throw new DoNotRetryIOException("Table should have at least one column family "
1221           + "Set "+CONF_KEY+" at conf or table descriptor if you want to bypass sanity checks");
1222     }
1223 
1224     for (HColumnDescriptor hcd : htd.getColumnFamilies()) {
1225       if (hcd.getTimeToLive() <= 0) {
1226         throw new DoNotRetryIOException("TTL for column family " + hcd.getNameAsString()
1227           + "  must be positive. Set " + CONF_KEY + " to false at conf or table descriptor "
1228           + "if you want to bypass sanity checks");
1229       }
1230 
1231       // check blockSize
1232       if (hcd.getBlocksize() < 1024 || hcd.getBlocksize() > 16 * 1024 * 1024) {
1233         throw new DoNotRetryIOException("Block size for column family " + hcd.getNameAsString()
1234           + "  must be between 1K and 16MB Set "+CONF_KEY+" to false at conf or table descriptor "
1235           + "if you want to bypass sanity checks");
1236       }
1237 
1238       // check versions
1239       if (hcd.getMinVersions() < 0) {
1240         throw new DoNotRetryIOException("Min versions for column family " + hcd.getNameAsString()
1241           + "  must be positive. Set " + CONF_KEY + " to false at conf or table descriptor "
1242           + "if you want to bypass sanity checks");
1243       }
1244       // max versions already being checked
1245 
1246       // check replication scope
1247       if (hcd.getScope() < 0) {
1248         throw new DoNotRetryIOException("Replication scope for column family "
1249           + hcd.getNameAsString() + "  must be positive. Set " + CONF_KEY + " to false at conf "
1250           + "or table descriptor if you want to bypass sanity checks");
1251       }
1252 
1253       // TODO: should we check coprocessors and encryption ?
1254     }
1255   }
1256 
1257   private void startActiveMasterManager() throws KeeperException {
1258     String backupZNode = ZKUtil.joinZNode(
1259       zooKeeper.backupMasterAddressesZNode, serverName.toString());
1260     /*
1261     * Add a ZNode for ourselves in the backup master directory since we
1262     * may not become the active master. If so, we want the actual active
1263     * master to know we are backup masters, so that it won't assign
1264     * regions to us if so configured.
1265     *
1266     * If we become the active master later, ActiveMasterManager will delete
1267     * this node explicitly.  If we crash before then, ZooKeeper will delete
1268     * this node for us since it is ephemeral.
1269     */
1270     LOG.info("Adding ZNode for " + backupZNode + " in backup master directory");
1271     MasterAddressTracker.setMasterAddress(zooKeeper, backupZNode, serverName);
1272 
1273     activeMasterManager = new ActiveMasterManager(zooKeeper, serverName, this);
1274     // Start a thread to try to become the active master, so we won't block here
1275     Threads.setDaemonThreadRunning(new Thread(new Runnable() {
1276       public void run() {
1277         int timeout = conf.getInt(HConstants.ZK_SESSION_TIMEOUT,
1278           HConstants.DEFAULT_ZK_SESSION_TIMEOUT);
1279         // If we're a backup master, stall until a primary to writes his address
1280         if (conf.getBoolean(HConstants.MASTER_TYPE_BACKUP,
1281             HConstants.DEFAULT_MASTER_TYPE_BACKUP)) {
1282           LOG.debug("HMaster started in backup mode. "
1283             + "Stalling until master znode is written.");
1284           // This will only be a minute or so while the cluster starts up,
1285           // so don't worry about setting watches on the parent znode
1286           while (!activeMasterManager.hasActiveMaster()) {
1287             LOG.debug("Waiting for master address ZNode to be written "
1288               + "(Also watching cluster state node)");
1289             Threads.sleep(timeout);
1290           }
1291         }
1292         MonitoredTask status = TaskMonitor.get().createStatus("Master startup");
1293         status.setDescription("Master startup");
1294         try {
1295           if (activeMasterManager.blockUntilBecomingActiveMaster(timeout, status)) {
1296             finishActiveMasterInitialization(status);
1297           }
1298         } catch (Throwable t) {
1299           status.setStatus("Failed to become active: " + t.getMessage());
1300           LOG.fatal("Failed to become active master", t);
1301           // HBASE-5680: Likely hadoop23 vs hadoop 20.x/1.x incompatibility
1302           if (t instanceof NoClassDefFoundError &&
1303               t.getMessage().contains("org/apache/hadoop/hdfs/protocol/FSConstants$SafeModeAction")) {
1304             // improved error message for this special case
1305             abort("HBase is having a problem with its Hadoop jars.  You may need to "
1306               + "recompile HBase against Hadoop version "
1307               +  org.apache.hadoop.util.VersionInfo.getVersion()
1308               + " or change your hadoop jars to start properly", t);
1309           } else {
1310             abort("Unhandled exception. Starting shutdown.", t);
1311           }
1312         } finally {
1313           status.cleanup();
1314         }
1315       }
1316     }, getServerName().toShortString() + ".activeMasterManager"));
1317   }
1318 
1319   private void checkCompression(final HTableDescriptor htd)
1320   throws IOException {
1321     if (!this.masterCheckCompression) return;
1322     for (HColumnDescriptor hcd : htd.getColumnFamilies()) {
1323       checkCompression(hcd);
1324     }
1325   }
1326 
1327   private void checkCompression(final HColumnDescriptor hcd)
1328   throws IOException {
1329     if (!this.masterCheckCompression) return;
1330     CompressionTest.testCompression(hcd.getCompression());
1331     CompressionTest.testCompression(hcd.getCompactionCompression());
1332   }
1333 
1334   private HRegionInfo[] getHRegionInfos(HTableDescriptor hTableDescriptor,
1335     byte[][] splitKeys) {
1336     long regionId = System.currentTimeMillis();
1337     HRegionInfo[] hRegionInfos = null;
1338     if (splitKeys == null || splitKeys.length == 0) {
1339       hRegionInfos = new HRegionInfo[]{new HRegionInfo(hTableDescriptor.getTableName(), null, null,
1340                 false, regionId)};
1341     } else {
1342       int numRegions = splitKeys.length + 1;
1343       hRegionInfos = new HRegionInfo[numRegions];
1344       byte[] startKey = null;
1345       byte[] endKey = null;
1346       for (int i = 0; i < numRegions; i++) {
1347         endKey = (i == splitKeys.length) ? null : splitKeys[i];
1348         hRegionInfos[i] =
1349              new HRegionInfo(hTableDescriptor.getTableName(), startKey, endKey,
1350                  false, regionId);
1351         startKey = endKey;
1352       }
1353     }
1354     return hRegionInfos;
1355   }
1356 
1357   private static boolean isCatalogTable(final TableName tableName) {
1358     return tableName.equals(TableName.META_TABLE_NAME);
1359   }
1360 
1361   @Override
1362   public void deleteTable(final TableName tableName) throws IOException {
1363     checkInitialized();
1364     if (cpHost != null) {
1365       cpHost.preDeleteTable(tableName);
1366     }
1367     LOG.info(getClientIdAuditPrefix() + " delete " + tableName);
1368     this.service.submit(new DeleteTableHandler(tableName, this, this).prepare());
1369     if (cpHost != null) {
1370       cpHost.postDeleteTable(tableName);
1371     }
1372   }
1373 
1374   @Override
1375   public void truncateTable(TableName tableName, boolean preserveSplits) throws IOException {
1376     checkInitialized();
1377     if (cpHost != null) {
1378       cpHost.preTruncateTable(tableName);
1379     }
1380     LOG.info(getClientIdAuditPrefix() + " truncate " + tableName);
1381     TruncateTableHandler handler = new TruncateTableHandler(tableName, this, this, preserveSplits);
1382     handler.prepare();
1383     handler.process();
1384     if (cpHost != null) {
1385       cpHost.postTruncateTable(tableName);
1386     }
1387   }
1388 
1389   @Override
1390   public void addColumn(final TableName tableName, final HColumnDescriptor columnDescriptor)
1391       throws IOException {
1392     checkInitialized();
1393     checkCompression(columnDescriptor);
1394     if (cpHost != null) {
1395       if (cpHost.preAddColumn(tableName, columnDescriptor)) {
1396         return;
1397       }
1398     }
1399     //TODO: we should process this (and some others) in an executor
1400     new TableAddFamilyHandler(tableName, columnDescriptor, this, this).prepare().process();
1401     if (cpHost != null) {
1402       cpHost.postAddColumn(tableName, columnDescriptor);
1403     }
1404   }
1405 
1406   @Override
1407   public void modifyColumn(TableName tableName, HColumnDescriptor descriptor)
1408       throws IOException {
1409     checkInitialized();
1410     checkCompression(descriptor);
1411     if (cpHost != null) {
1412       if (cpHost.preModifyColumn(tableName, descriptor)) {
1413         return;
1414       }
1415     }
1416     LOG.info(getClientIdAuditPrefix() + " modify " + descriptor);
1417     new TableModifyFamilyHandler(tableName, descriptor, this, this)
1418       .prepare().process();
1419     if (cpHost != null) {
1420       cpHost.postModifyColumn(tableName, descriptor);
1421     }
1422   }
1423 
1424   @Override
1425   public void deleteColumn(final TableName tableName, final byte[] columnName)
1426       throws IOException {
1427     checkInitialized();
1428     if (cpHost != null) {
1429       if (cpHost.preDeleteColumn(tableName, columnName)) {
1430         return;
1431       }
1432     }
1433     LOG.info(getClientIdAuditPrefix() + " delete " + Bytes.toString(columnName));
1434     new TableDeleteFamilyHandler(tableName, columnName, this, this).prepare().process();
1435     if (cpHost != null) {
1436       cpHost.postDeleteColumn(tableName, columnName);
1437     }
1438   }
1439 
1440   @Override
1441   public void enableTable(final TableName tableName) throws IOException {
1442     checkInitialized();
1443     if (cpHost != null) {
1444       cpHost.preEnableTable(tableName);
1445     }
1446     LOG.info(getClientIdAuditPrefix() + " enable " + tableName);
1447     this.service.submit(new EnableTableHandler(this, tableName,
1448       assignmentManager, tableLockManager, false).prepare());
1449     if (cpHost != null) {
1450       cpHost.postEnableTable(tableName);
1451    }
1452   }
1453 
1454   @Override
1455   public void disableTable(final TableName tableName) throws IOException {
1456     checkInitialized();
1457     if (cpHost != null) {
1458       cpHost.preDisableTable(tableName);
1459     }
1460     LOG.info(getClientIdAuditPrefix() + " disable " + tableName);
1461     this.service.submit(new DisableTableHandler(this, tableName,
1462       assignmentManager, tableLockManager, false).prepare());
1463     if (cpHost != null) {
1464       cpHost.postDisableTable(tableName);
1465     }
1466   }
1467 
  /**
   * Return the region and current deployment for the region containing
   * the given row. If the region cannot be found, returns null. If it
   * is found, but not currently deployed, the second element of the pair
   * may be null.
   *
   * @param tableName table the row belongs to
   * @param rowKey row used to locate the containing region
   * @throws IOException if the hbase:meta scan fails
   */
  Pair<HRegionInfo, ServerName> getTableRegionForRow(
      final TableName tableName, final byte [] rowKey)
  throws IOException {
    // Holder written from inside the visitor callback; AtomicReference is
    // used as a mutable cell, not for cross-thread visibility.
    final AtomicReference<Pair<HRegionInfo, ServerName>> result =
      new AtomicReference<Pair<HRegionInfo, ServerName>>(null);

    MetaScannerVisitor visitor =
      new MetaScannerVisitorBase() {
        @Override
        public boolean processRow(Result data) throws IOException {
          // Empty rows are skipped; keep scanning.
          if (data == null || data.size() <= 0) {
            return true;
          }
          Pair<HRegionInfo, ServerName> pair = HRegionInfo.getHRegionInfoAndServerName(data);
          // Unparseable row: stop the scan without a result.
          if (pair == null) {
            return false;
          }
          // Scanned past the target table: stop without a result.
          if (!pair.getFirst().getTable().equals(tableName)) {
            return false;
          }
          result.set(pair);
          return true;
        }
    };

    // Scan at most 1 row of hbase:meta starting at (tableName, rowKey).
    MetaScanner.metaScan(conf, visitor, tableName, rowKey, 1);
    return result.get();
  }
1502 
1503   @Override
1504   public void modifyTable(final TableName tableName, final HTableDescriptor descriptor)
1505       throws IOException {
1506     checkInitialized();
1507     sanityCheckTableDescriptor(descriptor);
1508     if (cpHost != null) {
1509       cpHost.preModifyTable(tableName, descriptor);
1510     }
1511     LOG.info(getClientIdAuditPrefix() + " modify " + tableName);
1512     new ModifyTableHandler(tableName, descriptor, this, this).prepare().process();
1513     if (cpHost != null) {
1514       cpHost.postModifyTable(tableName, descriptor);
1515     }
1516   }
1517 
1518   @Override
1519   public void checkTableModifiable(final TableName tableName)
1520       throws IOException, TableNotFoundException, TableNotDisabledException {
1521     if (isCatalogTable(tableName)) {
1522       throw new IOException("Can't modify catalog tables");
1523     }
1524     if (!MetaTableAccessor.tableExists(getShortCircuitConnection(), tableName)) {
1525       throw new TableNotFoundException(tableName);
1526     }
1527     if (!getAssignmentManager().getTableStateManager().
1528         isTableState(tableName, TableState.State.DISABLED)) {
1529       throw new TableNotDisabledException(tableName);
1530     }
1531   }
1532 
1533   /**
1534    * @return cluster status
1535    */
1536   public ClusterStatus getClusterStatus() throws InterruptedIOException {
1537     // Build Set of backup masters from ZK nodes
1538     List<String> backupMasterStrings;
1539     try {
1540       backupMasterStrings = ZKUtil.listChildrenNoWatch(this.zooKeeper,
1541         this.zooKeeper.backupMasterAddressesZNode);
1542     } catch (KeeperException e) {
1543       LOG.warn(this.zooKeeper.prefix("Unable to list backup servers"), e);
1544       backupMasterStrings = new ArrayList<String>(0);
1545     }
1546     List<ServerName> backupMasters = new ArrayList<ServerName>(
1547                                           backupMasterStrings.size());
1548     for (String s: backupMasterStrings) {
1549       try {
1550         byte [] bytes;
1551         try {
1552           bytes = ZKUtil.getData(this.zooKeeper, ZKUtil.joinZNode(
1553               this.zooKeeper.backupMasterAddressesZNode, s));
1554         } catch (InterruptedException e) {
1555           throw new InterruptedIOException();
1556         }
1557         if (bytes != null) {
1558           ServerName sn;
1559           try {
1560             sn = ServerName.parseFrom(bytes);
1561           } catch (DeserializationException e) {
1562             LOG.warn("Failed parse, skipping registering backup server", e);
1563             continue;
1564           }
1565           backupMasters.add(sn);
1566         }
1567       } catch (KeeperException e) {
1568         LOG.warn(this.zooKeeper.prefix("Unable to get information about " +
1569                  "backup servers"), e);
1570       }
1571     }
1572     Collections.sort(backupMasters, new Comparator<ServerName>() {
1573       @Override
1574       public int compare(ServerName s1, ServerName s2) {
1575         return s1.getServerName().compareTo(s2.getServerName());
1576       }});
1577 
1578     String clusterId = fileSystemManager != null ?
1579       fileSystemManager.getClusterId().toString() : null;
1580     Map<String, RegionState> regionsInTransition = assignmentManager != null ?
1581       assignmentManager.getRegionStates().getRegionsInTransition() : null;
1582     String[] coprocessors = cpHost != null ? getMasterCoprocessors() : null;
1583     boolean balancerOn = loadBalancerTracker != null ?
1584       loadBalancerTracker.isBalancerOn() : false;
1585     Map<ServerName, ServerLoad> onlineServers = null;
1586     Set<ServerName> deadServers = null;
1587     if (serverManager != null) {
1588       deadServers = serverManager.getDeadServers().copyServerNames();
1589       onlineServers = serverManager.getOnlineServers();
1590     }
1591     return new ClusterStatus(VersionInfo.getVersion(), clusterId,
1592       onlineServers, deadServers, serverName, backupMasters,
1593       regionsInTransition, coprocessors, balancerOn);
1594   }
1595 
  /**
   * The set of loaded coprocessors is stored in a static set. Since it's
   * statically allocated, it does not require that HMaster's cpHost be
   * initialized prior to accessing it.
   * @return a String representation of the set of names of the loaded
   * coprocessors.
   */
  public static String getLoadedCoprocessors() {
    // Delegates to the static registry on CoprocessorHost.
    return CoprocessorHost.getLoadedCoprocessors().toString();
  }
1606 
1607   /**
1608    * @return timestamp in millis when HMaster was started.
1609    */
1610   public long getMasterStartTime() {
1611     return startcode;
1612   }
1613 
1614   /**
1615    * @return timestamp in millis when HMaster became the active master.
1616    */
1617   public long getMasterActiveTime() {
1618     return masterActiveTime;
1619   }
1620 
1621   public int getRegionServerInfoPort(final ServerName sn) {
1622     RegionServerInfo info = this.regionServerTracker.getRegionServerInfo(sn);
1623     if (info == null || info.getInfoPort() == 0) {
1624       return conf.getInt(HConstants.REGIONSERVER_INFO_PORT,
1625         HConstants.DEFAULT_REGIONSERVER_INFOPORT);
1626     }
1627     return info.getInfoPort();
1628   }
1629 
1630   /**
1631    * @return array of coprocessor SimpleNames.
1632    */
1633   public String[] getMasterCoprocessors() {
1634     Set<String> masterCoprocessors = getMasterCoprocessorHost().getCoprocessors();
1635     return masterCoprocessors.toArray(new String[masterCoprocessors.size()]);
1636   }
1637 
1638   @Override
1639   public void abort(final String msg, final Throwable t) {
1640     if (isAborted() || isStopped()) {
1641       return;
1642     }
1643     if (cpHost != null) {
1644       // HBASE-4014: dump a list of loaded coprocessors.
1645       LOG.fatal("Master server abort: loaded coprocessors are: " +
1646           getLoadedCoprocessors());
1647     }
1648     if (t != null) LOG.fatal(msg, t);
1649     stop(msg);
1650   }
1651 
1652   @Override
1653   public ZooKeeperWatcher getZooKeeper() {
1654     return zooKeeper;
1655   }
1656 
1657   @Override
1658   public MasterCoprocessorHost getMasterCoprocessorHost() {
1659     return cpHost;
1660   }
1661 
1662   @Override
1663   public MasterQuotaManager getMasterQuotaManager() {
1664     return quotaManager;
1665   }
1666 
1667   @Override
1668   public ServerName getServerName() {
1669     return this.serverName;
1670   }
1671 
1672   @Override
1673   public AssignmentManager getAssignmentManager() {
1674     return this.assignmentManager;
1675   }
1676 
1677   public MemoryBoundedLogMessageBuffer getRegionServerFatalLogBuffer() {
1678     return rsFatals;
1679   }
1680 
  /**
   * Shut down the whole cluster. The ordering matters: first give
   * coprocessors a chance to act, then tell every region server to stop,
   * and finally mark the cluster as down in ZooKeeper. Errors in each stage
   * are logged but do not prevent the later stages from running.
   */
  public void shutdown() {
    if (cpHost != null) {
      try {
        cpHost.preShutdown();
      } catch (IOException ioe) {
        // Best-effort: a failing coprocessor must not block shutdown.
        LOG.error("Error call master coprocessor preShutdown()", ioe);
      }
    }

    if (this.serverManager != null) {
      this.serverManager.shutdownCluster();
    }
    if (this.clusterStatusTracker != null){
      try {
        this.clusterStatusTracker.setClusterDown();
      } catch (KeeperException e) {
        // Best-effort: ZK may already be unavailable at this point.
        LOG.error("ZooKeeper exception trying to set cluster as down in ZK", e);
      }
    }
  }
1701 
1702   public void stopMaster() {
1703     if (cpHost != null) {
1704       try {
1705         cpHost.preStopMaster();
1706       } catch (IOException ioe) {
1707         LOG.error("Error call master coprocessor preStopMaster()", ioe);
1708       }
1709     }
1710     stop("Stopped by " + Thread.currentThread().getName());
1711   }
1712 
1713   void checkServiceStarted() throws ServerNotRunningYetException {
1714     if (!serviceStarted) {
1715       throw new ServerNotRunningYetException("Server is not running yet");
1716     }
1717   }
1718 
1719   void checkInitialized() throws PleaseHoldException, ServerNotRunningYetException {
1720     checkServiceStarted();
1721     if (!this.initialized) {
1722       throw new PleaseHoldException("Master is initializing");
1723     }
1724   }
1725 
1726   void checkNamespaceManagerReady() throws IOException {
1727     checkInitialized();
1728     if (tableNamespaceManager == null ||
1729         !tableNamespaceManager.isTableAvailableAndInitialized()) {
1730       throw new IOException("Table Namespace Manager not ready yet, try again later");
1731     }
1732   }
1733   /**
1734    * Report whether this master is currently the active master or not.
1735    * If not active master, we are parked on ZK waiting to become active.
1736    *
1737    * This method is used for testing.
1738    *
1739    * @return true if active master, false if not.
1740    */
1741   public boolean isActiveMaster() {
1742     return isActiveMaster;
1743   }
1744 
1745   /**
1746    * Report whether this master has completed with its initialization and is
1747    * ready.  If ready, the master is also the active master.  A standby master
1748    * is never ready.
1749    *
1750    * This method is used for testing.
1751    *
1752    * @return true if master is ready to go, false if not.
1753    */
1754   @Override
1755   public boolean isInitialized() {
1756     return initialized;
1757   }
1758 
1759   /**
1760    * ServerShutdownHandlerEnabled is set false before completing
1761    * assignMeta to prevent processing of ServerShutdownHandler.
1762    * @return true if assignMeta has completed;
1763    */
1764   @Override
1765   public boolean isServerShutdownHandlerEnabled() {
1766     return this.serverShutdownHandlerEnabled;
1767   }
1768 
1769   /**
1770    * Report whether this master has started initialization and is about to do meta region assignment
1771    * @return true if master is in initialization & about to assign hbase:meta regions
1772    */
1773   public boolean isInitializationStartsMetaRegionAssignment() {
1774     return this.initializationBeforeMetaAssignment;
1775   }
1776 
1777   public void assignRegion(HRegionInfo hri) {
1778     assignmentManager.assign(hri);
1779   }
1780 
1781   /**
1782    * Compute the average load across all region servers.
1783    * Currently, this uses a very naive computation - just uses the number of
1784    * regions being served, ignoring stats about number of requests.
1785    * @return the average load
1786    */
1787   public double getAverageLoad() {
1788     if (this.assignmentManager == null) {
1789       return 0;
1790     }
1791 
1792     RegionStates regionStates = this.assignmentManager.getRegionStates();
1793     if (regionStates == null) {
1794       return 0;
1795     }
1796     return regionStates.getAverageLoad();
1797   }
1798 
1799   @Override
1800   public boolean registerService(Service instance) {
1801     /*
1802      * No stacking of instances is allowed for a single service name
1803      */
1804     Descriptors.ServiceDescriptor serviceDesc = instance.getDescriptorForType();
1805     if (coprocessorServiceHandlers.containsKey(serviceDesc.getFullName())) {
1806       LOG.error("Coprocessor service "+serviceDesc.getFullName()+
1807           " already registered, rejecting request from "+instance
1808       );
1809       return false;
1810     }
1811 
1812     coprocessorServiceHandlers.put(serviceDesc.getFullName(), instance);
1813     if (LOG.isDebugEnabled()) {
1814       LOG.debug("Registered master coprocessor service: service="+serviceDesc.getFullName());
1815     }
1816     return true;
1817   }
1818 
1819   /**
1820    * Utility for constructing an instance of the passed HMaster class.
1821    * @param masterClass
1822    * @param conf
1823    * @return HMaster instance.
1824    */
1825   public static HMaster constructMaster(Class<? extends HMaster> masterClass,
1826       final Configuration conf, final CoordinatedStateManager cp)  {
1827     try {
1828       Constructor<? extends HMaster> c =
1829         masterClass.getConstructor(Configuration.class, CoordinatedStateManager.class);
1830       return c.newInstance(conf, cp);
1831     } catch (InvocationTargetException ite) {
1832       Throwable target = ite.getTargetException() != null?
1833         ite.getTargetException(): ite;
1834       if (target.getCause() != null) target = target.getCause();
1835       throw new RuntimeException("Failed construction of Master: " +
1836         masterClass.toString(), target);
1837     } catch (Exception e) {
1838       throw new RuntimeException("Failed construction of Master: " +
1839         masterClass.toString() + ((e.getCause() != null)?
1840           e.getCause().getMessage(): ""), e);
1841     }
1842   }
1843 
  /**
   * Process entry point: logs the build version, then delegates all
   * argument parsing and startup to HMasterCommandLine.
   * @see org.apache.hadoop.hbase.master.HMasterCommandLine
   */
  public static void main(String [] args) {
    VersionInfo.logVersion();
    new HMasterCommandLine(HMaster.class).doMain(args);
  }
1851 
1852   public HFileCleaner getHFileCleaner() {
1853     return this.hfileCleaner;
1854   }
1855 
1856   /**
1857    * Exposed for TESTING!
1858    * @return the underlying snapshot manager
1859    */
1860   public SnapshotManager getSnapshotManagerForTesting() {
1861     return this.snapshotManager;
1862   }
1863 
1864   @Override
1865   public void createNamespace(NamespaceDescriptor descriptor) throws IOException {
1866     TableName.isLegalNamespaceName(Bytes.toBytes(descriptor.getName()));
1867     checkNamespaceManagerReady();
1868     if (cpHost != null) {
1869       if (cpHost.preCreateNamespace(descriptor)) {
1870         return;
1871       }
1872     }
1873     LOG.info(getClientIdAuditPrefix() + " creating " + descriptor);
1874     tableNamespaceManager.create(descriptor);
1875     if (cpHost != null) {
1876       cpHost.postCreateNamespace(descriptor);
1877     }
1878   }
1879 
1880   @Override
1881   public void modifyNamespace(NamespaceDescriptor descriptor) throws IOException {
1882     TableName.isLegalNamespaceName(Bytes.toBytes(descriptor.getName()));
1883     checkNamespaceManagerReady();
1884     if (cpHost != null) {
1885       if (cpHost.preModifyNamespace(descriptor)) {
1886         return;
1887       }
1888     }
1889     LOG.info(getClientIdAuditPrefix() + " modify " + descriptor);
1890     tableNamespaceManager.update(descriptor);
1891     if (cpHost != null) {
1892       cpHost.postModifyNamespace(descriptor);
1893     }
1894   }
1895 
1896   @Override
1897   public void deleteNamespace(String name) throws IOException {
1898     checkNamespaceManagerReady();
1899     if (cpHost != null) {
1900       if (cpHost.preDeleteNamespace(name)) {
1901         return;
1902       }
1903     }
1904     LOG.info(getClientIdAuditPrefix() + " delete " + name);
1905     tableNamespaceManager.remove(name);
1906     if (cpHost != null) {
1907       cpHost.postDeleteNamespace(name);
1908     }
1909   }
1910 
1911   @Override
1912   public NamespaceDescriptor getNamespaceDescriptor(String name) throws IOException {
1913     checkNamespaceManagerReady();
1914     NamespaceDescriptor nsd = tableNamespaceManager.get(name);
1915     if (nsd == null) {
1916       throw new NamespaceNotFoundException(name);
1917     }
1918     return nsd;
1919   }
1920 
1921   @Override
1922   public List<NamespaceDescriptor> listNamespaceDescriptors() throws IOException {
1923     checkNamespaceManagerReady();
1924     return Lists.newArrayList(tableNamespaceManager.list());
1925   }
1926 
1927   @Override
1928   public List<HTableDescriptor> listTableDescriptorsByNamespace(String name) throws IOException {
1929     getNamespaceDescriptor(name); // check that namespace exists
1930     return Lists.newArrayList(tableDescriptors.getByNamespace(name).values());
1931   }
1932 
1933   @Override
1934   public List<TableName> listTableNamesByNamespace(String name) throws IOException {
1935     List<TableName> tableNames = Lists.newArrayList();
1936     getNamespaceDescriptor(name); // check that namespace exists
1937     for (HTableDescriptor descriptor: tableDescriptors.getByNamespace(name).values()) {
1938       tableNames.add(descriptor.getTableName());
1939     }
1940     return tableNames;
1941   }
1942 }