/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.master;

import static org.apache.hadoop.hbase.HConstants.HBASE_MASTER_LOGCLEANER_PLUGINS;

import com.google.protobuf.Descriptors;
import com.google.protobuf.Service;
import java.io.IOException;
import java.io.InterruptedIOException;
import java.lang.reflect.Constructor;
import java.lang.reflect.InvocationTargetException;
import java.net.InetAddress;
import java.net.InetSocketAddress;
import java.net.UnknownHostException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.Comparator;
import java.util.EnumSet;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Objects;
import java.util.Optional;
import java.util.Set;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.Future;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.TimeoutException;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.function.Function;
import java.util.regex.Pattern;
import java.util.stream.Collectors;
import javax.servlet.ServletException;
import javax.servlet.http.HttpServlet;
import javax.servlet.http.HttpServletRequest;
import javax.servlet.http.HttpServletResponse;

import org.apache.commons.lang3.StringUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.ChoreService;
import org.apache.hadoop.hbase.ClusterId;
import org.apache.hadoop.hbase.ClusterMetrics;
import org.apache.hadoop.hbase.ClusterMetrics.Option;
import org.apache.hadoop.hbase.ClusterMetricsBuilder;
import org.apache.hadoop.hbase.DoNotRetryIOException;
import org.apache.hadoop.hbase.HBaseIOException;
import org.apache.hadoop.hbase.HBaseInterfaceAudience;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.InvalidFamilyOperationException;
import org.apache.hadoop.hbase.MasterNotRunningException;
import org.apache.hadoop.hbase.MetaTableAccessor;
import org.apache.hadoop.hbase.NamespaceDescriptor;
import org.apache.hadoop.hbase.PleaseHoldException;
import org.apache.hadoop.hbase.ReplicationPeerNotFoundException;
import org.apache.hadoop.hbase.ServerMetrics;
import org.apache.hadoop.hbase.ServerName;
import org.apache.hadoop.hbase.TableDescriptors;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.TableNotDisabledException;
import org.apache.hadoop.hbase.TableNotFoundException;
import org.apache.hadoop.hbase.UnknownRegionException;
import org.apache.hadoop.hbase.client.ColumnFamilyDescriptor;
import org.apache.hadoop.hbase.client.ColumnFamilyDescriptorBuilder;
import org.apache.hadoop.hbase.client.MasterSwitchType;
import org.apache.hadoop.hbase.client.RegionInfo;
import org.apache.hadoop.hbase.client.RegionInfoBuilder;
import org.apache.hadoop.hbase.client.RegionStatesCount;
import org.apache.hadoop.hbase.client.TableDescriptor;
import org.apache.hadoop.hbase.client.TableDescriptorBuilder;
import org.apache.hadoop.hbase.client.TableState;
import org.apache.hadoop.hbase.coprocessor.CoprocessorHost;
import org.apache.hadoop.hbase.exceptions.DeserializationException;
import org.apache.hadoop.hbase.executor.ExecutorType;
import org.apache.hadoop.hbase.favored.FavoredNodesManager;
import org.apache.hadoop.hbase.favored.FavoredNodesPromoter;
import org.apache.hadoop.hbase.http.InfoServer;
import org.apache.hadoop.hbase.ipc.CoprocessorRpcUtils;
import org.apache.hadoop.hbase.ipc.RpcServer;
import org.apache.hadoop.hbase.ipc.ServerNotRunningYetException;
import org.apache.hadoop.hbase.log.HBaseMarkers;
import org.apache.hadoop.hbase.master.MasterRpcServices.BalanceSwitchMode;
import org.apache.hadoop.hbase.master.assignment.AssignmentManager;
import org.apache.hadoop.hbase.master.assignment.MergeTableRegionsProcedure;
import org.apache.hadoop.hbase.master.assignment.RegionStates;
import org.apache.hadoop.hbase.master.assignment.RegionStates.RegionStateNode;
import org.apache.hadoop.hbase.master.balancer.BalancerChore;
import org.apache.hadoop.hbase.master.balancer.BaseLoadBalancer;
import org.apache.hadoop.hbase.master.balancer.ClusterStatusChore;
import org.apache.hadoop.hbase.master.balancer.LoadBalancerFactory;
import org.apache.hadoop.hbase.master.cleaner.DirScanPool;
import org.apache.hadoop.hbase.master.cleaner.HFileCleaner;
import org.apache.hadoop.hbase.master.cleaner.LogCleaner;
import org.apache.hadoop.hbase.master.cleaner.ReplicationBarrierCleaner;
import org.apache.hadoop.hbase.master.locking.LockManager;
import org.apache.hadoop.hbase.master.normalizer.NormalizationPlan;
import org.apache.hadoop.hbase.master.normalizer.NormalizationPlan.PlanType;
import org.apache.hadoop.hbase.master.normalizer.RegionNormalizer;
import org.apache.hadoop.hbase.master.normalizer.RegionNormalizerChore;
import org.apache.hadoop.hbase.master.normalizer.RegionNormalizerFactory;
import org.apache.hadoop.hbase.master.procedure.CreateTableProcedure;
import org.apache.hadoop.hbase.master.procedure.DeleteNamespaceProcedure;
import org.apache.hadoop.hbase.master.procedure.DeleteTableProcedure;
import org.apache.hadoop.hbase.master.procedure.DisableTableProcedure;
import org.apache.hadoop.hbase.master.procedure.EnableTableProcedure;
import org.apache.hadoop.hbase.master.procedure.InitMetaProcedure;
import org.apache.hadoop.hbase.master.procedure.MasterProcedureConstants;
import org.apache.hadoop.hbase.master.procedure.MasterProcedureEnv;
import org.apache.hadoop.hbase.master.procedure.MasterProcedureScheduler;
import org.apache.hadoop.hbase.master.procedure.MasterProcedureUtil;
import org.apache.hadoop.hbase.master.procedure.MasterProcedureUtil.NonceProcedureRunnable;
import org.apache.hadoop.hbase.master.procedure.ModifyTableProcedure;
import org.apache.hadoop.hbase.master.procedure.ProcedurePrepareLatch;
import org.apache.hadoop.hbase.master.procedure.ServerCrashProcedure;
import org.apache.hadoop.hbase.master.procedure.TruncateTableProcedure;
import org.apache.hadoop.hbase.master.replication.AddPeerProcedure;
import org.apache.hadoop.hbase.master.replication.DisablePeerProcedure;
import org.apache.hadoop.hbase.master.replication.EnablePeerProcedure;
import org.apache.hadoop.hbase.master.replication.ModifyPeerProcedure;
import org.apache.hadoop.hbase.master.replication.RemovePeerProcedure;
import org.apache.hadoop.hbase.master.replication.ReplicationPeerManager;
import org.apache.hadoop.hbase.master.replication.UpdatePeerConfigProcedure;
import org.apache.hadoop.hbase.master.snapshot.SnapshotManager;
import org.apache.hadoop.hbase.master.zksyncer.MasterAddressSyncer;
import org.apache.hadoop.hbase.master.zksyncer.MetaLocationSyncer;
import org.apache.hadoop.hbase.mob.MobConstants;
import org.apache.hadoop.hbase.monitoring.MemoryBoundedLogMessageBuffer;
import org.apache.hadoop.hbase.monitoring.MonitoredTask;
import org.apache.hadoop.hbase.monitoring.TaskMonitor;
import org.apache.hadoop.hbase.procedure.MasterProcedureManagerHost;
import org.apache.hadoop.hbase.procedure.flush.MasterFlushTableProcedureManager;
import org.apache.hadoop.hbase.procedure2.LockedResource;
import org.apache.hadoop.hbase.procedure2.Procedure;
import org.apache.hadoop.hbase.procedure2.ProcedureEvent;
import org.apache.hadoop.hbase.procedure2.ProcedureExecutor;
import org.apache.hadoop.hbase.procedure2.RemoteProcedureDispatcher.RemoteProcedure;
import org.apache.hadoop.hbase.procedure2.RemoteProcedureException;
import org.apache.hadoop.hbase.procedure2.store.ProcedureStore.ProcedureStoreListener;
import org.apache.hadoop.hbase.procedure2.store.wal.WALProcedureStore;
import org.apache.hadoop.hbase.quotas.MasterQuotaManager;
import org.apache.hadoop.hbase.quotas.MasterQuotasObserver;
import org.apache.hadoop.hbase.quotas.QuotaObserverChore;
import org.apache.hadoop.hbase.quotas.QuotaTableUtil;
import org.apache.hadoop.hbase.quotas.QuotaUtil;
import org.apache.hadoop.hbase.quotas.SnapshotQuotaObserverChore;
import org.apache.hadoop.hbase.quotas.SpaceQuotaSnapshot;
import org.apache.hadoop.hbase.quotas.SpaceQuotaSnapshot.SpaceQuotaStatus;
import org.apache.hadoop.hbase.quotas.SpaceQuotaSnapshotNotifier;
import org.apache.hadoop.hbase.quotas.SpaceQuotaSnapshotNotifierFactory;
import org.apache.hadoop.hbase.quotas.SpaceViolationPolicy;
import org.apache.hadoop.hbase.regionserver.DefaultStoreEngine;
import org.apache.hadoop.hbase.regionserver.HRegionServer;
import org.apache.hadoop.hbase.regionserver.HStore;
import org.apache.hadoop.hbase.regionserver.RSRpcServices;
import org.apache.hadoop.hbase.regionserver.RegionCoprocessorHost;
import org.apache.hadoop.hbase.regionserver.RegionSplitPolicy;
import org.apache.hadoop.hbase.regionserver.compactions.ExploringCompactionPolicy;
import org.apache.hadoop.hbase.regionserver.compactions.FIFOCompactionPolicy;
import org.apache.hadoop.hbase.replication.ReplicationException;
import org.apache.hadoop.hbase.replication.ReplicationLoadSource;
import org.apache.hadoop.hbase.replication.ReplicationPeerConfig;
import org.apache.hadoop.hbase.replication.ReplicationPeerDescription;
import org.apache.hadoop.hbase.replication.ReplicationUtils;
import org.apache.hadoop.hbase.replication.master.ReplicationHFileCleaner;
import org.apache.hadoop.hbase.replication.master.ReplicationLogCleaner;
import org.apache.hadoop.hbase.replication.master.ReplicationPeerConfigUpgrader;
import org.apache.hadoop.hbase.replication.regionserver.ReplicationStatus;
import org.apache.hadoop.hbase.security.AccessDeniedException;
import org.apache.hadoop.hbase.security.UserProvider;
import org.apache.hadoop.hbase.trace.TraceUtil;
import org.apache.hadoop.hbase.util.Addressing;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.CompressionTest;
import org.apache.hadoop.hbase.util.EncryptionTest;
import org.apache.hadoop.hbase.util.HBaseFsck;
import org.apache.hadoop.hbase.util.HFileArchiveUtil;
import org.apache.hadoop.hbase.util.HasThread;
import org.apache.hadoop.hbase.util.IdLock;
import org.apache.hadoop.hbase.util.ModifyRegionUtils;
import org.apache.hadoop.hbase.util.Pair;
import org.apache.hadoop.hbase.util.RetryCounter;
import org.apache.hadoop.hbase.util.RetryCounterFactory;
import org.apache.hadoop.hbase.util.Threads;
import org.apache.hadoop.hbase.util.VersionInfo;
import org.apache.hadoop.hbase.zookeeper.LoadBalancerTracker;
import org.apache.hadoop.hbase.zookeeper.MasterAddressTracker;
import org.apache.hadoop.hbase.zookeeper.RegionNormalizerTracker;
import org.apache.hadoop.hbase.zookeeper.ZKClusterId;
import org.apache.hadoop.hbase.zookeeper.ZKUtil;
import org.apache.hadoop.hbase.zookeeper.ZKWatcher;
import org.apache.hadoop.hbase.zookeeper.ZNodePaths;
import org.apache.yetus.audience.InterfaceAudience;
import org.apache.zookeeper.KeeperException;
import org.eclipse.jetty.server.Server;
import org.eclipse.jetty.server.ServerConnector;
import org.eclipse.jetty.servlet.ServletHolder;
import org.eclipse.jetty.webapp.WebAppContext;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import org.apache.hbase.thirdparty.com.google.common.annotations.VisibleForTesting;
import org.apache.hbase.thirdparty.com.google.common.collect.Lists;
import org.apache.hbase.thirdparty.com.google.common.collect.Maps;

import org.apache.hadoop.hbase.shaded.protobuf.ProtobufUtil;
import org.apache.hadoop.hbase.shaded.protobuf.generated.AdminProtos.GetRegionInfoResponse.CompactionState;
import org.apache.hadoop.hbase.shaded.protobuf.generated.SnapshotProtos.SnapshotDescription;
import org.apache.hadoop.hbase.shaded.protobuf.generated.WALProtos;

/**
 * HMaster is the "master server" for HBase. An HBase cluster has one active
 * master.  If many masters are started, all compete.  Whichever wins goes on to
 * run the cluster.  All others park themselves in their constructor until
 * master or cluster shutdown or until the active master loses its lease in
 * zookeeper.  Thereafter, all running masters jostle to take over the master role.
 *
 * <p>The Master can be asked to shut down the cluster. See {@link #shutdown()}.  In
 * this case it will tell all regionservers to go down and then wait on them
 * all reporting in that they are down.  This master will then shut itself down.
 *
 * <p>You can also shutdown just this master.  Call {@link #stopMaster()}.
 *
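 * <p>A minimal sketch of embedding a master (illustrative only; it assumes a complete site
 * configuration and a reachable ZooKeeper ensemble):
 * <pre>
 * Configuration conf = HBaseConfiguration.create();
 * HMaster master = new HMaster(conf);
 * master.start(); // competes for the active master lease in ZooKeeper
 * master.join();
 * </pre>
 *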
 * @see org.apache.zookeeper.Watcher
 */
@InterfaceAudience.LimitedPrivate(HBaseInterfaceAudience.TOOLS)
@SuppressWarnings("deprecation")
public class HMaster extends HRegionServer implements MasterServices {
  private static final Logger LOG = LoggerFactory.getLogger(HMaster.class);

  /**
   * Protection against zombie master. Started once the Master accepts the active role and
   * begins taking over its responsibilities. Allows a finite time window before giving up
   * ownership.
   */
  private static class InitializationMonitor extends HasThread {
    /** The amount of time in milliseconds to sleep before checking initialization status. */
    public static final String TIMEOUT_KEY = "hbase.master.initializationmonitor.timeout";
    public static final long TIMEOUT_DEFAULT = TimeUnit.MILLISECONDS.convert(15, TimeUnit.MINUTES);

    /**
     * If the timeout expires before initialization completes, call {@link System#exit(int)}
     * when true; do nothing otherwise.
     */
    public static final String HALT_KEY = "hbase.master.initializationmonitor.haltontimeout";
    public static final boolean HALT_DEFAULT = false;
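    // A sketch of how these knobs might be tuned (illustrative values only):
    //   conf.setLong(TIMEOUT_KEY, TimeUnit.MINUTES.toMillis(30));
    //   conf.setBoolean(HALT_KEY, true); // exit the JVM if initialization times out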

    private final HMaster master;
    private final long timeout;
    private final boolean haltOnTimeout;

    /** Creates a Thread that monitors the {@link #isInitialized()} state. */
    InitializationMonitor(HMaster master) {
      super("MasterInitializationMonitor");
      this.master = master;
      this.timeout = master.getConfiguration().getLong(TIMEOUT_KEY, TIMEOUT_DEFAULT);
      this.haltOnTimeout = master.getConfiguration().getBoolean(HALT_KEY, HALT_DEFAULT);
      this.setDaemon(true);
    }

    @Override
    public void run() {
      try {
        while (!master.isStopped() && master.isActiveMaster()) {
          Thread.sleep(timeout);
          if (master.isInitialized()) {
            LOG.debug("Initialization completed within allotted tolerance. Monitor exiting.");
          } else {
            LOG.error("Master failed to complete initialization after " + timeout + "ms. Please"
                + " consider submitting a bug report including a thread dump of this process.");
            if (haltOnTimeout) {
              LOG.error("Zombie Master exiting. Thread dump to stdout");
              Threads.printThreadInfo(System.out, "Zombie HMaster");
              System.exit(-1);
            }
          }
        }
      } catch (InterruptedException ie) {
        LOG.trace("InitMonitor thread interrupted. Exiting.");
      }
    }
  }

  // MASTER is the name of the webapp and the attribute name used to stuff this
  // instance into the web context.
  public static final String MASTER = "master";

  // Manager and zk listener for master election
  private final ActiveMasterManager activeMasterManager;
  // Region server tracker
  private RegionServerTracker regionServerTracker;
  // Draining region server tracker
  private DrainingServerTracker drainingServerTracker;
  // Tracker for load balancer state
  LoadBalancerTracker loadBalancerTracker;
  // Tracker for meta location, if any client ZK quorum specified
  MetaLocationSyncer metaLocationSyncer;
  // Tracker for active master location, if any client ZK quorum specified
  MasterAddressSyncer masterAddressSyncer;

  // Tracker for split and merge state
  private SplitOrMergeTracker splitOrMergeTracker;

  // Tracker for region normalizer state
  private RegionNormalizerTracker regionNormalizerTracker;

  private ClusterSchemaService clusterSchemaService;

  public static final String HBASE_MASTER_WAIT_ON_SERVICE_IN_SECONDS =
    "hbase.master.wait.on.service.seconds";
  public static final int DEFAULT_HBASE_MASTER_WAIT_ON_SERVICE_IN_SECONDS = 5 * 60;

  // Metrics for the HMaster
  final MetricsMaster metricsMaster;
  // file system manager for the master FS operations
  private MasterFileSystem fileSystemManager;
  private MasterWalManager walManager;

  // server manager to deal with region server info
  private volatile ServerManager serverManager;

  // manager of assignment nodes in zookeeper
  private AssignmentManager assignmentManager;

  // manager of replication
  private ReplicationPeerManager replicationPeerManager;

  // buffer for "fatal error" notices from region servers
  // in the cluster. This is only used for assisting
  // operations/debugging.
  MemoryBoundedLogMessageBuffer rsFatals;

  // flag set after we become the active master (used for testing)
  private volatile boolean activeMaster = false;

  // flag set after we complete initialization once active
  private final ProcedureEvent<?> initialized = new ProcedureEvent<>("master initialized");

  // flag set after master services are started,
  // initialization may not have completed yet.
  volatile boolean serviceStarted = false;

  // Maximum time we should run the balancer for
  private final int maxBalancingTime;
  // Maximum percent of regions in transition when balancing
  private final double maxRitPercent;

  private final LockManager lockManager = new LockManager(this);

  private LoadBalancer balancer;
  private RegionNormalizer normalizer;
  private BalancerChore balancerChore;
  private RegionNormalizerChore normalizerChore;
  private ClusterStatusChore clusterStatusChore;
  private ClusterStatusPublisher clusterStatusPublisherChore = null;

  private HbckChore hbckChore;
  CatalogJanitor catalogJanitorChore;
  private DirScanPool cleanerPool;
  private LogCleaner logCleaner;
  private HFileCleaner hfileCleaner;
  private ReplicationBarrierCleaner replicationBarrierCleaner;
  private ExpiredMobFileCleanerChore expiredMobFileCleanerChore;
  private MobCompactionChore mobCompactChore;
  private MasterMobCompactionThread mobCompactThread;
  // used to synchronize the mobCompactionStates
  private final IdLock mobCompactionLock = new IdLock();
  // Saves the information of mob compactions in tables;
  // the key is the table name, the value is the number of compactions in that table.
  private Map<TableName, AtomicInteger> mobCompactionStates = Maps.newConcurrentMap();

  MasterCoprocessorHost cpHost;

  private final boolean preLoadTableDescriptors;

  // Time stamp for when this master became active
  private long masterActiveTime;

  // Time stamp for when this master finished initializing as the active master
  private long masterFinishedInitializationTime;

  // should we check the compression codec type at master side, default true, HBASE-6370
  private final boolean masterCheckCompression;

  // should we check encryption settings at master side, default true
  private final boolean masterCheckEncryption;

  Map<String, Service> coprocessorServiceHandlers = Maps.newHashMap();

  // monitor for snapshot of hbase tables
  SnapshotManager snapshotManager;
  // monitor for distributed procedures
  private MasterProcedureManagerHost mpmHost;

  // it is assigned after the 'initialized' guard is set to true, so it should be volatile
  private volatile MasterQuotaManager quotaManager;
  private SpaceQuotaSnapshotNotifier spaceQuotaSnapshotNotifier;
  private QuotaObserverChore quotaObserverChore;
  private SnapshotQuotaObserverChore snapshotQuotaChore;

  private ProcedureExecutor<MasterProcedureEnv> procedureExecutor;
  private WALProcedureStore procedureStore;

  // handle table states
  private TableStateManager tableStateManager;

  private long splitPlanCount;
  private long mergePlanCount;

  /* Handle favored nodes information */
  private FavoredNodesManager favoredNodesManager;

  /** jetty server for master to redirect requests to regionserver infoServer */
  private Server masterJettyServer;

  // Determine if we should do normal startup or minimal "single-user" mode with no region
  // servers and no user tables. Useful for repair and recovery of hbase:meta
  private final boolean maintenanceMode;
  static final String MAINTENANCE_MODE = "hbase.master.maintenance_mode";

  public static class RedirectServlet extends HttpServlet {
    private static final long serialVersionUID = 2894774810058302473L;
    private final int regionServerInfoPort;
    private final String regionServerHostname;

    /**
     * @param infoServer that we're trying to send all requests to
     * @param hostname may be null; if given, it will be used for redirects instead of the host
     *          from the client request.
     */
    public RedirectServlet(InfoServer infoServer, String hostname) {
      regionServerInfoPort = infoServer.getPort();
      regionServerHostname = hostname;
    }

    @Override
    public void doGet(HttpServletRequest request,
        HttpServletResponse response) throws ServletException, IOException {
      String redirectHost = regionServerHostname;
      if (redirectHost == null) {
        redirectHost = request.getServerName();
        if (!Addressing.isLocalAddress(InetAddress.getByName(redirectHost))) {
          LOG.warn("Couldn't resolve '" + redirectHost + "' as an address local to this node and '" +
              MASTER_HOSTNAME_KEY + "' is not set; client will get an HTTP 400 response. If " +
              "your HBase deployment relies on client accessible names that the region server process " +
              "can't resolve locally, then you should set the previously mentioned configuration variable " +
              "to an appropriate hostname.");
          // Do not echo client-provided input back to the client; the target host goes only to the logs.
          response.sendError(400, "Request was to a host that I can't resolve for any of the network interfaces on " +
              "this node. If this is due to an intermediary such as an HTTP load balancer or other proxy, your HBase " +
              "administrator can set '" + MASTER_HOSTNAME_KEY + "' to point to the correct hostname.");
          return;
        }
      }
      // TODO this scheme should come from looking at the scheme registered in the infoserver's http server for the
      // host and port we're using, but it's buried way too deep to do that ATM.
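      // For example (illustrative hostnames, using the default info ports): a request for
      // http://master.example.com:16010/rs-status is redirected to
      // http://master.example.com:16030/rs-status.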
      String redirectUrl = request.getScheme() + "://"
        + redirectHost + ":" + regionServerInfoPort
        + request.getRequestURI();
      response.sendRedirect(redirectUrl);
    }
  }

  /**
   * Initializes the HMaster. The steps are as follows:
   * <p>
   * <ol>
   * <li>Initialize the local HRegionServer
   * <li>Start the ActiveMasterManager.
   * </ol>
   * <p>
   * Remaining steps of initialization occur in
   * {@link #finishActiveMasterInitialization(MonitoredTask)} after the master becomes the
   * active one.
   */
  public HMaster(final Configuration conf)
      throws IOException, KeeperException {
    super(conf);
    TraceUtil.initTracer(conf);
    try {
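      // Maintenance mode can be requested either through the site Configuration or as a JVM
      // system property (for example by passing -Dhbase.master.maintenance_mode=true on the
      // command line; Boolean.getBoolean below reads system properties, not the environment).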
      if (conf.getBoolean(MAINTENANCE_MODE, false)) {
        LOG.info("Detected {}=true via configuration.", MAINTENANCE_MODE);
        maintenanceMode = true;
      } else if (Boolean.getBoolean(MAINTENANCE_MODE)) {
        LOG.info("Detected {}=true via system properties.", MAINTENANCE_MODE);
        maintenanceMode = true;
      } else {
        maintenanceMode = false;
      }

      this.rsFatals = new MemoryBoundedLogMessageBuffer(
          conf.getLong("hbase.master.buffer.for.rs.fatals", 1 * 1024 * 1024));
      LOG.info("hbase.rootdir=" + getRootDir() +
          ", hbase.cluster.distributed=" + this.conf.getBoolean(HConstants.CLUSTER_DISTRIBUTED, false));

      // Disable usage of meta replicas in the master
      this.conf.setBoolean(HConstants.USE_META_REPLICAS, false);

      decorateMasterConfiguration(this.conf);

      // Hack! Maps DFSClient => Master for logs.  HDFS made this
      // config param for task trackers, but we can piggyback off of it.
      if (this.conf.get("mapreduce.task.attempt.id") == null) {
        this.conf.set("mapreduce.task.attempt.id", "hb_m_" + this.serverName.toString());
      }

      // should we check the compression codec type at master side, default true, HBASE-6370
      this.masterCheckCompression = conf.getBoolean("hbase.master.check.compression", true);

      // should we check encryption settings at master side, default true
      this.masterCheckEncryption = conf.getBoolean("hbase.master.check.encryption", true);

      this.metricsMaster = new MetricsMaster(new MetricsMasterWrapperImpl(this));

      // preload table descriptors at startup
      this.preLoadTableDescriptors = conf.getBoolean("hbase.master.preload.tabledescriptors", true);

      this.maxBalancingTime = getMaxBalancingTime();
      this.maxRitPercent = conf.getDouble(HConstants.HBASE_MASTER_BALANCER_MAX_RIT_PERCENT,
          HConstants.DEFAULT_HBASE_MASTER_BALANCER_MAX_RIT_PERCENT);
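      // For instance (illustrative value), hbase.master.balancer.maxRitPercent=0.05 lets the
      // balancer keep running while at most 5% of regions are in transition; the default of
      // 1.0 imposes no cap.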

      // Do we publish the status?
      boolean shouldPublish = conf.getBoolean(HConstants.STATUS_PUBLISHED,
          HConstants.STATUS_PUBLISHED_DEFAULT);
      Class<? extends ClusterStatusPublisher.Publisher> publisherClass =
          conf.getClass(ClusterStatusPublisher.STATUS_PUBLISHER_CLASS,
              ClusterStatusPublisher.DEFAULT_STATUS_PUBLISHER_CLASS,
              ClusterStatusPublisher.Publisher.class);

      if (shouldPublish) {
        if (publisherClass == null) {
          LOG.warn(HConstants.STATUS_PUBLISHED + " is true, but " +
              ClusterStatusPublisher.STATUS_PUBLISHER_CLASS +
              " is not set - not publishing status");
        } else {
          clusterStatusPublisherChore = new ClusterStatusPublisher(this, conf, publisherClass);
          getChoreService().scheduleChore(clusterStatusPublisherChore);
        }
      }

      // Some unit tests don't need a cluster, so no zookeeper at all
      if (!conf.getBoolean("hbase.testing.nocluster", false)) {
        this.activeMasterManager = new ActiveMasterManager(zooKeeper, this.serverName, this);
      } else {
        this.activeMasterManager = null;
      }
    } catch (Throwable t) {
      // Make sure we log the exception. HMaster is often started via reflection and the
      // cause of failed startup is lost.
      LOG.error("Failed construction of Master", t);
      throw t;
    }
  }

  @Override
  protected String getUseThisHostnameInstead(Configuration conf) {
    return conf.get(MASTER_HOSTNAME_KEY);
  }

  // Main run loop. Calls through to the regionserver run loop AFTER becoming active Master; will
  // block in here until then.
  @Override
  public void run() {
    try {
      if (!conf.getBoolean("hbase.testing.nocluster", false)) {
        Threads.setDaemonThreadRunning(new Thread(() -> {
          try {
            int infoPort = putUpJettyServer();
            startActiveMasterManager(infoPort);
          } catch (Throwable t) {
            // Make sure we log the exception.
            String error = "Failed to become Active Master";
            LOG.error(error, t);
            // Abort should have been called already.
            if (!isAborted()) {
              abort(error, t);
            }
          }
        }), getName() + ":becomeActiveMaster");
      }
      // Fall in here even if we have been aborted. Need to run the shutdown services and
      // the super run call will do this for us.
      super.run();
    } finally {
      if (this.clusterSchemaService != null) {
        // If on the way out, then we are no longer the active master.
        this.clusterSchemaService.stopAsync();
        try {
          this.clusterSchemaService.awaitTerminated(
              getConfiguration().getInt(HBASE_MASTER_WAIT_ON_SERVICE_IN_SECONDS,
              DEFAULT_HBASE_MASTER_WAIT_ON_SERVICE_IN_SECONDS), TimeUnit.SECONDS);
        } catch (TimeoutException te) {
          LOG.warn("Failed shutdown of clusterSchemaService", te);
        }
      }
      this.activeMaster = false;
    }
  }

  // Returns the actual infoPort; -1 means the info server is disabled.
  private int putUpJettyServer() throws IOException {
    if (!conf.getBoolean("hbase.master.infoserver.redirect", true)) {
      return -1;
    }
    final int infoPort = conf.getInt("hbase.master.info.port.orig",
      HConstants.DEFAULT_MASTER_INFOPORT);
    // -1 is for disabling info server, so no redirecting
    if (infoPort < 0 || infoServer == null) {
      return -1;
    }
    if (infoPort == infoServer.getPort()) {
      return infoPort;
    }
    final String addr = conf.get("hbase.master.info.bindAddress", "0.0.0.0");
    if (!Addressing.isLocalAddress(InetAddress.getByName(addr))) {
      String msg =
          "Failed to start redirecting jetty server. Address " + addr
              + " does not belong to this host. Correct configuration parameter: "
              + "hbase.master.info.bindAddress";
      LOG.error(msg);
      throw new IOException(msg);
    }

    // TODO I'm pretty sure we could just add another binding to the InfoServer run by
    // the RegionServer and have it run the RedirectServlet instead of standing up
    // a second entire stack here.
    masterJettyServer = new Server();
    final ServerConnector connector = new ServerConnector(masterJettyServer);
    connector.setHost(addr);
    connector.setPort(infoPort);
    masterJettyServer.addConnector(connector);
    masterJettyServer.setStopAtShutdown(true);

    final String redirectHostname =
        StringUtils.isBlank(useThisHostnameInstead) ? null : useThisHostnameInstead;

    final RedirectServlet redirect = new RedirectServlet(infoServer, redirectHostname);
    final WebAppContext context =
        new WebAppContext(null, "/", null, null, null, null, WebAppContext.NO_SESSIONS);
    context.addServlet(new ServletHolder(redirect), "/*");
    context.setServer(masterJettyServer);

    try {
      masterJettyServer.start();
    } catch (Exception e) {
      throw new IOException("Failed to start redirecting jetty server", e);
    }
    return connector.getLocalPort();
  }

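  /**
   * Applies the configured meta replica count to the hbase:meta table descriptor; for example
   * (illustrative value), setting hbase.meta.replicas.count=3 keeps three replicas of meta.
   */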
  @Override
  protected Function<TableDescriptorBuilder, TableDescriptorBuilder> getMetaTableObserver() {
    return builder -> builder.setRegionReplication(
        conf.getInt(HConstants.META_REPLICAS_NUM, HConstants.DEFAULT_META_REPLICA_NUM));
  }

  /**
   * For compatibility: if login with the regionserver credentials fails, try the master ones.
   */
  @Override
  protected void login(UserProvider user, String host) throws IOException {
    try {
      super.login(user, host);
    } catch (IOException ie) {
      user.login("hbase.master.keytab.file",
        "hbase.master.kerberos.principal", host);
    }
  }

  /**
   * If configured to put regions on the active master, wait until this master becomes the
   * active one; otherwise, loop until the server is stopped or aborted.
   */
  @Override
  protected void waitForMasterActive() {
    if (maintenanceMode) {
      return;
    }
    boolean tablesOnMaster = LoadBalancer.isTablesOnMaster(conf);
    while (!(tablesOnMaster && activeMaster) && !isStopped() && !isAborted()) {
      sleeper.sleep();
    }
  }

  @VisibleForTesting
  public MasterRpcServices getMasterRpcServices() {
    return (MasterRpcServices) rpcServices;
  }

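  /**
   * Turn the load balancer on or off (asynchronous switch mode).
   * @param b the requested balancer state
   * @return the previous balancer state
   */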
  public boolean balanceSwitch(final boolean b) throws IOException {
    return getMasterRpcServices().switchBalancer(b, BalanceSwitchMode.ASYNC);
  }

  @Override
  protected String getProcessName() {
    return MASTER;
  }

  @Override
  protected boolean canCreateBaseZNode() {
    return true;
  }

  @Override
  protected boolean canUpdateTableDescriptor() {
    return true;
  }

  @Override
  protected RSRpcServices createRpcServices() throws IOException {
    return new MasterRpcServices(this);
  }

  @Override
  protected void configureInfoServer() {
    infoServer.addUnprivilegedServlet("master-status", "/master-status", MasterStatusServlet.class);
    infoServer.setAttribute(MASTER, this);
    if (LoadBalancer.isTablesOnMaster(conf)) {
      super.configureInfoServer();
    }
  }

  @Override
  protected Class<? extends HttpServlet> getDumpServlet() {
    return MasterDumpServlet.class;
  }

  @Override
  public MetricsMaster getMasterMetrics() {
    return metricsMaster;
  }

  /**
   * <p>
   * Initialize all ZK based system trackers. But do not include {@link RegionServerTracker}, it
   * should have already been initialized along with {@link ServerManager}.
   * </p>
   * <p>
   * Will be overridden in tests.
   * </p>
   */
  @VisibleForTesting
  protected void initializeZKBasedSystemTrackers()
      throws IOException, InterruptedException, KeeperException, ReplicationException {
    this.balancer = LoadBalancerFactory.getLoadBalancer(conf);
    this.normalizer = RegionNormalizerFactory.getRegionNormalizer(conf);
    this.normalizer.setMasterServices(this);
    this.normalizer.setMasterRpcServices((MasterRpcServices) rpcServices);
    this.loadBalancerTracker = new LoadBalancerTracker(zooKeeper, this);
    this.loadBalancerTracker.start();

    this.regionNormalizerTracker = new RegionNormalizerTracker(zooKeeper, this);
    this.regionNormalizerTracker.start();

    this.splitOrMergeTracker = new SplitOrMergeTracker(zooKeeper, conf, this);
    this.splitOrMergeTracker.start();

    this.replicationPeerManager = ReplicationPeerManager.create(zooKeeper, conf);

    this.drainingServerTracker = new DrainingServerTracker(zooKeeper, this, this.serverManager);
    this.drainingServerTracker.start();

    String clientQuorumServers = conf.get(HConstants.CLIENT_ZOOKEEPER_QUORUM);
    boolean clientZkObserverMode = conf.getBoolean(HConstants.CLIENT_ZOOKEEPER_OBSERVER_MODE,
      HConstants.DEFAULT_CLIENT_ZOOKEEPER_OBSERVER_MODE);
    if (clientQuorumServers != null && !clientZkObserverMode) {
      // we need to take care of ZK information synchronization
      // if the given client ZK nodes are not observer nodes
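      // An illustrative setup (example values): clients point at a separate ensemble while the
      // master mirrors the meta-location and master-address znodes into it:
      //   hbase.client.zookeeper.quorum = client-zk1,client-zk2,client-zk3
      //   hbase.client.zookeeper.observer.mode = false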
      ZKWatcher clientZkWatcher = new ZKWatcher(conf,
          getProcessName() + ":" + rpcServices.getSocketAddress().getPort() + "-clientZK", this,
          false, true);
      this.metaLocationSyncer = new MetaLocationSyncer(zooKeeper, clientZkWatcher, this);
      this.metaLocationSyncer.start();
      this.masterAddressSyncer = new MasterAddressSyncer(zooKeeper, clientZkWatcher, this);
      this.masterAddressSyncer.start();
      // setting the cluster id is a one-time effort
      ZKClusterId.setClusterId(clientZkWatcher, fileSystemManager.getClusterId());
    }

    // Set the cluster as up.  If new RSs, they'll be waiting on this before
    // going ahead with their startup.
    boolean wasUp = this.clusterStatusTracker.isClusterUp();
    if (!wasUp) this.clusterStatusTracker.setClusterUp();

    LOG.info("Active/primary master=" + this.serverName +
        ", sessionid=0x" +
        Long.toHexString(this.zooKeeper.getRecoverableZooKeeper().getSessionId()) +
        ", setting cluster-up flag (Was=" + wasUp + ")");

    // create/initialize the snapshot manager and other procedure managers
    this.snapshotManager = new SnapshotManager();
    this.mpmHost = new MasterProcedureManagerHost();
    this.mpmHost.register(this.snapshotManager);
    this.mpmHost.register(new MasterFlushTableProcedureManager());
    this.mpmHost.loadProcedures(conf);
    this.mpmHost.initialize(this, this.metricsMaster);
  }

  /**
   * Finish initialization of HMaster after becoming the primary master.
   * <p/>
   * The startup order is a bit complicated but very important, do not change it unless you know
   * what you are doing.
   * <ol>
   * <li>Initialize file system based components - file system manager, wal manager, table
   * descriptors, etc</li>
   * <li>Publish cluster id</li>
   * <li>Here comes the most complicated part - initialize server manager, assignment manager and
   * region server tracker
   * <ol type='i'>
   * <li>Create server manager</li>
   * <li>Create procedure executor, load the procedures, but do not start workers. We will start it
   * later after we finish scheduling SCPs to avoid scheduling duplicated SCPs for the same
   * server</li>
   * <li>Create assignment manager and start it, load the meta region state, but do not load data
   * from meta region</li>
   * <li>Start region server tracker, construct the online servers set and find out dead servers
   * and schedule SCP for them. The online servers will be constructed by scanning zk, and we will
   * also scan the wal directory to find out possible live region servers; the differences between
   * these two sets are the dead servers</li>
   * </ol>
   * </li>
   * <li>If this is a new deploy, schedule an InitMetaProcedure to initialize meta</li>
   * <li>Start necessary service threads - balancer, catalog janitor, executor services, and also
   * the procedure executor, etc. Notice that the balancer must be created first as the assignment
   * manager may use it when assigning regions.</li>
   * <li>Wait for meta to be initialized if necessary, start table state manager.</li>
   * <li>Wait for enough region servers to check-in</li>
   * <li>Let assignment manager load data from meta and construct region states</li>
   * <li>Start all other things such as chore services, etc</li>
   * </ol>
   * <p/>
   * Notice that now we will not schedule a special procedure to make meta online (unless it is
   * the first time, where meta has not been created yet); we rely on SCP to bring meta online.
   */
  private void finishActiveMasterInitialization(MonitoredTask status)
      throws IOException, InterruptedException, KeeperException, ReplicationException {
    /*
     * We are active master now... go initialize components we need to run.
     */
    status.setStatus("Initializing Master file system");

    this.masterActiveTime = System.currentTimeMillis();
    // TODO: Do this using Dependency Injection, using PicoContainer, Guice or Spring.

    // Only initialize the MemStoreLAB when the master carries tables
    if (LoadBalancer.isTablesOnMaster(conf)) {
      initializeMemStoreChunkCreator();
    }
    this.fileSystemManager = new MasterFileSystem(conf);
    this.walManager = new MasterWalManager(this);

    // enable table descriptors cache
    this.tableDescriptors.setCacheOn();

    // warm-up HTDs cache on master initialization
    if (preLoadTableDescriptors) {
      status.setStatus("Pre-loading table descriptors");
      this.tableDescriptors.getAll();
    }

    // Publish cluster ID; set it in Master too. The superclass RegionServer does this later but
    // only after it has checked in with the Master. At least a few tests ask Master for clusterId
    // before it has called its run method and before RegionServer has done the reportForDuty.
    ClusterId clusterId = fileSystemManager.getClusterId();
    status.setStatus("Publishing Cluster ID " + clusterId + " in ZooKeeper");
    ZKClusterId.setClusterId(this.zooKeeper, fileSystemManager.getClusterId());
    this.clusterId = clusterId.toString();

    // Precaution. Put in place the old hbck1 lock file to fence out old hbase1s running their
    // hbck1s against an hbase2 cluster; it could do damage. To skip this behavior, set
    // hbase.write.hbck1.lock.file to false.
    if (this.conf.getBoolean("hbase.write.hbck1.lock.file", true)) {
      HBaseFsck.checkAndMarkRunningHbck(this.conf,
          HBaseFsck.createLockRetryCounterFactory(this.conf).create());
    }

    status.setStatus("Initialize ServerManager and schedule SCP for crashed servers");
    this.serverManager = createServerManager(this);
    createProcedureExecutor();
    // Create Assignment Manager
    this.assignmentManager = new AssignmentManager(this);
    this.assignmentManager.start();
    // Start RegionServerTracker with a listing of servers found with existing SCPs -- these
    // should be registered in the deadServers set -- and with the list of servernames out on the
    // filesystem that COULD BE 'alive' (we'll schedule SCPs for each and let SCP figure it out).
    // We also pass dirs that are already 'splitting'... so we can do some checks down in tracker.
    // TODO: Generate the splitting and live Set in one pass instead of two as we currently do.
    this.regionServerTracker = new RegionServerTracker(zooKeeper, this, this.serverManager);
    this.regionServerTracker.start(
      procedureExecutor.getProcedures().stream().filter(p -> p instanceof ServerCrashProcedure)
        .map(p -> ((ServerCrashProcedure) p)).collect(Collectors.toSet()),
      walManager.getLiveServersFromWALDir(), walManager.getSplittingServersFromWALDir());
    // This manager will be started AFTER hbase:meta is confirmed on line.
    // hbase.mirror.table.state.to.zookeeper is so hbase1 clients can connect. They read table
    // state from zookeeper while hbase2 reads it from hbase:meta. Disable if no hbase1 clients.
    this.tableStateManager =
        this.conf.getBoolean(MirroringTableStateManager.MIRROR_TABLE_STATE_TO_ZK_KEY, true)
            ? new MirroringTableStateManager(this)
            : new TableStateManager(this);

    status.setStatus("Initializing ZK system trackers");
    initializeZKBasedSystemTrackers();
    // Set ourselves as active Master now that our claim has succeeded up in zk.
    this.activeMaster = true;

    // Start the Zombie master detector after setting master as active, see HBASE-21535
    Thread zombieDetector = new Thread(new InitializationMonitor(this),
        "ActiveMasterInitializationMonitor-" + System.currentTimeMillis());
    zombieDetector.setDaemon(true);
    zombieDetector.start();

    // This is for backwards compatibility
    // See HBASE-11393
    status.setStatus("Update TableCFs node in ZooKeeper");
    ReplicationPeerConfigUpgrader tableCFsUpdater =
        new ReplicationPeerConfigUpgrader(zooKeeper, conf);
    tableCFsUpdater.copyTableCFs();

    if (!maintenanceMode) {
      // Add the Observer to delete quotas on table deletion before starting all CPs by
      // default with quota support, avoiding if user specifically asks to not load this Observer.
      if (QuotaUtil.isQuotaEnabled(conf)) {
        updateConfigurationForQuotasObserver(conf);
      }
      // initialize master side coprocessors before we start handling requests
      status.setStatus("Initializing master coprocessors");
      this.cpHost = new MasterCoprocessorHost(this, this.conf);
    }

    // Checking if meta needs initializing.
    status.setStatus("Initializing meta table if this is a new deploy");
    InitMetaProcedure initMetaProc = null;
    // Print out state of hbase:meta on startup; helps debugging.
    RegionState rs = this.assignmentManager.getRegionStates().
        getRegionState(RegionInfoBuilder.FIRST_META_REGIONINFO);
    LOG.info("hbase:meta {}", rs);
    if (rs.isOffline()) {
      Optional<InitMetaProcedure> optProc = procedureExecutor.getProcedures().stream()
        .filter(p -> p instanceof InitMetaProcedure).map(o -> (InitMetaProcedure) o).findAny();
      initMetaProc = optProc.orElseGet(() -> {
        // schedule an init meta procedure if meta has not been deployed yet
        InitMetaProcedure temp = new InitMetaProcedure();
        procedureExecutor.submitProcedure(temp);
        return temp;
      });
    }
    if (this.balancer instanceof FavoredNodesPromoter) {
      favoredNodesManager = new FavoredNodesManager(this);
    }

    // initialize load balancer
    this.balancer.setMasterServices(this);
    this.balancer.setClusterMetrics(getClusterMetricsWithoutCoprocessor());
    this.balancer.initialize();

    // start up all service threads.
    status.setStatus("Initializing master service threads");
    startServiceThreads();
    // wait for meta to be initialized after we start the procedure executor
    if (initMetaProc != null) {
      initMetaProc.await();
    }
    // Wake up this server to check in
    sleeper.skipSleepCycle();

    // Wait for region servers to report in.
    // With this as part of master initialization, it precludes our being able to start a single
    // server that is both Master and RegionServer. Needs more thought. TODO.
    String statusStr = "Wait for region servers to report in";
    status.setStatus(statusStr);
    LOG.info(Objects.toString(status));
    waitForRegionServers(status);

    // Check if the master is shutting down because of issues initializing regionservers or the
    // balancer.
    if (isStopped()) {
      return;
    }

    status.setStatus("Starting assignment manager");
    // FIRST HBASE:META READ!!!!
    // The below cannot make progress w/o hbase:meta being online.
    // This is the FIRST attempt at going to hbase:meta. Meta on-lining is going on in background
    // as procedures run -- in particular SCPs for crashed servers... One should put up hbase:meta
    // if it is down. It may take a while to come online. So, wait here until meta is for sure
    // available. That's what waitForMetaOnline does.
    if (!waitForMetaOnline()) {
      return;
    }
    this.assignmentManager.joinCluster();
    // The below depends on hbase:meta being online.
    this.tableStateManager.start();
    // Below has to happen after tablestatemanager has started in the case where this hbase-2.x
    // is being started over an hbase-1.x dataset. tablestatemanager runs a migration as part
    // of its 'start' moving table state from zookeeper to hbase:meta. This migration needs to
    // complete before we do this next step processing offline regions, else it fails reading
    // table states, messing up master launch (namespace table, etc., are not assigned).
    this.assignmentManager.processOfflineRegions();
    // Initialize after meta is up as below scans meta
    if (favoredNodesManager != null && !maintenanceMode) {
      SnapshotOfRegionAssignmentFromMeta snapshotOfRegionAssignment =
          new SnapshotOfRegionAssignmentFromMeta(getConnection());
      snapshotOfRegionAssignment.initialize();
      favoredNodesManager.initialize(snapshotOfRegionAssignment);
    }

    // set cluster status again after user regions are assigned
    this.balancer.setClusterMetrics(getClusterMetricsWithoutCoprocessor());

    // Start balancer and meta catalog janitor after meta and regions have been assigned.
    status.setStatus("Starting balancer and catalog janitor");
    this.clusterStatusChore = new ClusterStatusChore(this, balancer);
    getChoreService().scheduleChore(clusterStatusChore);
    this.balancerChore = new BalancerChore(this);
    getChoreService().scheduleChore(balancerChore);
    this.normalizerChore = new RegionNormalizerChore(this);
    getChoreService().scheduleChore(normalizerChore);
    this.catalogJanitorChore = new CatalogJanitor(this);
    getChoreService().scheduleChore(catalogJanitorChore);
    this.hbckChore = new HbckChore(this);
    getChoreService().scheduleChore(hbckChore);

    // NAMESPACE READ!!!!
    // Here we expect hbase:namespace to be online. See inside initClusterSchemaService.
    // TODO: Fix this. Namespace is a pain being a sort-of system table. Fold it into hbase:meta.
    // waitForNamespaceOnline, like waitForMetaOnline above, blocks until the namespace table is
    // onlined before allowing progress.
    if (!waitForNamespaceOnline()) {
      return;
    }
    status.setStatus("Starting cluster schema service");
    initClusterSchemaService();

    if (this.cpHost != null) {
      try {
        this.cpHost.preMasterInitialization();
      } catch (IOException e) {
        LOG.error("Coprocessor preMasterInitialization() hook failed", e);
      }
    }

    status.markComplete("Initialization successful");
    LOG.info(String.format("Master has completed initialization %.3fsec",
       (System.currentTimeMillis() - masterActiveTime) / 1000.0f));
    this.masterFinishedInitializationTime = System.currentTimeMillis();
    configurationManager.registerObserver(this.balancer);
    configurationManager.registerObserver(this.cleanerPool);
    configurationManager.registerObserver(this.hfileCleaner);
    configurationManager.registerObserver(this.logCleaner);
    // Set master as 'initialized'.
    setInitialized(true);

    if (maintenanceMode) {
      LOG.info("Detected repair mode, skipping final initialization steps.");
      return;
    }

    assignmentManager.checkIfShouldMoveSystemRegionAsync();
    status.setStatus("Assign meta replicas");
    MasterMetaBootstrap metaBootstrap = createMetaBootstrap();
    metaBootstrap.assignMetaReplicas();
    status.setStatus("Starting quota manager");
    initQuotaManager();
    if (QuotaUtil.isQuotaEnabled(conf)) {
      // Create the quota snapshot notifier
      spaceQuotaSnapshotNotifier = createQuotaSnapshotNotifier();
      spaceQuotaSnapshotNotifier.initialize(getClusterConnection());
      this.quotaObserverChore = new QuotaObserverChore(this, getMasterMetrics());
      // Start the chore to read the region FS space reports and act on them
      getChoreService().scheduleChore(quotaObserverChore);

      this.snapshotQuotaChore = new SnapshotQuotaObserverChore(this, getMasterMetrics());
      // Start the chore to read snapshots and add their usage to table/NS quotas
      getChoreService().scheduleChore(snapshotQuotaChore);
    }

    // Clear dead servers that have the same host name and port as an online server, because we
    // do not remove such entries for a region server that tries to check in before master
    // initialization completes. See HBASE-5916.
    this.serverManager.clearDeadServersWithSameHostNameAndPortOfOnlineServer();

    // Check and set the znode ACLs if needed in case we are overtaking a non-secure configuration
    status.setStatus("Checking ZNode ACLs");
    zooKeeper.checkAndSetZNodeAcls();

    status.setStatus("Initializing MOB Cleaner");
    initMobCleaner();

    status.setStatus("Calling postStartMaster coprocessors");
    if (this.cpHost != null) {
      // don't let cp initialization errors kill the master
      try {
        this.cpHost.postStartMaster();
      } catch (IOException ioe) {
        LOG.error("Coprocessor postStartMaster() hook failed", ioe);
      }
    }

    zombieDetector.interrupt();

    /*
     * After the master has started up, let's do balancer post-startup initialization. Since this
     * runs in the activeMasterManager thread, it should be fine.
     */
    long start = System.currentTimeMillis();
    this.balancer.postMasterStartupInitialize();
    if (LOG.isDebugEnabled()) {
      LOG.debug("Balancer post startup initialization complete, took " + (
          (System.currentTimeMillis() - start) / 1000) + " seconds");
    }
  }

  /**
   * Check hbase:meta is up and ready for reading. For use during Master startup only.
   * @return True if meta is UP and online and startup can progress. Otherwise, meta is not online
   *   and we will hold here until operator intervention.
   */
  @VisibleForTesting
  public boolean waitForMetaOnline() throws InterruptedException {
    return isRegionOnline(RegionInfoBuilder.FIRST_META_REGIONINFO);
  }

  /**
   * @return True if region is online and scannable, else false if an error or shutdown
   *   (otherwise we just block in here holding up all forward progress).
   */
  private boolean isRegionOnline(RegionInfo ri) throws InterruptedException {
    RetryCounter rc = null;
    while (!isStopped()) {
      RegionState rs = this.assignmentManager.getRegionStates().getRegionState(ri);
      if (rs.isOpened()) {
        if (this.getServerManager().isServerOnline(rs.getServerName())) {
          return true;
        }
      }
      // Region is not OPEN.
      Optional<Procedure<MasterProcedureEnv>> optProc = this.procedureExecutor.getProcedures().
          stream().filter(p -> p instanceof ServerCrashProcedure).findAny();
      // TODO: Add a page to refguide on how to do repair. Have this log message point to it.
      // Page will talk about loss of edits, how to schedule at least the meta WAL recovery, and
      // then how to assign, including how to break a region lock if one is held.
      LOG.warn("{} is NOT online; state={}; ServerCrashProcedures={}. Master startup cannot " +
          "progress, in holding-pattern until region onlined.",
          ri.getRegionNameAsString(), rs, optProc.isPresent());
      // Check once-a-minute.
      if (rc == null) {
        rc = new RetryCounterFactory(1000).create();
      }
      Threads.sleep(rc.getBackoffTimeAndIncrementAttempts());
    }
    return false;
  }
1168
1169  /**
1170   * Check hbase:namespace table is assigned. If not, startup will hang looking for the ns table
1171   * (TODO: Fix this! NS should not hold-up startup).
1172   * @return True if namespace table is up/online.
1173   */
1174  @VisibleForTesting
1175  public boolean waitForNamespaceOnline() throws InterruptedException {
1176    List<RegionInfo> ris = this.assignmentManager.getRegionStates().
1177        getRegionsOfTable(TableName.NAMESPACE_TABLE_NAME);
1178    if (ris.isEmpty()) {
1179      // If empty, means we've not assigned the namespace table yet... Just return true so startup
1180      // continues and the namespace table gets created.
1181      return true;
1182    }
1183    // Else there are namespace regions up in meta. Ensure they are assigned before we go on.
1184    for (RegionInfo ri: ris) {
1185      if (!isRegionOnline(ri)) return false; // false only on shutdown; don't claim success then
1186    }
1187    return true;
1188  }
1189
1190  /**
1191   * Adds the {@code MasterQuotasObserver} to the list of configured Master observers to
1192   * automatically remove quotas for a table when that table is deleted.
1193   */
1194  @VisibleForTesting
1195  public void updateConfigurationForQuotasObserver(Configuration conf) {
1196    // We're configured to not delete quotas on table deletion, so we don't need to add the observer.
1197    if (!conf.getBoolean(
1198          MasterQuotasObserver.REMOVE_QUOTA_ON_TABLE_DELETE,
1199          MasterQuotasObserver.REMOVE_QUOTA_ON_TABLE_DELETE_DEFAULT)) {
1200      return;
1201    }
1202    String[] masterCoprocs = conf.getStrings(CoprocessorHost.MASTER_COPROCESSOR_CONF_KEY);
1203    final int length = null == masterCoprocs ? 0 : masterCoprocs.length;
1204    String[] updatedCoprocs = new String[length + 1];
1205    if (length > 0) {
1206      System.arraycopy(masterCoprocs, 0, updatedCoprocs, 0, masterCoprocs.length);
1207    }
1208    updatedCoprocs[length] = MasterQuotasObserver.class.getName();
1209    conf.setStrings(CoprocessorHost.MASTER_COPROCESSOR_CONF_KEY, updatedCoprocs);
1210  }
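
  // Illustrative sketch (not part of the build): the method above simply appends the quotas
  // observer to whatever master coprocessors are already configured. For example, assuming a
  // hypothetical observer "com.example.MyObserver" is already registered:
  //
  //   conf.setStrings(CoprocessorHost.MASTER_COPROCESSOR_CONF_KEY, "com.example.MyObserver");
  //   master.updateConfigurationForQuotasObserver(conf);
  //   // The key now lists: com.example.MyObserver, MasterQuotasObserver.class.getName()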
1211
1212  private void initMobCleaner() {
1213    this.expiredMobFileCleanerChore = new ExpiredMobFileCleanerChore(this);
1214    getChoreService().scheduleChore(expiredMobFileCleanerChore);
1215
1216    int mobCompactionPeriod = conf.getInt(MobConstants.MOB_COMPACTION_CHORE_PERIOD,
1217        MobConstants.DEFAULT_MOB_COMPACTION_CHORE_PERIOD);
1218    this.mobCompactChore = new MobCompactionChore(this, mobCompactionPeriod);
1219    getChoreService().scheduleChore(mobCompactChore);
1220    this.mobCompactThread = new MasterMobCompactionThread(this);
1221  }
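
  // Illustrative sketch: the MOB compaction chore period read above can be tuned via the
  // MobConstants key, e.g. (assuming, as the default suggests, that the period is in seconds):
  //
  //   conf.setInt(MobConstants.MOB_COMPACTION_CHORE_PERIOD, 24 * 60 * 60); // run daily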
1222
1223  /**
1224   * <p>
1225   * Create a {@link MasterMetaBootstrap} instance.
1226   * </p>
1227   * <p>
1228   * Will be overridden in tests.
1229   * </p>
1230   */
1231  @VisibleForTesting
1232  protected MasterMetaBootstrap createMetaBootstrap() {
1233    // We put this out here in a method so we can do a Mockito.spy and stub it out
1234    // with a mocked-up MasterMetaBootstrap.
1235    return new MasterMetaBootstrap(this);
1236  }
1237
1238  /**
1239   * <p>
1240   * Create a {@link ServerManager} instance.
1241   * </p>
1242   * <p>
1243   * Will be overridden in tests.
1244   * </p>
1245   */
1246  @VisibleForTesting
1247  protected ServerManager createServerManager(final MasterServices master) throws IOException {
1248    // We put this out here in a method so we can do a Mockito.spy and stub it out
1249    // with a mocked-up ServerManager.
1250    setupClusterConnection();
1251    return new ServerManager(master);
1252  }
1253
1254  private void waitForRegionServers(final MonitoredTask status)
1255      throws IOException, InterruptedException {
1256    this.serverManager.waitForRegionServers(status);
1257  }
1258
1259  // Will be overridden in tests
1260  @VisibleForTesting
1261  protected void initClusterSchemaService() throws IOException, InterruptedException {
1262    this.clusterSchemaService = new ClusterSchemaServiceImpl(this);
1263    this.clusterSchemaService.startAsync();
1264    try {
1265      this.clusterSchemaService.awaitRunning(getConfiguration().getInt(
1266        HBASE_MASTER_WAIT_ON_SERVICE_IN_SECONDS,
1267        DEFAULT_HBASE_MASTER_WAIT_ON_SERVICE_IN_SECONDS), TimeUnit.SECONDS);
1268    } catch (TimeoutException toe) {
1269      throw new IOException("Timed out starting ClusterSchemaService", toe);
1270    }
1271  }
1272
1273  private void initQuotaManager() throws IOException {
1274    MasterQuotaManager quotaManager = new MasterQuotaManager(this);
1275    quotaManager.start();
1276    this.quotaManager = quotaManager;
1277  }
1278
1279  private SpaceQuotaSnapshotNotifier createQuotaSnapshotNotifier() {
1280    SpaceQuotaSnapshotNotifier notifier =
1281        SpaceQuotaSnapshotNotifierFactory.getInstance().create(getConfiguration());
1282    return notifier;
1283  }
1284
1285  boolean isCatalogJanitorEnabled() {
1286    return catalogJanitorChore != null && catalogJanitorChore.getEnabled();
1288  }
1289
1290  boolean isCleanerChoreEnabled() {
1291    boolean hfileCleanerFlag = true, logCleanerFlag = true;
1292
1293    if (hfileCleaner != null) {
1294      hfileCleanerFlag = hfileCleaner.getEnabled();
1295    }
1296
1297    if (logCleaner != null) {
1298      logCleanerFlag = logCleaner.getEnabled();
1299    }
1300
1301    return (hfileCleanerFlag && logCleanerFlag);
1302  }
1303
1304  @Override
1305  public TableDescriptors getTableDescriptors() {
1306    return this.tableDescriptors;
1307  }
1308
1309  @Override
1310  public ServerManager getServerManager() {
1311    return this.serverManager;
1312  }
1313
1314  @Override
1315  public MasterFileSystem getMasterFileSystem() {
1316    return this.fileSystemManager;
1317  }
1318
1319  @Override
1320  public MasterWalManager getMasterWalManager() {
1321    return this.walManager;
1322  }
1323
1324  @Override
1325  public TableStateManager getTableStateManager() {
1326    return tableStateManager;
1327  }
1328
1329  /*
1330   * Start up all services. If any of these threads gets an unhandled exception
1331   * then they just die with a logged message. This should be fine because
1332   * in general, we do not expect the master to get such unhandled exceptions
1333   * as OOMEs; it should be lightly loaded. See what HRegionServer does if
1334   * you need to install an unexpected exception handler.
1335   */
1336  private void startServiceThreads() throws IOException {
1337    // Start the executor service pools
1338    this.executorService.startExecutorService(ExecutorType.MASTER_OPEN_REGION, conf.getInt(
1339      HConstants.MASTER_OPEN_REGION_THREADS, HConstants.MASTER_OPEN_REGION_THREADS_DEFAULT));
1340    this.executorService.startExecutorService(ExecutorType.MASTER_CLOSE_REGION, conf.getInt(
1341      HConstants.MASTER_CLOSE_REGION_THREADS, HConstants.MASTER_CLOSE_REGION_THREADS_DEFAULT));
1342    this.executorService.startExecutorService(ExecutorType.MASTER_SERVER_OPERATIONS,
1343      conf.getInt(HConstants.MASTER_SERVER_OPERATIONS_THREADS,
1344        HConstants.MASTER_SERVER_OPERATIONS_THREADS_DEFAULT));
1345    this.executorService.startExecutorService(ExecutorType.MASTER_META_SERVER_OPERATIONS,
1346      conf.getInt(HConstants.MASTER_META_SERVER_OPERATIONS_THREADS,
1347        HConstants.MASTER_META_SERVER_OPERATIONS_THREADS_DEFAULT));
1348    this.executorService.startExecutorService(ExecutorType.M_LOG_REPLAY_OPS, conf.getInt(
1349      HConstants.MASTER_LOG_REPLAY_OPS_THREADS, HConstants.MASTER_LOG_REPLAY_OPS_THREADS_DEFAULT));
1350    this.executorService.startExecutorService(ExecutorType.MASTER_SNAPSHOT_OPERATIONS, conf.getInt(
1351      SnapshotManager.SNAPSHOT_POOL_THREADS_KEY, SnapshotManager.SNAPSHOT_POOL_THREADS_DEFAULT));
1352
1353    // We depend on there being only one instance of this executor running
1354    // at a time. To allow concurrency, we would need fencing of enable/disable of
1355    // tables.
1356    // Any time you change this maxThreads to > 1, please see the comment at
1357    // AccessController#postCompletedCreateTableAction
1358    this.executorService.startExecutorService(ExecutorType.MASTER_TABLE_OPERATIONS, 1);
1359    startProcedureExecutor();
1360
1361    // Create cleaner thread pool
1362    cleanerPool = new DirScanPool(conf);
1363    // Start log cleaner thread
1364    int cleanerInterval = conf.getInt("hbase.master.cleaner.interval", 600 * 1000);
1365    this.logCleaner = new LogCleaner(cleanerInterval, this, conf,
1366      getMasterWalManager().getFileSystem(), getMasterWalManager().getOldLogDir(), cleanerPool);
1367    getChoreService().scheduleChore(logCleaner);
1368
1369    // start the hfile archive cleaner thread
1370    Path archiveDir = HFileArchiveUtil.getArchivePath(conf);
1371    Map<String, Object> params = new HashMap<>();
1372    params.put(MASTER, this);
1373    this.hfileCleaner = new HFileCleaner(cleanerInterval, this, conf,
1374      getMasterFileSystem().getFileSystem(), archiveDir, cleanerPool, params);
1375    getChoreService().scheduleChore(hfileCleaner);
1376
1377    replicationBarrierCleaner = new ReplicationBarrierCleaner(conf, this, getConnection(),
1378      replicationPeerManager);
1379    getChoreService().scheduleChore(replicationBarrierCleaner);
1380
1381    serviceStarted = true;
1382    if (LOG.isTraceEnabled()) {
1383      LOG.trace("Started service threads");
1384    }
1385  }
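
  // Illustrative sketch: the log cleaner and hfile cleaner chores above share one interval.
  // To run them every five minutes instead of the default ten, one could set:
  //
  //   conf.setInt("hbase.master.cleaner.interval", 5 * 60 * 1000); // milliseconds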
1386
1387  @Override
1388  protected void stopServiceThreads() {
1389    if (masterJettyServer != null) {
1390      LOG.info("Stopping master jetty server");
1391      try {
1392        masterJettyServer.stop();
1393      } catch (Exception e) {
1394        LOG.error("Failed to stop master jetty server", e);
1395      }
1396    }
1397    stopChores();
1398    if (this.mobCompactThread != null) {
1399      this.mobCompactThread.close();
1400    }
1401    super.stopServiceThreads();
1402    if (cleanerPool != null) {
1403      cleanerPool.shutdownNow();
1404      cleanerPool = null;
1405    }
1406
1407    LOG.debug("Stopping service threads");
1408
1409    if (this.quotaManager != null) {
1410      this.quotaManager.stop();
1411    }
1412
1413    if (this.activeMasterManager != null) {
1414      this.activeMasterManager.stop();
1415    }
1416    if (this.serverManager != null) {
1417      this.serverManager.stop();
1418    }
1419    if (this.assignmentManager != null) {
1420      this.assignmentManager.stop();
1421    }
1422
1423    stopProcedureExecutor();
1424
1425    if (this.walManager != null) {
1426      this.walManager.stop();
1427    }
1428    if (this.fileSystemManager != null) {
1429      this.fileSystemManager.stop();
1430    }
1431    if (this.mpmHost != null) {
1432      this.mpmHost.stop("server shutting down.");
1433    }
1434    if (this.regionServerTracker != null) {
1435      this.regionServerTracker.stop();
1436    }
1437  }
1438
1439  private void createProcedureExecutor() throws IOException {
1440    MasterProcedureEnv procEnv = new MasterProcedureEnv(this);
1441    procedureStore =
1442      new WALProcedureStore(conf, new MasterProcedureEnv.WALStoreLeaseRecovery(this));
1443    procedureStore.registerListener(new ProcedureStoreListener() {
1444
1445      @Override
1446      public void abortProcess() {
1447        abort("The Procedure Store lost the lease", null);
1448      }
1449    });
1450    MasterProcedureScheduler procedureScheduler = procEnv.getProcedureScheduler();
1451    procedureExecutor = new ProcedureExecutor<>(conf, procEnv, procedureStore, procedureScheduler);
1452    configurationManager.registerObserver(procEnv);
1453
1454    int cpus = Runtime.getRuntime().availableProcessors();
1455    final int numThreads = conf.getInt(MasterProcedureConstants.MASTER_PROCEDURE_THREADS, Math.max(
1456      (cpus > 0 ? cpus / 4 : 0), MasterProcedureConstants.DEFAULT_MIN_MASTER_PROCEDURE_THREADS));
1457    final int urgentWorkers = conf
1458        .getInt(MasterProcedureConstants.MASTER_URGENT_PROCEDURE_THREADS,
1459            MasterProcedureConstants.DEFAULT_MASTER_URGENT_PROCEDURE_THREADS);
1460    final boolean abortOnCorruption =
1461      conf.getBoolean(MasterProcedureConstants.EXECUTOR_ABORT_ON_CORRUPTION,
1462        MasterProcedureConstants.DEFAULT_EXECUTOR_ABORT_ON_CORRUPTION);
1463    procedureStore.start(numThreads);
1464    // Just initialize it but do not start the workers, we will start the workers later by calling
1465    // startProcedureExecutor. See the javadoc for finishActiveMasterInitialization for more
1466    // details.
1467    procedureExecutor.init(numThreads, urgentWorkers, abortOnCorruption);
1468    procEnv.getRemoteDispatcher().start();
1469  }
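
  // Worked example for the worker-count computation above: on a 32-core host with no explicit
  // MASTER_PROCEDURE_THREADS setting, numThreads = max(32 / 4, DEFAULT_MIN_MASTER_PROCEDURE_THREADS),
  // so the configured minimum wins whenever cpus / 4 is smaller than it (see
  // MasterProcedureConstants for the authoritative default).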
1470
1471  private void startProcedureExecutor() throws IOException {
1472    procedureExecutor.startWorkers();
1473  }
1474
1475  private void stopProcedureExecutor() {
1476    if (procedureExecutor != null) {
1477      configurationManager.deregisterObserver(procedureExecutor.getEnvironment());
1478      procedureExecutor.getEnvironment().getRemoteDispatcher().stop();
1479      procedureExecutor.stop();
1480      procedureExecutor.join();
1481      procedureExecutor = null;
1482    }
1483
1484    if (procedureStore != null) {
1485      procedureStore.stop(isAborted());
1486      procedureStore = null;
1487    }
1488  }
1489
1490  private void stopChores() {
1491    ChoreService choreService = getChoreService();
1492    if (choreService != null) {
1493      choreService.cancelChore(this.expiredMobFileCleanerChore);
1494      choreService.cancelChore(this.mobCompactChore);
1495      choreService.cancelChore(this.balancerChore);
1496      choreService.cancelChore(this.normalizerChore);
1497      choreService.cancelChore(this.clusterStatusChore);
1498      choreService.cancelChore(this.catalogJanitorChore);
1499      choreService.cancelChore(this.clusterStatusPublisherChore);
1500      choreService.cancelChore(this.snapshotQuotaChore);
1501      choreService.cancelChore(this.logCleaner);
1502      choreService.cancelChore(this.hfileCleaner);
1503      choreService.cancelChore(this.replicationBarrierCleaner);
1504      choreService.cancelChore(this.hbckChore);
1505    }
1506  }
1507
1508  /**
1509   * @return Get remote side's InetAddress
1510   */
1511  InetAddress getRemoteInetAddress(final int port,
1512      final long serverStartCode) throws UnknownHostException {
1513    // Do it out here in its own little method so we can fake an address when
1514    // mocking up in tests.
1515    InetAddress ia = RpcServer.getRemoteIp();
1516
1517    // The call could be from the local regionserver,
1518    // in which case, there is no remote address.
1519    if (ia == null && serverStartCode == startcode) {
1520      InetSocketAddress isa = rpcServices.getSocketAddress();
1521      if (isa != null && isa.getPort() == port) {
1522        ia = isa.getAddress();
1523      }
1524    }
1525    return ia;
1526  }
1527
1528  /**
1529   * @return Maximum time we should run balancer for
1530   */
1531  private int getMaxBalancingTime() {
1532    int maxBalancingTime = getConfiguration().getInt(HConstants.HBASE_BALANCER_MAX_BALANCING, -1);
1533    if (maxBalancingTime == -1) {
1534      // if max balancing time isn't set, default it to the balancer period
1535      maxBalancingTime = getConfiguration().getInt(HConstants.HBASE_BALANCER_PERIOD,
1536        HConstants.DEFAULT_HBASE_BALANCER_PERIOD);
1537    }
1538    return maxBalancingTime;
1539  }
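
  // Illustrative sketch: when HBASE_BALANCER_MAX_BALANCING is unset, the method above falls
  // back to the balancer period, e.g.:
  //
  //   conf.setInt(HConstants.HBASE_BALANCER_PERIOD, 300000); // 5-minute period
  //   // getMaxBalancingTime() == 300000, since HBASE_BALANCER_MAX_BALANCING defaulted to -1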
1540
1541  /**
1542   * @return Maximum number of regions in transition
1543   */
1544  private int getMaxRegionsInTransition() {
1545    int numRegions = this.assignmentManager.getRegionStates().getRegionAssignments().size();
1546    return Math.max((int) Math.floor(numRegions * this.maxRitPercent), 1);
1547  }
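
  // Worked example: with 10,000 assigned regions and maxRitPercent = 0.01 (an example value),
  // getMaxRegionsInTransition() = max(floor(10000 * 0.01), 1) = 100. On tiny clusters the floor
  // can reach 0, which is why the result is clamped to at least 1.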
1548
1549  /**
1550   * First sleeps until the next balance plan start time. Meanwhile, throttles by the max
1551   * number of regions in transition to protect availability.
1552   * @param nextBalanceStartTime The next balance plan start time
1553   * @param maxRegionsInTransition max number of regions in transition
1554   * @param cutoffTime when to exit balancer
1555   */
1556  private void balanceThrottling(long nextBalanceStartTime, int maxRegionsInTransition,
1557      long cutoffTime) {
1558    boolean interrupted = false;
1559
1560    // Sleep until the next balance plan start time.
1561    // But if there are zero regions in transition, the sleep can be skipped to speed things up.
1562    while (!interrupted && System.currentTimeMillis() < nextBalanceStartTime
1563        && this.assignmentManager.getRegionStates().hasRegionsInTransition()) {
1564      try {
1565        Thread.sleep(100);
1566      } catch (InterruptedException ie) {
1567        interrupted = true;
1568      }
1569    }
1570
1571    // Throttling by the max number of regions in transition
1572    while (!interrupted
1573        && maxRegionsInTransition > 0
1574        && this.assignmentManager.getRegionStates().getRegionsInTransitionCount()
1575        >= maxRegionsInTransition && System.currentTimeMillis() <= cutoffTime) {
1576      try {
1577        // sleep if the number of regions in transition exceeds the limit
1578        Thread.sleep(100);
1579      } catch (InterruptedException ie) {
1580        interrupted = true;
1581      }
1582    }
1583
1584    if (interrupted) Thread.currentThread().interrupt();
1585  }
1586
1587  public boolean balance() throws IOException {
1588    return balance(false);
1589  }
1590
1591  public boolean balance(boolean force) throws IOException {
1592    // if master not initialized, don't run balancer.
1593    if (!isInitialized()) {
1594      LOG.debug("Master has not been initialized, don't run balancer.");
1595      return false;
1596    }
1597
1598    if (isInMaintenanceMode()) {
1599      LOG.info("Master is in maintenance mode, don't run balancer.");
1600      return false;
1601    }
1602
1603    int maxRegionsInTransition = getMaxRegionsInTransition();
1604    synchronized (this.balancer) {
1605      // If the balancer is switched off, don't run it.
1606      if (!this.loadBalancerTracker.isBalancerOn()) return false;
1607      // Only allow one balance run at a time.
1608      if (this.assignmentManager.hasRegionsInTransition()) {
1609        List<RegionStateNode> regionsInTransition = assignmentManager.getRegionsInTransition();
1610        // if hbase:meta region is in transition, result of assignment cannot be recorded
1611        // ignore the force flag in that case
1612        boolean metaInTransition = assignmentManager.isMetaRegionInTransition();
1613        String prefix = force && !metaInTransition ? "R" : "Not r";
1614        List<RegionStateNode> toPrint = regionsInTransition;
1615        int max = 5;
1616        boolean truncated = false;
1617        if (regionsInTransition.size() > max) {
1618          toPrint = regionsInTransition.subList(0, max);
1619          truncated = true;
1620        }
1621        LOG.info(prefix + "unning balancer because " + regionsInTransition.size() +
1622          " region(s) in transition: " + toPrint + (truncated? "(truncated list)": ""));
1623        if (!force || metaInTransition) return false;
1624      }
1625      if (this.serverManager.areDeadServersInProgress()) {
1626        LOG.info("Not running balancer because processing dead regionserver(s): " +
1627          this.serverManager.getDeadServers());
1628        return false;
1629      }
1630      Map<ServerName, ServerMetrics> onlineServers = serverManager.getOnlineServers();
1631      int regionNotOnOnlineServer = 0;
1632      for (RegionState regionState : assignmentManager.getRegionStates().getRegionStates()) {
1633        if (regionState.isOpened() && !onlineServers
1634            .containsKey(regionState.getServerName())) {
1635          LOG.warn("{} 's server is not in the online server list.", regionState);
1636          regionNotOnOnlineServer++;
1637        }
1638      }
1639      if (regionNotOnOnlineServer > 0) {
1640        LOG.info("Not running balancer because {} regions found not on an online server",
1641            regionNotOnOnlineServer);
1642        return false;
1643      }
1644
1645      if (this.cpHost != null) {
1646        try {
1647          if (this.cpHost.preBalance()) {
1648            LOG.debug("Coprocessor bypassing balancer request");
1649            return false;
1650          }
1651        } catch (IOException ioe) {
1652          LOG.error("Error invoking master coprocessor preBalance()", ioe);
1653          return false;
1654        }
1655      }
1656
1657      boolean isByTable = getConfiguration().getBoolean("hbase.master.loadbalance.bytable", false);
1658      Map<TableName, Map<ServerName, List<RegionInfo>>> assignments =
1659          this.assignmentManager.getRegionStates().getAssignmentsForBalancer(isByTable);
1660      for (Map<ServerName, List<RegionInfo>> serverMap : assignments.values()) {
1661        serverMap.keySet().removeAll(this.serverManager.getDrainingServersList());
1662      }
1663
1664      // Give the balancer the current cluster state.
1665      this.balancer.setClusterMetrics(getClusterMetricsWithoutCoprocessor());
1666      this.balancer.setClusterLoad(assignments);
1667
1668      List<RegionPlan> plans = new ArrayList<>();
1669      for (Entry<TableName, Map<ServerName, List<RegionInfo>>> e : assignments.entrySet()) {
1670        List<RegionPlan> partialPlans = this.balancer.balanceCluster(e.getKey(), e.getValue());
1671        if (partialPlans != null) {
1672          plans.addAll(partialPlans);
1673        }
1674      }
1675
1676      long balanceStartTime = System.currentTimeMillis();
1677      long cutoffTime = balanceStartTime + this.maxBlancingTime;
1678      int rpCount = 0;  // number of RegionPlans balanced so far
1679      if (plans != null && !plans.isEmpty()) {
1680        int balanceInterval = this.maxBlancingTime / plans.size();
1681        LOG.info("Balancer plans size is " + plans.size() + ", the balance interval is "
1682            + balanceInterval + " ms, and the max number regions in transition is "
1683            + maxRegionsInTransition);
1684
1685        for (RegionPlan plan: plans) {
1686          LOG.info("balance " + plan);
1687          //TODO: bulk assign
1688          try {
1689            this.assignmentManager.moveAsync(plan);
1690          } catch (HBaseIOException hioe) {
1691            // Ignore failed plans here so one bad plan does not abort the whole balance run;
1692            // later calls of balance() can pick up the failed and skipped plans.
1693            LOG.warn("Failed balance plan: {}, just skip it", plan, hioe);
1694          }
1695          // rpCount records balance plans processed; it does not care whether a plan succeeds
1696          rpCount++;
1697
1698          balanceThrottling(balanceStartTime + rpCount * balanceInterval, maxRegionsInTransition,
1699            cutoffTime);
1700
1701          // if performing the next balance would exceed the cutoff time, exit the loop
1702          if (rpCount < plans.size() && System.currentTimeMillis() > cutoffTime) {
1703            // TODO: After balance, there should not be a cutoff time (keeping it as
1704            // a safety net for now)
1705            LOG.debug("No more balancing till next balance run; maxBalanceTime="
1706                + this.maxBlancingTime);
1707            break;
1708          }
1709        }
1710      }
1711
1712      if (this.cpHost != null) {
1713        try {
1714          this.cpHost.postBalance(rpCount < plans.size() ? plans.subList(0, rpCount) : plans);
1715        } catch (IOException ioe) {
1716          // balancing already succeeded so don't change the result
1717          LOG.error("Error invoking master coprocessor postBalance()", ioe);
1718        }
1719      }
1720    }
1721    // If the LoadBalancer did not generate any plans, it means the cluster is already balanced.
1722    // Return true indicating success.
1723    return true;
1724  }
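
  // Illustrative usage (a sketch, not part of this class): clients normally reach balance()
  // through the Admin API rather than calling it directly, e.g.:
  //
  //   try (Connection conn = ConnectionFactory.createConnection(conf);
  //        Admin admin = conn.getAdmin()) {
  //     boolean ran = admin.balance(); // triggers this method on the active master
  //   }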
1725
1726  @Override
1727  @VisibleForTesting
1728  public RegionNormalizer getRegionNormalizer() {
1729    return this.normalizer;
1730  }
1731
1732  /**
1733   * Perform normalization of cluster (invoked by {@link RegionNormalizerChore}).
1734   *
1735   * @return true if normalization step was performed successfully, false otherwise
1736   *    (specifically, if HMaster hasn't been initialized properly or normalization
1737   *    is globally disabled)
1738   */
1739  public boolean normalizeRegions() throws IOException {
1740    if (!isInitialized()) {
1741      LOG.debug("Master has not been initialized, don't run region normalizer.");
1742      return false;
1743    }
1744    if (this.getServerManager().isClusterShutdown()) {
1745      LOG.info("Cluster is shutting down, don't run region normalizer.");
1746      return false;
1747    }
1748    if (isInMaintenanceMode()) {
1749      LOG.info("Master is in maintenance mode, don't run region normalizer.");
1750      return false;
1751    }
1752    if (!this.regionNormalizerTracker.isNormalizerOn()) {
1753      LOG.debug("Region normalization is disabled, don't run region normalizer.");
1754      return false;
1755    }
1756
1757    synchronized (this.normalizer) {
1758      // Don't run the normalizer concurrently
1759      List<TableName> allEnabledTables = new ArrayList<>(
1760        this.tableStateManager.getTablesInStates(TableState.State.ENABLED));
1761
1762      Collections.shuffle(allEnabledTables);
1763
1764      for (TableName table : allEnabledTables) {
1765        if (isInMaintenanceMode()) {
1766          LOG.debug("Master is in maintenance mode, stop running region normalizer.");
1767          return false;
1768        }
1769
1770        TableDescriptor tblDesc = getTableDescriptors().get(table);
1771        if (table.isSystemTable() || (tblDesc != null &&
1772            !tblDesc.isNormalizationEnabled())) {
1773          LOG.trace("Skipping normalization for {}, as it's either system"
1774              + " table or doesn't have auto normalization turned on", table);
1775          continue;
1776        }
1777        List<NormalizationPlan> plans = this.normalizer.computePlanForTable(table);
1778        if (plans != null) {
1779          for (NormalizationPlan plan : plans) {
1780            plan.execute(clusterConnection.getAdmin());
1781            if (plan.getType() == PlanType.SPLIT) {
1782              splitPlanCount++;
1783            } else if (plan.getType() == PlanType.MERGE) {
1784              mergePlanCount++;
1785            }
1786          }
1787        }
1788      }
1789    }
1790    // If the normalizer did not generate any plans, it means the cluster is already balanced.
1791    // Return true indicating success.
1792    return true;
1793  }
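
  // Illustrative usage (sketch): normalization is normally driven by RegionNormalizerChore, but
  // operators can toggle and trigger it through the Admin API, e.g.:
  //
  //   admin.normalizerSwitch(true);    // enable the region normalizer cluster-wide
  //   boolean ran = admin.normalize(); // ask the active master for a normalization pass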
1794
1795  /**
1796   * @return Client info for use as prefix on an audit log string; who did an action
1797   */
1798  @Override
1799  public String getClientIdAuditPrefix() {
1800    return "Client=" + RpcServer.getRequestUserName().orElse(null)
1801        + "/" + RpcServer.getRemoteAddress().orElse(null);
1802  }
1803
1804  /**
1805   * Switch for the background CatalogJanitor thread.
1806   * Used for testing.  The thread will continue to run.  It will just be a noop
1807   * if disabled.
1808   * @param b If false, the catalog janitor won't do anything.
1809   */
1810  public void setCatalogJanitorEnabled(final boolean b) {
1811    this.catalogJanitorChore.setEnabled(b);
1812  }
1813
1814  @Override
1815  public long mergeRegions(
1816      final RegionInfo[] regionsToMerge,
1817      final boolean forcible,
1818      final long nonceGroup,
1819      final long nonce) throws IOException {
1820    checkInitialized();
1821
1822    final String mergeRegionsStr = Arrays.stream(regionsToMerge).
1823      map(r -> RegionInfo.getShortNameToLog(r)).collect(Collectors.joining(", "));
1824    return MasterProcedureUtil.submitProcedure(new NonceProcedureRunnable(this, nonceGroup, nonce) {
1825      @Override
1826      protected void run() throws IOException {
1827        getMaster().getMasterCoprocessorHost().preMergeRegions(regionsToMerge);
1828        String aid = getClientIdAuditPrefix();
1829        LOG.info("{} merge regions {}", aid, mergeRegionsStr);
1830        submitProcedure(new MergeTableRegionsProcedure(procedureExecutor.getEnvironment(),
1831            regionsToMerge, forcible));
1832        getMaster().getMasterCoprocessorHost().postMergeRegions(regionsToMerge);
1833      }
1834
1835      @Override
1836      protected String getDescription() {
1837        return "MergeTableProcedure";
1838      }
1839    });
1840  }
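
  // Illustrative usage (sketch): the merge procedure above is what serves, for example, an
  // Admin-side request such as the following, where regionA/regionB stand in for the encoded
  // names of two adjacent regions:
  //
  //   admin.mergeRegionsAsync(regionA, regionB, false).get(); // forcible == false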
1841
1842  @Override
1843  public long splitRegion(final RegionInfo regionInfo, final byte[] splitRow,
1844      final long nonceGroup, final long nonce)
1845  throws IOException {
1846    checkInitialized();
1847    return MasterProcedureUtil.submitProcedure(
1848        new MasterProcedureUtil.NonceProcedureRunnable(this, nonceGroup, nonce) {
1849      @Override
1850      protected void run() throws IOException {
1851        getMaster().getMasterCoprocessorHost().preSplitRegion(regionInfo.getTable(), splitRow);
1852        LOG.info(getClientIdAuditPrefix() + " split " + regionInfo.getRegionNameAsString());
1853
1854        // Execute the operation asynchronously
1855        submitProcedure(getAssignmentManager().createSplitProcedure(regionInfo, splitRow));
1856      }
1857
1858      @Override
1859      protected String getDescription() {
1860        return "SplitTableProcedure";
1861      }
1862    });
1863  }
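
  // Illustrative usage (sketch): the split procedure above backs an Admin-side request such as
  // the following, where the region and the split row "splitRow" are hypothetical:
  //
  //   admin.splitRegionAsync(regionInfo.getRegionName(), Bytes.toBytes("splitRow")).get();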
1864
1865  // Public so it can be accessed by tests. Blocks until move is done.
1866  // TODO: Replace with an async implementation from which you can get
1867  // a success/failure result.
1868  @VisibleForTesting
1869  public void move(final byte[] encodedRegionName, byte[] destServerName) throws HBaseIOException {
1870    RegionState regionState = assignmentManager.getRegionStates().
1871      getRegionState(Bytes.toString(encodedRegionName));
1872
1873    RegionInfo hri;
1874    if (regionState != null) {
1875      hri = regionState.getRegion();
1876    } else {
1877      throw new UnknownRegionException(Bytes.toStringBinary(encodedRegionName));
1878    }
1879
1880    ServerName dest;
1881    List<ServerName> exclude = hri.getTable().isSystemTable()
1882        ? assignmentManager.getExcludedServersForSystemTable() : new ArrayList<>(1);
1883    if (destServerName != null
1884        && exclude.contains(ServerName.valueOf(Bytes.toString(destServerName)))) {
1885      LOG.info(Bytes.toString(encodedRegionName) + " cannot move to "
1886          + Bytes.toString(destServerName) + " because the server is in the exclude list");
1887      destServerName = null;
1888    }
1889    if (destServerName == null || destServerName.length == 0) {
1890      LOG.info("Passed destination server name is null/empty so " +
1891        "choosing a server at random");
1892      exclude.add(regionState.getServerName());
1893      final List<ServerName> destServers = this.serverManager.createDestinationServersList(exclude);
1894      dest = balancer.randomAssignment(hri, destServers);
1895      if (dest == null) {
1896        LOG.debug("Unable to determine a plan to assign " + hri);
1897        return;
1898      }
1899    } else {
1900      ServerName candidate = ServerName.valueOf(Bytes.toString(destServerName));
1901      dest = balancer.randomAssignment(hri, Lists.newArrayList(candidate));
1902      if (dest == null) {
1903        LOG.debug("Unable to determine a plan to assign " + hri);
1904        return;
1905      }
1906      // TODO: What is this? I don't get it.
1907      if (dest.equals(serverName) && balancer instanceof BaseLoadBalancer
1908          && !((BaseLoadBalancer)balancer).shouldBeOnMaster(hri)) {
1909        // To avoid unnecessary region moving later by balancer. Don't put user
1910        // regions on master.
1911        LOG.debug("Skipping move of region " + hri.getRegionNameAsString()
1912          + " to avoid unnecessary region moving later by load balancer,"
1913          + " because it should not be on master");
1914        return;
1915      }
1916    }
1917
1918    if (dest.equals(regionState.getServerName())) {
1919      LOG.debug("Skipping move of region " + hri.getRegionNameAsString()
1920        + " because region already assigned to the same server " + dest + ".");
1921      return;
1922    }
1923
1924    // Now we can do the move
1925    RegionPlan rp = new RegionPlan(hri, regionState.getServerName(), dest);
1926    assert rp.getDestination() != null: rp.toString() + " " + dest;
1927
1928    try {
1929      checkInitialized();
1930      if (this.cpHost != null) {
1931        this.cpHost.preMove(hri, rp.getSource(), rp.getDestination());
1932      }
1933      // Warm up the region on the destination before initiating the move. This call
1934      // is synchronous and takes some time, so do it before the source region gets
1935      // closed.
1936      serverManager.sendRegionWarmup(rp.getDestination(), hri);
1937
1938      LOG.info(getClientIdAuditPrefix() + " move " + rp + ", running balancer");
1939      Future<byte []> future = this.assignmentManager.moveAsync(rp);
1940      try {
1941        // Is this going to work? Will we throw exception on error?
1942        // TODO: CompletableFuture rather than this stunted Future.
1943        future.get();
1944      } catch (InterruptedException | ExecutionException e) {
1945        throw new HBaseIOException(e);
1946      }
1947      if (this.cpHost != null) {
1948        this.cpHost.postMove(hri, rp.getSource(), rp.getDestination());
1949      }
1950    } catch (IOException ioe) {
1951      if (ioe instanceof HBaseIOException) {
1952        throw (HBaseIOException)ioe;
1953      }
1954      throw new HBaseIOException(ioe);
1955    }
1956  }
1957
1958  @Override
1959  public long createTable(
1960      final TableDescriptor tableDescriptor,
1961      final byte [][] splitKeys,
1962      final long nonceGroup,
1963      final long nonce) throws IOException {
1964    checkInitialized();
1965
1966    String namespace = tableDescriptor.getTableName().getNamespaceAsString();
1967    this.clusterSchemaService.getNamespace(namespace);
1968
1969    RegionInfo[] newRegions = ModifyRegionUtils.createRegionInfos(tableDescriptor, splitKeys);
1970    sanityCheckTableDescriptor(tableDescriptor);
1971
1972    return MasterProcedureUtil.submitProcedure(
1973        new MasterProcedureUtil.NonceProcedureRunnable(this, nonceGroup, nonce) {
1974      @Override
1975      protected void run() throws IOException {
1976        getMaster().getMasterCoprocessorHost().preCreateTable(tableDescriptor, newRegions);
1977
1978        LOG.info(getClientIdAuditPrefix() + " create " + tableDescriptor);
1979
1980        // TODO: We can handle/merge duplicate requests, and differentiate the case of
1981        //       TableExistsException by saying if the schema is the same or not.
1982        //
1983        // We need to wait for the procedure to potentially fail due to "prepare" sanity
1984        // checks. This will block only the beginning of the procedure. See HBASE-19953.
1985        ProcedurePrepareLatch latch = ProcedurePrepareLatch.createBlockingLatch();
1986        submitProcedure(new CreateTableProcedure(
1987            procedureExecutor.getEnvironment(), tableDescriptor, newRegions, latch));
1988        latch.await();
1989
1990        getMaster().getMasterCoprocessorHost().postCreateTable(tableDescriptor, newRegions);
1991      }
1992
1993      @Override
1994      protected String getDescription() {
1995        return "CreateTableProcedure";
1996      }
1997    });
1998  }
1999
2000  @Override
2001  public long createSystemTable(final TableDescriptor tableDescriptor) throws IOException {
2002    if (isStopped()) {
2003      throw new MasterNotRunningException();
2004    }
2005
2006    TableName tableName = tableDescriptor.getTableName();
2007    if (!tableName.isSystemTable()) {
2008      throw new IllegalArgumentException(
2009        "Only system table creation can use this createSystemTable API");
2010    }
2011
2012    RegionInfo[] newRegions = ModifyRegionUtils.createRegionInfos(tableDescriptor, null);
2013
2014    LOG.info(getClientIdAuditPrefix() + " create " + tableDescriptor);
2015
2016    // This special create-table call is invoked locally on the master. Since there is no RPC,
2017    // there is no need to use a nonce to detect duplicated RPC calls.
2018    long procId = this.procedureExecutor.submitProcedure(
2019      new CreateTableProcedure(procedureExecutor.getEnvironment(), tableDescriptor, newRegions));
2020
2021    return procId;
2022  }
2023
2024  /**
2025   * Checks whether the table conforms to some sane limits, and that configured
2026   * values (compression, etc.) work. Throws an exception if something is wrong.
2027   * @throws IOException if a check fails and sanity checks are not set to warn-only
2028   */
2029  private void sanityCheckTableDescriptor(final TableDescriptor htd) throws IOException {
2030    final String CONF_KEY = "hbase.table.sanity.checks";
2031    boolean logWarn = false;
2032    if (!conf.getBoolean(CONF_KEY, true)) {
2033      logWarn = true;
2034    }
2035    String tableVal = htd.getValue(CONF_KEY);
2036    if (tableVal != null && !Boolean.parseBoolean(tableVal)) {
2037      logWarn = true;
2038    }
2039
2040    // check max file size
2041    long maxFileSizeLowerLimit = 2 * 1024 * 1024L; // 2M is the default lower limit
2042    long maxFileSize = htd.getMaxFileSize();
2043    if (maxFileSize < 0) {
2044      maxFileSize = conf.getLong(HConstants.HREGION_MAX_FILESIZE, maxFileSizeLowerLimit);
2045    }
2046    if (maxFileSize < conf.getLong("hbase.hregion.max.filesize.limit", maxFileSizeLowerLimit)) {
2047      String message = "MAX_FILESIZE for table descriptor or "
2048          + "\"hbase.hregion.max.filesize\" (" + maxFileSize
2049          + ") is too small, which might cause over-splitting into an unmanageable "
2050          + "number of regions.";
2051      warnOrThrowExceptionForFailure(logWarn, CONF_KEY, message, null);
2052    }
2053
2054    // check flush size
2055    long flushSizeLowerLimit = 1024 * 1024L; // 1M is the default lower limit
2056    long flushSize = htd.getMemStoreFlushSize();
2057    if (flushSize < 0) {
2058      flushSize = conf.getLong(HConstants.HREGION_MEMSTORE_FLUSH_SIZE, flushSizeLowerLimit);
2059    }
2060    if (flushSize < conf.getLong("hbase.hregion.memstore.flush.size.limit", flushSizeLowerLimit)) {
2061      String message = "MEMSTORE_FLUSHSIZE for table descriptor or "
2062          + "\"hbase.hregion.memstore.flush.size\" (" + flushSize + ") is too small, which might"
2063          + " cause very frequent flushing.";
2064      warnOrThrowExceptionForFailure(logWarn, CONF_KEY, message, null);
2065    }
2066
2067    // check that coprocessors and other specified plugin classes can be loaded
2068    try {
2069      checkClassLoading(conf, htd);
2070    } catch (Exception ex) {
2071      warnOrThrowExceptionForFailure(logWarn, CONF_KEY, ex.getMessage(), null);
2072    }
2073
2074    // check compression can be loaded
2075    try {
2076      checkCompression(htd);
2077    } catch (IOException e) {
2078      warnOrThrowExceptionForFailure(logWarn, CONF_KEY, e.getMessage(), e);
2079    }
2080
2081    // check encryption can be loaded
2082    try {
2083      checkEncryption(conf, htd);
2084    } catch (IOException e) {
2085      warnOrThrowExceptionForFailure(logWarn, CONF_KEY, e.getMessage(), e);
2086    }
2087    // Verify compaction policy
2088    try {
2089      checkCompactionPolicy(conf, htd);
2090    } catch (IOException e) {
2091      warnOrThrowExceptionForFailure(false, CONF_KEY, e.getMessage(), e);
2092    }
2093    // check that we have at least 1 CF
2094    if (htd.getColumnFamilyCount() == 0) {
2095      String message = "Table should have at least one column family.";
2096      warnOrThrowExceptionForFailure(logWarn, CONF_KEY, message, null);
2097    }
2098
2099    // check that we have minimum 1 region replicas
2100    int regionReplicas = htd.getRegionReplication();
2101    if (regionReplicas < 1) {
2102      String message = "Table region replication should be at least one.";
2103      warnOrThrowExceptionForFailure(logWarn, CONF_KEY, message, null);
2104    }
2105
2106    for (ColumnFamilyDescriptor hcd : htd.getColumnFamilies()) {
2107      if (hcd.getTimeToLive() <= 0) {
2108        String message = "TTL for column family " + hcd.getNameAsString() + " must be positive.";
2109        warnOrThrowExceptionForFailure(logWarn, CONF_KEY, message, null);
2110      }
2111
2112      // check blockSize
2113      if (hcd.getBlocksize() < 1024 || hcd.getBlocksize() > 16 * 1024 * 1024) {
2114        String message = "Block size for column family " + hcd.getNameAsString()
2115            + " must be between 1K and 16MB.";
2116        warnOrThrowExceptionForFailure(logWarn, CONF_KEY, message, null);
2117      }
2118
2119      // check versions
2120      if (hcd.getMinVersions() < 0) {
2121        String message = "Min versions for column family " + hcd.getNameAsString()
2122          + " must be non-negative.";
2123        warnOrThrowExceptionForFailure(logWarn, CONF_KEY, message, null);
2124      }
2125      // max versions already being checked
2126
2127      // HBASE-13776 Setting illegal versions for ColumnFamilyDescriptor
2128      //  does not throw IllegalArgumentException
2129      // check minVersions <= maxVersions
2130      if (hcd.getMinVersions() > hcd.getMaxVersions()) {
2131        String message = "Min versions for column family " + hcd.getNameAsString()
2132            + " must not be greater than the max versions.";
2133        warnOrThrowExceptionForFailure(logWarn, CONF_KEY, message, null);
2134      }
2135
2136      // check replication scope
2137      checkReplicationScope(hcd);
2138
2139      // check data replication factor, it can be 0(default value) when user has not explicitly
2140      // set the value, in this case we use default replication factor set in the file system.
2141      if (hcd.getDFSReplication() < 0) {
2142        String message = "HFile replication for column family " + hcd.getNameAsString()
2143            + " must be non-negative.";
2144        warnOrThrowExceptionForFailure(logWarn, CONF_KEY, message, null);
2145      }
2146
2147      // TODO: should we check coprocessors and encryption ?
2148    }
2149  }
2150
2151  private void checkReplicationScope(ColumnFamilyDescriptor hcd) throws IOException{
2152    // check replication scope
2153    WALProtos.ScopeType scope = WALProtos.ScopeType.valueOf(hcd.getScope());
2154    if (scope == null) {
2155      String message = "Replication scope for column family "
2156          + hcd.getNameAsString() + " is " + hcd.getScope() + " which is invalid.";
2157
2158      LOG.error(message);
2159      throw new DoNotRetryIOException(message);
2160    }
2161  }
2162
2163  private void checkCompactionPolicy(Configuration conf, TableDescriptor htd)
2164      throws IOException {
2165    // FIFO compaction has some requirements.
2166    // Note that FIFOCompactionPolicy ignores periodic major compactions.
2167    String className = htd.getValue(DefaultStoreEngine.DEFAULT_COMPACTION_POLICY_CLASS_KEY);
2168    if (className == null) {
2169      className =
2170          conf.get(DefaultStoreEngine.DEFAULT_COMPACTION_POLICY_CLASS_KEY,
2171            ExploringCompactionPolicy.class.getName());
2172    }
2173
2174    int blockingFileCount = HStore.DEFAULT_BLOCKING_STOREFILE_COUNT;
2175    String sv = htd.getValue(HStore.BLOCKING_STOREFILES_KEY);
2176    if (sv != null) {
2177      blockingFileCount = Integer.parseInt(sv);
2178    } else {
2179      blockingFileCount = conf.getInt(HStore.BLOCKING_STOREFILES_KEY, blockingFileCount);
2180    }
2181
2182    for (ColumnFamilyDescriptor hcd : htd.getColumnFamilies()) {
2183      String compactionPolicy =
2184          hcd.getConfigurationValue(DefaultStoreEngine.DEFAULT_COMPACTION_POLICY_CLASS_KEY);
2185      if (compactionPolicy == null) {
2186        compactionPolicy = className;
2187      }
2188      if (!compactionPolicy.equals(FIFOCompactionPolicy.class.getName())) {
2189        continue;
2190      }
2191      // FIFOCompaction
2192      String message = null;
2193
2194      // 1. Check TTL
2195      if (hcd.getTimeToLive() == ColumnFamilyDescriptorBuilder.DEFAULT_TTL) {
2196        message = "Default TTL is not supported for FIFO compaction";
2197        throw new IOException(message);
2198      }
2199
2200      // 2. Check min versions
2201      if (hcd.getMinVersions() > 0) {
2202        message = "MIN_VERSION > 0 is not supported for FIFO compaction";
2203        throw new IOException(message);
2204      }
2205
2206      // 3. blocking file count
2207      sv = hcd.getConfigurationValue(HStore.BLOCKING_STOREFILES_KEY);
2208      if (sv != null) {
2209        blockingFileCount = Integer.parseInt(sv);
2210      }
2211      if (blockingFileCount < 1000) {
2212        message =
2213            "Blocking file count '" + HStore.BLOCKING_STOREFILES_KEY + "' " + blockingFileCount
2214                + " is below recommended minimum of 1000 for column family "+ hcd.getNameAsString();
2215        throw new IOException(message);
2216      }
2217    }
2218  }
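
  // Illustrative sketch of a column family that passes the FIFO checks above (values are
  // examples, not recommendations):
  //
  //   ColumnFamilyDescriptor cf = ColumnFamilyDescriptorBuilder.newBuilder(Bytes.toBytes("f"))
  //       .setTimeToLive(24 * 60 * 60) // an explicit TTL, not the default
  //       .setMinVersions(0)           // FIFO requires MIN_VERSIONS == 0
  //       .setConfiguration(HStore.BLOCKING_STOREFILES_KEY, "1000") // >= 1000
  //       .setConfiguration(DefaultStoreEngine.DEFAULT_COMPACTION_POLICY_CLASS_KEY,
  //           FIFOCompactionPolicy.class.getName())
  //       .build();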
2219
2220  // HBASE-13350 - Helper method to log warning on sanity check failures if checks disabled.
2221  private static void warnOrThrowExceptionForFailure(boolean logWarn, String confKey,
2222      String message, Exception cause) throws IOException {
2223    if (!logWarn) {
2224      throw new DoNotRetryIOException(message + " Set " + confKey +
2225          " to false in the conf or the table descriptor if you want to bypass sanity checks", cause);
2226    }
2227    LOG.warn(message);
2228  }
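
  // Illustrative sketch: sanity checks can be relaxed globally ("hbase.table.sanity.checks" in
  // the site configuration) or per table; the table name "demo" and family "f" are hypothetical:
  //
  //   TableDescriptor td = TableDescriptorBuilder.newBuilder(TableName.valueOf("demo"))
  //       .setValue("hbase.table.sanity.checks", "false") // warn instead of throwing
  //       .setColumnFamily(ColumnFamilyDescriptorBuilder.of("f"))
  //       .build();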
2229
2230  private void startActiveMasterManager(int infoPort) throws KeeperException {
2231    String backupZNode = ZNodePaths.joinZNode(
2232      zooKeeper.getZNodePaths().backupMasterAddressesZNode, serverName.toString());
2233    /*
2234     * Add a ZNode for ourselves in the backup master directory since we
2235     * may not become the active master. If so, we want the actual active
2236     * master to know we are backup masters, so that it won't assign
2237     * regions to us if so configured.
2238     *
2239     * If we become the active master later, ActiveMasterManager will delete
2240     * this node explicitly. If we crash before then, ZooKeeper will delete
2241     * this node for us since it is ephemeral.
2242     */
2243    LOG.info("Adding backup master ZNode " + backupZNode);
2244    if (!MasterAddressTracker.setMasterAddress(zooKeeper, backupZNode, serverName, infoPort)) {
2245      LOG.warn("Failed create of " + backupZNode + " by " + serverName);
2246    }
2247    this.activeMasterManager.setInfoPort(infoPort);
2248    int timeout = conf.getInt(HConstants.ZK_SESSION_TIMEOUT, HConstants.DEFAULT_ZK_SESSION_TIMEOUT);
2249    // If we're a backup master, stall until a primary writes the master address
2250    if (conf.getBoolean(HConstants.MASTER_TYPE_BACKUP, HConstants.DEFAULT_MASTER_TYPE_BACKUP)) {
2251      LOG.debug("HMaster started in backup mode. Stalling until master znode is written.");
2252      // This will only be a minute or so while the cluster starts up,
2253      // so don't worry about setting watches on the parent znode
2254      while (!activeMasterManager.hasActiveMaster()) {
2255        LOG.debug("Waiting for master address and cluster state znode to be written.");
2256        Threads.sleep(timeout);
2257      }
2258    }
2259    MonitoredTask status = TaskMonitor.get().createStatus("Master startup");
2260    status.setDescription("Master startup");
2261    try {
2262      if (activeMasterManager.blockUntilBecomingActiveMaster(timeout, status)) {
2263        finishActiveMasterInitialization(status);
2264      }
2265    } catch (Throwable t) {
2266      status.setStatus("Failed to become active: " + t.getMessage());
2267      LOG.error(HBaseMarkers.FATAL, "Failed to become active master", t);
2268      // HBASE-5680: Likely hadoop23 vs hadoop 20.x/1.x incompatibility
2269      if (t instanceof NoClassDefFoundError && t.getMessage().
2270          contains("org/apache/hadoop/hdfs/protocol/HdfsConstants$SafeModeAction")) {
2271        // improved error message for this special case
2272        abort("HBase is having a problem with its Hadoop jars.  You may need to recompile " +
2273          "HBase against Hadoop version " + org.apache.hadoop.util.VersionInfo.getVersion() +
2274          " or change your hadoop jars to start properly", t);
2275      } else {
2276        abort("Unhandled exception. Starting shutdown.", t);
2277      }
2278    } finally {
2279      status.cleanup();
2280    }
2281  }
2282
2283  private void checkCompression(final TableDescriptor htd)
2284  throws IOException {
2285    if (!this.masterCheckCompression) return;
2286    for (ColumnFamilyDescriptor hcd : htd.getColumnFamilies()) {
2287      checkCompression(hcd);
2288    }
2289  }
2290
2291  private void checkCompression(final ColumnFamilyDescriptor hcd)
2292  throws IOException {
2293    if (!this.masterCheckCompression) return;
2294    CompressionTest.testCompression(hcd.getCompressionType());
2295    CompressionTest.testCompression(hcd.getCompactionCompressionType());
2296  }
2297
2298  private void checkEncryption(final Configuration conf, final TableDescriptor htd)
2299  throws IOException {
2300    if (!this.masterCheckEncryption) return;
2301    for (ColumnFamilyDescriptor hcd : htd.getColumnFamilies()) {
2302      checkEncryption(conf, hcd);
2303    }
2304  }
2305
2306  private void checkEncryption(final Configuration conf, final ColumnFamilyDescriptor hcd)
2307  throws IOException {
2308    if (!this.masterCheckEncryption) return;
2309    EncryptionTest.testEncryption(conf, hcd.getEncryptionType(), hcd.getEncryptionKey());
2310  }
2311
2312  private void checkClassLoading(final Configuration conf, final TableDescriptor htd)
2313  throws IOException {
2314    RegionSplitPolicy.getSplitPolicyClass(htd, conf);
2315    RegionCoprocessorHost.testTableCoprocessorAttrs(conf, htd);
2316  }
2317
2318  private static boolean isCatalogTable(final TableName tableName) {
2319    return tableName.equals(TableName.META_TABLE_NAME);
2320  }
2321
2322  @Override
2323  public long deleteTable(
2324      final TableName tableName,
2325      final long nonceGroup,
2326      final long nonce) throws IOException {
2327    checkInitialized();
2328
2329    return MasterProcedureUtil.submitProcedure(
2330        new MasterProcedureUtil.NonceProcedureRunnable(this, nonceGroup, nonce) {
2331      @Override
2332      protected void run() throws IOException {
2333        getMaster().getMasterCoprocessorHost().preDeleteTable(tableName);
2334
2335        LOG.info(getClientIdAuditPrefix() + " delete " + tableName);
2336
2337        // TODO: We can handle/merge duplicate request
2338        //
2339        // We need to wait for the procedure to potentially fail due to "prepare" sanity
2340        // checks. This will block only the beginning of the procedure. See HBASE-19953.
2341        ProcedurePrepareLatch latch = ProcedurePrepareLatch.createBlockingLatch();
2342        submitProcedure(new DeleteTableProcedure(procedureExecutor.getEnvironment(),
2343            tableName, latch));
2344        latch.await();
2345
2346        getMaster().getMasterCoprocessorHost().postDeleteTable(tableName);
2347      }
2348
2349      @Override
2350      protected String getDescription() {
2351        return "DeleteTableProcedure";
2352      }
2353    });
2354  }
2355
2356  @Override
2357  public long truncateTable(
2358      final TableName tableName,
2359      final boolean preserveSplits,
2360      final long nonceGroup,
2361      final long nonce) throws IOException {
2362    checkInitialized();
2363
2364    return MasterProcedureUtil.submitProcedure(
2365        new MasterProcedureUtil.NonceProcedureRunnable(this, nonceGroup, nonce) {
2366      @Override
2367      protected void run() throws IOException {
2368        getMaster().getMasterCoprocessorHost().preTruncateTable(tableName);
2369
2370        LOG.info(getClientIdAuditPrefix() + " truncate " + tableName);
2371        ProcedurePrepareLatch latch = ProcedurePrepareLatch.createLatch(2, 0);
2372        submitProcedure(new TruncateTableProcedure(procedureExecutor.getEnvironment(),
2373            tableName, preserveSplits, latch));
2374        latch.await();
2375
2376        getMaster().getMasterCoprocessorHost().postTruncateTable(tableName);
2377      }
2378
2379      @Override
2380      protected String getDescription() {
2381        return "TruncateTableProcedure";
2382      }
2383    });
2384  }
2385
2386  @Override
2387  public long addColumn(final TableName tableName, final ColumnFamilyDescriptor column,
2388      final long nonceGroup, final long nonce) throws IOException {
2389    checkInitialized();
2390    checkTableExists(tableName);
2391
2392    return modifyTable(tableName, new TableDescriptorGetter() {
2393
2394      @Override
2395      public TableDescriptor get() throws IOException {
2396        TableDescriptor old = getTableDescriptors().get(tableName);
2397        if (old.hasColumnFamily(column.getName())) {
2398          throw new InvalidFamilyOperationException("Column family '" + column.getNameAsString()
2399              + "' in table '" + tableName + "' already exists so cannot be added");
2400        }
2401
2402        return TableDescriptorBuilder.newBuilder(old).setColumnFamily(column).build();
2403      }
2404    }, nonceGroup, nonce);
2405  }
2406
2407  /**
2408   * Implement to return TableDescriptor after pre-checks
2409   */
2410  protected interface TableDescriptorGetter {
2411    TableDescriptor get() throws IOException;
2412  }
2413
2414  @Override
2415  public long modifyColumn(final TableName tableName, final ColumnFamilyDescriptor descriptor,
2416      final long nonceGroup, final long nonce) throws IOException {
2417    checkInitialized();
2418    checkTableExists(tableName);
2419    return modifyTable(tableName, new TableDescriptorGetter() {
2420
2421      @Override
2422      public TableDescriptor get() throws IOException {
2423        TableDescriptor old = getTableDescriptors().get(tableName);
2424        if (!old.hasColumnFamily(descriptor.getName())) {
2425          throw new InvalidFamilyOperationException("Family '" + descriptor.getNameAsString()
2426              + "' does not exist, so it cannot be modified");
2427        }
2428
2429        return TableDescriptorBuilder.newBuilder(old).modifyColumnFamily(descriptor).build();
2430      }
2431    }, nonceGroup, nonce);
2432  }
2433
2434  @Override
2435  public long deleteColumn(final TableName tableName, final byte[] columnName,
2436      final long nonceGroup, final long nonce) throws IOException {
2437    checkInitialized();
2438    checkTableExists(tableName);
2439
2440    return modifyTable(tableName, new TableDescriptorGetter() {
2441
2442      @Override
2443      public TableDescriptor get() throws IOException {
2444        TableDescriptor old = getTableDescriptors().get(tableName);
2445
2446        if (!old.hasColumnFamily(columnName)) {
2447          throw new InvalidFamilyOperationException("Family '" + Bytes.toString(columnName)
2448              + "' does not exist, so it cannot be deleted");
2449        }
2450        if (old.getColumnFamilyCount() == 1) {
2451          throw new InvalidFamilyOperationException("Family '" + Bytes.toString(columnName)
2452              + "' is the only column family in the table, so it cannot be deleted");
2453        }
2454        return TableDescriptorBuilder.newBuilder(old).removeColumnFamily(columnName).build();
2455      }
2456    }, nonceGroup, nonce);
2457  }
2458
2459  @Override
2460  public long enableTable(final TableName tableName, final long nonceGroup, final long nonce)
2461      throws IOException {
2462    checkInitialized();
2463
2464    return MasterProcedureUtil.submitProcedure(
2465        new MasterProcedureUtil.NonceProcedureRunnable(this, nonceGroup, nonce) {
2466      @Override
2467      protected void run() throws IOException {
2468        getMaster().getMasterCoprocessorHost().preEnableTable(tableName);
2469
2470        // Normally, it would make sense for this authorization check to exist inside
2471        // AccessController, but because the authorization check is done based on internal state
2472        // (rather than explicit permissions) we'll do the check here instead of in the
2473        // coprocessor.
2474        MasterQuotaManager quotaManager = getMasterQuotaManager();
2475        if (quotaManager != null) {
2476          if (quotaManager.isQuotaInitialized()) {
2477            SpaceQuotaSnapshot currSnapshotOfTable =
2478                QuotaTableUtil.getCurrentSnapshotFromQuotaTable(getConnection(), tableName);
2479            if (currSnapshotOfTable != null) {
2480              SpaceQuotaStatus quotaStatus = currSnapshotOfTable.getQuotaStatus();
2481              if (quotaStatus.isInViolation()
2482                  && SpaceViolationPolicy.DISABLE == quotaStatus.getPolicy()) {
2483                throw new AccessDeniedException("Enabling the table '" + tableName
2484                    + "' is disallowed due to a violated space quota.");
2485              }
2486            }
2487          } else if (LOG.isTraceEnabled()) {
2488            LOG.trace("Unable to check for space quotas as the MasterQuotaManager is not enabled");
2489          }
2490        }
2491
2492        LOG.info(getClientIdAuditPrefix() + " enable " + tableName);
2493
2494        // Execute the operation asynchronously - client will check the progress of the operation
        // In case the request is from a <1.1 client, before returning we want to make
        // sure that the table is prepared to be enabled (the table is locked and the
        // table state is set).
2498        // Note: if the procedure throws exception, we will catch it and rethrow.
2499        final ProcedurePrepareLatch prepareLatch = ProcedurePrepareLatch.createLatch();
2500        submitProcedure(new EnableTableProcedure(procedureExecutor.getEnvironment(),
2501            tableName, false, prepareLatch));
2502        prepareLatch.await();
2503
2504        getMaster().getMasterCoprocessorHost().postEnableTable(tableName);
2505      }
2506
2507      @Override
2508      protected String getDescription() {
2509        return "EnableTableProcedure";
2510      }
2511    });
2512  }
2513
2514  @Override
2515  public long disableTable(final TableName tableName, final long nonceGroup, final long nonce)
2516      throws IOException {
2517    checkInitialized();
2518
2519    return MasterProcedureUtil.submitProcedure(
2520        new MasterProcedureUtil.NonceProcedureRunnable(this, nonceGroup, nonce) {
2521      @Override
2522      protected void run() throws IOException {
2523        getMaster().getMasterCoprocessorHost().preDisableTable(tableName);
2524
2525        LOG.info(getClientIdAuditPrefix() + " disable " + tableName);
2526
2527        // Execute the operation asynchronously - client will check the progress of the operation
        // In case the request is from a <1.1 client, before returning we want to make
        // sure that the table is prepared to be disabled (the table is locked and the
        // table state is set).
2531        // Note: if the procedure throws exception, we will catch it and rethrow.
2532        //
2533        // We need to wait for the procedure to potentially fail due to "prepare" sanity
2534        // checks. This will block only the beginning of the procedure. See HBASE-19953.
2535        final ProcedurePrepareLatch prepareLatch = ProcedurePrepareLatch.createBlockingLatch();
2536        submitProcedure(new DisableTableProcedure(procedureExecutor.getEnvironment(),
2537            tableName, false, prepareLatch));
2538        prepareLatch.await();
2539
2540        getMaster().getMasterCoprocessorHost().postDisableTable(tableName);
2541      }
2542
2543      @Override
2544      protected String getDescription() {
2545        return "DisableTableProcedure";
2546      }
2547    });
2548  }
2549
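  /**
   * Shared implementation used by modifyColumn, deleteColumn and the public modifyTable: the
   * supplied {@link TableDescriptorGetter} computes the new descriptor, which is sanity-checked
   * before the ModifyTableProcedure is submitted.
   */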
2550  private long modifyTable(final TableName tableName,
2551      final TableDescriptorGetter newDescriptorGetter, final long nonceGroup, final long nonce)
2552      throws IOException {
2553    return MasterProcedureUtil
2554        .submitProcedure(new MasterProcedureUtil.NonceProcedureRunnable(this, nonceGroup, nonce) {
2555          @Override
2556          protected void run() throws IOException {
2557            TableDescriptor newDescriptor = newDescriptorGetter.get();
2558            sanityCheckTableDescriptor(newDescriptor);
2559            TableDescriptor oldDescriptor = getMaster().getTableDescriptors().get(tableName);
2560            getMaster().getMasterCoprocessorHost().preModifyTable(tableName, oldDescriptor,
2561              newDescriptor);
2562
2563            LOG.info(getClientIdAuditPrefix() + " modify " + tableName);
2564
            // Execute the operation synchronously - wait for the operation to complete before
            // continuing.
2567            //
2568            // We need to wait for the procedure to potentially fail due to "prepare" sanity
2569            // checks. This will block only the beginning of the procedure. See HBASE-19953.
2570            ProcedurePrepareLatch latch = ProcedurePrepareLatch.createBlockingLatch();
2571            submitProcedure(
2572              new ModifyTableProcedure(procedureExecutor.getEnvironment(), newDescriptor, latch));
2573            latch.await();
2574
2575            getMaster().getMasterCoprocessorHost().postModifyTable(tableName, oldDescriptor,
2576              newDescriptor);
2577          }
2578
2579          @Override
2580          protected String getDescription() {
2581            return "ModifyTableProcedure";
2582          }
2583        });
2584
2585  }
2586
2587  @Override
2588  public long modifyTable(final TableName tableName, final TableDescriptor newDescriptor,
2589      final long nonceGroup, final long nonce) throws IOException {
2590    checkInitialized();
2591    return modifyTable(tableName, new TableDescriptorGetter() {
2592      @Override
2593      public TableDescriptor get() throws IOException {
2594        return newDescriptor;
2595      }
2596    }, nonceGroup, nonce);
2597
2598  }
2599
2600  public long restoreSnapshot(final SnapshotDescription snapshotDesc,
2601      final long nonceGroup, final long nonce, final boolean restoreAcl) throws IOException {
2602    checkInitialized();
2603    getSnapshotManager().checkSnapshotSupport();
2604
    // Ensure the namespace exists; this will throw if the namespace is unknown.
2606    final TableName dstTable = TableName.valueOf(snapshotDesc.getTable());
2607    getClusterSchema().getNamespace(dstTable.getNamespaceAsString());
2608
2609    return MasterProcedureUtil.submitProcedure(
2610        new MasterProcedureUtil.NonceProcedureRunnable(this, nonceGroup, nonce) {
2611      @Override
2612      protected void run() throws IOException {
        setProcId(
          getSnapshotManager().restoreOrCloneSnapshot(snapshotDesc, getNonceKey(), restoreAcl));
2615      }
2616
2617      @Override
2618      protected String getDescription() {
2619        return "RestoreSnapshotProcedure";
2620      }
2621    });
2622  }
2623
2624  private void checkTableExists(final TableName tableName)
2625      throws IOException, TableNotFoundException {
2626    if (!MetaTableAccessor.tableExists(getConnection(), tableName)) {
2627      throw new TableNotFoundException(tableName);
2628    }
2629  }
2630
2631  @Override
2632  public void checkTableModifiable(final TableName tableName)
2633      throws IOException, TableNotFoundException, TableNotDisabledException {
2634    if (isCatalogTable(tableName)) {
2635      throw new IOException("Can't modify catalog tables");
2636    }
2637    checkTableExists(tableName);
2638    TableState ts = getTableStateManager().getTableState(tableName);
2639    if (!ts.isDisabled()) {
2640      throw new TableNotDisabledException("Not DISABLED; " + ts);
2641    }
2642  }
2643
2644  public ClusterMetrics getClusterMetricsWithoutCoprocessor() throws InterruptedIOException {
2645    return getClusterMetricsWithoutCoprocessor(EnumSet.allOf(Option.class));
2646  }
2647
2648  public ClusterMetrics getClusterMetricsWithoutCoprocessor(EnumSet<Option> options)
2649      throws InterruptedIOException {
2650    ClusterMetricsBuilder builder = ClusterMetricsBuilder.newBuilder();
    // Given that an HBase 1.x client can't submit the request with Options,
    // we return all information to the client if the list of Options is empty.
2653    if (options.isEmpty()) {
2654      options = EnumSet.allOf(Option.class);
2655    }
2656
2657    for (Option opt : options) {
2658      switch (opt) {
2659        case HBASE_VERSION: builder.setHBaseVersion(VersionInfo.getVersion()); break;
2660        case CLUSTER_ID: builder.setClusterId(getClusterId()); break;
2661        case MASTER: builder.setMasterName(getServerName()); break;
2662        case BACKUP_MASTERS: builder.setBackerMasterNames(getBackupMasters()); break;
2663        case LIVE_SERVERS: {
2664          if (serverManager != null) {
2665            builder.setLiveServerMetrics(serverManager.getOnlineServers().entrySet().stream()
              .collect(Collectors.toMap(Entry::getKey, Entry::getValue)));
2667          }
2668          break;
2669        }
2670        case DEAD_SERVERS: {
2671          if (serverManager != null) {
2672            builder.setDeadServerNames(new ArrayList<>(
2673              serverManager.getDeadServers().copyServerNames()));
2674          }
2675          break;
2676        }
2677        case MASTER_COPROCESSORS: {
2678          if (cpHost != null) {
2679            builder.setMasterCoprocessorNames(Arrays.asList(getMasterCoprocessors()));
2680          }
2681          break;
2682        }
2683        case REGIONS_IN_TRANSITION: {
2684          if (assignmentManager != null) {
2685            builder.setRegionsInTransition(assignmentManager.getRegionStates()
2686                .getRegionsStateInTransition());
2687          }
2688          break;
2689        }
2690        case BALANCER_ON: {
2691          if (loadBalancerTracker != null) {
2692            builder.setBalancerOn(loadBalancerTracker.isBalancerOn());
2693          }
2694          break;
2695        }
2696        case MASTER_INFO_PORT: {
2697          if (infoServer != null) {
2698            builder.setMasterInfoPort(infoServer.getPort());
2699          }
2700          break;
2701        }
2702        case TABLE_TO_REGIONS_COUNT: {
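          // Region counts are only meaningful on an active, fully initialized master.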
2703          if (isActiveMaster() && isInitialized() && assignmentManager != null) {
2704            try {
2705              Map<TableName, RegionStatesCount> tableRegionStatesCountMap = new HashMap<>();
2706              Map<String, TableDescriptor> tableDescriptorMap = getTableDescriptors().getAll();
2707              for (TableDescriptor tableDescriptor : tableDescriptorMap.values()) {
2708                TableName tableName = tableDescriptor.getTableName();
2709                RegionStatesCount regionStatesCount = assignmentManager
2710                  .getRegionStatesCount(tableName);
2711                tableRegionStatesCountMap.put(tableName, regionStatesCount);
2712              }
2713              builder.setTableRegionStatesCount(tableRegionStatesCountMap);
2714            } catch (IOException e) {
              LOG.error("Error while populating TABLE_TO_REGIONS_COUNT for Cluster Metrics.", e);
2716            }
2717          }
2718          break;
2719        }
2720      }
2721    }
2722    return builder.build();
2723  }
2724
2725  /**
2726   * @return cluster status
2727   */
2728  public ClusterMetrics getClusterMetrics() throws IOException {
2729    return getClusterMetrics(EnumSet.allOf(Option.class));
2730  }
2731
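  /**
   * Build the cluster metrics for the requested options, invoking the coprocessor pre/post hooks
   * around the computation. For example, a caller interested only in the live server list might
   * ask (illustrative):
   * <pre>
   * ClusterMetrics metrics = master.getClusterMetrics(EnumSet.of(Option.LIVE_SERVERS));
   * </pre>
   */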
2732  public ClusterMetrics getClusterMetrics(EnumSet<Option> options) throws IOException {
2733    if (cpHost != null) {
2734      cpHost.preGetClusterMetrics();
2735    }
2736    ClusterMetrics status = getClusterMetricsWithoutCoprocessor(options);
2737    if (cpHost != null) {
2738      cpHost.postGetClusterMetrics(status);
2739    }
2740    return status;
2741  }
2742
2743  private List<ServerName> getBackupMasters() throws InterruptedIOException {
    // Build the list of backup masters from the ZK nodes.
2745    List<String> backupMasterStrings;
2746    try {
2747      backupMasterStrings = ZKUtil.listChildrenNoWatch(this.zooKeeper,
2748        this.zooKeeper.getZNodePaths().backupMasterAddressesZNode);
2749    } catch (KeeperException e) {
2750      LOG.warn(this.zooKeeper.prefix("Unable to list backup servers"), e);
2751      backupMasterStrings = null;
2752    }
2753
2754    List<ServerName> backupMasters = Collections.emptyList();
2755    if (backupMasterStrings != null && !backupMasterStrings.isEmpty()) {
2756      backupMasters = new ArrayList<>(backupMasterStrings.size());
2757      for (String s: backupMasterStrings) {
2758        try {
2759          byte [] bytes;
2760          try {
2761            bytes = ZKUtil.getData(this.zooKeeper, ZNodePaths.joinZNode(
2762                this.zooKeeper.getZNodePaths().backupMasterAddressesZNode, s));
2763          } catch (InterruptedException e) {
2764            throw new InterruptedIOException();
2765          }
2766          if (bytes != null) {
2767            ServerName sn;
2768            try {
2769              sn = ProtobufUtil.parseServerNameFrom(bytes);
2770            } catch (DeserializationException e) {
              LOG.warn("Failed to parse, skipping registration of backup server", e);
2772              continue;
2773            }
2774            backupMasters.add(sn);
2775          }
2776        } catch (KeeperException e) {
2777          LOG.warn(this.zooKeeper.prefix("Unable to get information about " +
2778                   "backup servers"), e);
2779        }
2780      }
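      // Present the backup masters in a deterministic order, sorted by server name.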
      backupMasters.sort(Comparator.comparing(ServerName::getServerName));
2786    }
2787    return backupMasters;
2788  }
2789
2790  /**
2791   * The set of loaded coprocessors is stored in a static set. Since it's
2792   * statically allocated, it does not require that HMaster's cpHost be
2793   * initialized prior to accessing it.
2794   * @return a String representation of the set of names of the loaded coprocessors.
2795   */
2796  public static String getLoadedCoprocessors() {
2797    return CoprocessorHost.getLoadedCoprocessors().toString();
2798  }
2799
2800  /**
2801   * @return timestamp in millis when HMaster was started.
2802   */
2803  public long getMasterStartTime() {
2804    return startcode;
2805  }
2806
2807  /**
2808   * @return timestamp in millis when HMaster became the active master.
2809   */
2810  public long getMasterActiveTime() {
2811    return masterActiveTime;
2812  }
2813
2814  /**
2815   * @return timestamp in millis when HMaster finished becoming the active master
2816   */
2817  public long getMasterFinishedInitializationTime() {
2818    return masterFinishedInitializationTime;
2819  }
2820
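  /**
   * @return the number of active procedure WAL files, or 0 if the procedure store is not set up.
   */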
2821  public int getNumWALFiles() {
2822    return procedureStore != null ? procedureStore.getActiveLogs().size() : 0;
2823  }
2824
2825  public WALProcedureStore getWalProcedureStore() {
2826    return procedureStore;
2827  }
2828
2829  public int getRegionServerInfoPort(final ServerName sn) {
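    // An info port of 0 means the server did not report one; fall back to the configured default.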
2830    int port = this.serverManager.getInfoPort(sn);
2831    return port == 0 ? conf.getInt(HConstants.REGIONSERVER_INFO_PORT,
2832      HConstants.DEFAULT_REGIONSERVER_INFOPORT) : port;
2833  }
2834
2835  @Override
2836  public String getRegionServerVersion(ServerName sn) {
    // Will return "0.0.0" if the server is not online, to prevent moving system regions to a
    // RegionServer of unknown version.
2839    return this.serverManager.getVersion(sn);
2840  }
2841
2842  @Override
2843  public void checkIfShouldMoveSystemRegionAsync() {
2844    assignmentManager.checkIfShouldMoveSystemRegionAsync();
2845  }
2846
2847  /**
   * @return array of coprocessor simple class names.
2849   */
2850  public String[] getMasterCoprocessors() {
2851    Set<String> masterCoprocessors = getMasterCoprocessorHost().getCoprocessors();
    return masterCoprocessors.toArray(new String[0]);
2853  }
2854
2855  @Override
2856  public void abort(String reason, Throwable cause) {
2857    if (isAborted() || isStopped()) {
2858      return;
2859    }
2860    setAbortRequested();
2861    if (cpHost != null) {
2862      // HBASE-4014: dump a list of loaded coprocessors.
2863      LOG.error(HBaseMarkers.FATAL, "Master server abort: loaded coprocessors are: " +
2864          getLoadedCoprocessors());
2865    }
2866    String msg = "***** ABORTING master " + this + ": " + reason + " *****";
2867    if (cause != null) {
2868      LOG.error(HBaseMarkers.FATAL, msg, cause);
2869    } else {
2870      LOG.error(HBaseMarkers.FATAL, msg);
2871    }
2872
2873    try {
2874      stopMaster();
2875    } catch (IOException e) {
2876      LOG.error("Exception occurred while stopping master", e);
2877    }
2878  }
2879
2880  @Override
2881  public ZKWatcher getZooKeeper() {
2882    return zooKeeper;
2883  }
2884
2885  @Override
2886  public MasterCoprocessorHost getMasterCoprocessorHost() {
2887    return cpHost;
2888  }
2889
2890  @Override
2891  public MasterQuotaManager getMasterQuotaManager() {
2892    return quotaManager;
2893  }
2894
2895  @Override
2896  public ProcedureExecutor<MasterProcedureEnv> getMasterProcedureExecutor() {
2897    return procedureExecutor;
2898  }
2899
2900  @Override
2901  public ServerName getServerName() {
2902    return this.serverName;
2903  }
2904
2905  @Override
2906  public AssignmentManager getAssignmentManager() {
2907    return this.assignmentManager;
2908  }
2909
2910  @Override
2911  public CatalogJanitor getCatalogJanitor() {
2912    return this.catalogJanitorChore;
2913  }
2914
2915  public MemoryBoundedLogMessageBuffer getRegionServerFatalLogBuffer() {
2916    return rsFatals;
2917  }
2918
2919  /**
2920   * Shutdown the cluster.
2921   * Master runs a coordinated stop of all RegionServers and then itself.
2922   */
2923  public void shutdown() throws IOException {
2924    if (cpHost != null) {
2925      cpHost.preShutdown();
2926    }
    // Tell the ServerManager that cluster shutdown has been called. This makes it so that when
    // the Master is the last running server, it'll stop itself. Next, we broadcast the cluster
    // shutdown by setting the cluster status as down. RegionServers will notice this change in
    // state and will start shutting themselves down. When the last one has exited, the Master
    // can go down.
2931    if (this.serverManager != null) {
2932      this.serverManager.shutdownCluster();
2933    }
2934    if (this.clusterStatusTracker != null) {
2935      try {
2936        this.clusterStatusTracker.setClusterDown();
2937      } catch (KeeperException e) {
2938        LOG.error("ZooKeeper exception trying to set cluster as down in ZK", e);
2939      }
2940    }
    // Stop the procedure executor. This will stop any ongoing assign, unassign, server crash,
    // etc. processing, so we can go down.
2943    if (this.procedureExecutor != null) {
2944      this.procedureExecutor.stop();
2945    }
2946    // Shutdown our cluster connection. This will kill any hosted RPCs that might be going on;
2947    // this is what we want especially if the Master is in startup phase doing call outs to
    // hbase:meta, etc. when the cluster is down. Without this connection close, we'd have to
    // wait on the rpc to timeout.
2950    if (this.clusterConnection != null) {
2951      this.clusterConnection.close();
2952    }
2953  }
2954
2955  public void stopMaster() throws IOException {
2956    if (cpHost != null) {
2957      cpHost.preStopMaster();
2958    }
2959    stop("Stopped by " + Thread.currentThread().getName());
2960  }
2961
2962  @Override
2963  public void stop(String msg) {
2964    if (!isStopped()) {
2965      super.stop(msg);
2966      if (this.activeMasterManager != null) {
2967        this.activeMasterManager.stop();
2968      }
2969    }
2970  }
2971
2972  @VisibleForTesting
2973  protected void checkServiceStarted() throws ServerNotRunningYetException {
2974    if (!serviceStarted) {
2975      throw new ServerNotRunningYetException("Server is not running yet");
2976    }
2977  }
2978
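  /**
   * Thrown by {@link #checkInitialized()} when the master has been stopped.
   */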
2979  public static class MasterStoppedException extends DoNotRetryIOException {
2980    MasterStoppedException() {
2981      super();
2982    }
2983  }
2984
2985  void checkInitialized() throws PleaseHoldException, ServerNotRunningYetException,
2986      MasterNotRunningException, MasterStoppedException {
2987    checkServiceStarted();
2988    if (!isInitialized()) {
2989      throw new PleaseHoldException("Master is initializing");
2990    }
2991    if (isStopped()) {
2992      throw new MasterStoppedException();
2993    }
2994  }
2995
2996  /**
2997   * Report whether this master is currently the active master or not.
2998   * If not active master, we are parked on ZK waiting to become active.
2999   *
3000   * This method is used for testing.
3001   *
3002   * @return true if active master, false if not.
3003   */
3004  @Override
3005  public boolean isActiveMaster() {
3006    return activeMaster;
3007  }
3008
3009  /**
3010   * Report whether this master has completed with its initialization and is
3011   * ready.  If ready, the master is also the active master.  A standby master
3012   * is never ready.
3013   *
3014   * This method is used for testing.
3015   *
3016   * @return true if master is ready to go, false if not.
3017   */
3018  @Override
3019  public boolean isInitialized() {
3020    return initialized.isReady();
3021  }
3022
3023  /**
3024   * Report whether this master is in maintenance mode.
3025   *
3026   * @return true if master is in maintenanceMode
3027   */
3028  @Override
3029  public boolean isInMaintenanceMode() {
3030    return maintenanceMode;
3031  }
3032
3033  @VisibleForTesting
3034  public void setInitialized(boolean isInitialized) {
3035    procedureExecutor.getEnvironment().setEventReady(initialized, isInitialized);
3036  }
3037
3038  @Override
3039  public ProcedureEvent<?> getInitializedEvent() {
3040    return initialized;
3041  }
3042
3043  /**
3044   * Compute the average load across all region servers.
3045   * Currently, this uses a very naive computation - just uses the number of
3046   * regions being served, ignoring stats about number of requests.
3047   * @return the average load
3048   */
3049  public double getAverageLoad() {
3050    if (this.assignmentManager == null) {
3051      return 0;
3052    }
3053
3054    RegionStates regionStates = this.assignmentManager.getRegionStates();
3055    if (regionStates == null) {
3056      return 0;
3057    }
3058    return regionStates.getAverageLoad();
3059  }
3060
  /**
   * @return the count of region split plans executed
   */
3064  public long getSplitPlanCount() {
3065    return splitPlanCount;
3066  }
3067
  /**
   * @return the count of region merge plans executed
   */
3071  public long getMergePlanCount() {
3072    return mergePlanCount;
3073  }
3074
3075  @Override
3076  public boolean registerService(Service instance) {
3077    /*
3078     * No stacking of instances is allowed for a single service name
3079     */
3080    Descriptors.ServiceDescriptor serviceDesc = instance.getDescriptorForType();
3081    String serviceName = CoprocessorRpcUtils.getServiceName(serviceDesc);
3082    if (coprocessorServiceHandlers.containsKey(serviceName)) {
      LOG.error("Coprocessor service " + serviceName +
          " already registered, rejecting request from " + instance);
3086      return false;
3087    }
3088
3089    coprocessorServiceHandlers.put(serviceName, instance);
3090    if (LOG.isDebugEnabled()) {
      LOG.debug("Registered master coprocessor service: service=" + serviceName);
3092    }
3093    return true;
3094  }
3095
3096  /**
3097   * Utility for constructing an instance of the passed HMaster class.
   * @param masterClass the HMaster class (or subclass) to instantiate
   * @param conf the Configuration to pass to the constructor
   * @return HMaster instance.
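   *
   * <p>A minimal usage sketch (this mirrors how HMasterCommandLine bootstraps the master;
   * the configuration source is illustrative):
   * <pre>
   * Configuration conf = HBaseConfiguration.create();
   * HMaster master = HMaster.constructMaster(HMaster.class, conf);
   * master.start();
   * </pre>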
3100   */
3101  public static HMaster constructMaster(Class<? extends HMaster> masterClass,
3102      final Configuration conf)  {
3103    try {
3104      Constructor<? extends HMaster> c = masterClass.getConstructor(Configuration.class);
3105      return c.newInstance(conf);
3106    } catch(Exception e) {
3107      Throwable error = e;
3108      if (e instanceof InvocationTargetException &&
3109          ((InvocationTargetException)e).getTargetException() != null) {
3110        error = ((InvocationTargetException)e).getTargetException();
3111      }
      throw new RuntimeException("Failed construction of Master: " + masterClass.toString() + ".",
          error);
3114    }
3115  }
3116
3117  /**
3118   * @see org.apache.hadoop.hbase.master.HMasterCommandLine
3119   */
3120  public static void main(String [] args) {
3121    LOG.info("STARTING service " + HMaster.class.getSimpleName());
3122    VersionInfo.logVersion();
3123    new HMasterCommandLine(HMaster.class).doMain(args);
3124  }
3125
3126  public HFileCleaner getHFileCleaner() {
3127    return this.hfileCleaner;
3128  }
3129
3130  public LogCleaner getLogCleaner() {
3131    return this.logCleaner;
3132  }
3133
3134  /**
3135   * @return the underlying snapshot manager
3136   */
3137  @Override
3138  public SnapshotManager getSnapshotManager() {
3139    return this.snapshotManager;
3140  }
3141
3142  /**
3143   * @return the underlying MasterProcedureManagerHost
3144   */
3145  @Override
3146  public MasterProcedureManagerHost getMasterProcedureManagerHost() {
3147    return mpmHost;
3148  }
3149
3150  @Override
3151  public ClusterSchema getClusterSchema() {
3152    return this.clusterSchemaService;
3153  }
3154
3155  /**
3156   * Create a new Namespace.
3157   * @param namespaceDescriptor descriptor for new Namespace
3158   * @param nonceGroup Identifier for the source of the request, a client or process.
3159   * @param nonce A unique identifier for this operation from the client or process identified by
3160   * <code>nonceGroup</code> (the source must ensure each operation gets a unique id).
3161   * @return procedure id
3162   */
3163  long createNamespace(final NamespaceDescriptor namespaceDescriptor, final long nonceGroup,
3164      final long nonce) throws IOException {
3165    checkInitialized();
3166
3167    TableName.isLegalNamespaceName(Bytes.toBytes(namespaceDescriptor.getName()));
3168
3169    return MasterProcedureUtil.submitProcedure(new MasterProcedureUtil.NonceProcedureRunnable(this,
3170          nonceGroup, nonce) {
3171      @Override
3172      protected void run() throws IOException {
3173        getMaster().getMasterCoprocessorHost().preCreateNamespace(namespaceDescriptor);
3174        // We need to wait for the procedure to potentially fail due to "prepare" sanity
3175        // checks. This will block only the beginning of the procedure. See HBASE-19953.
3176        ProcedurePrepareLatch latch = ProcedurePrepareLatch.createBlockingLatch();
3177        LOG.info(getClientIdAuditPrefix() + " creating " + namespaceDescriptor);
3178        // Execute the operation synchronously - wait for the operation to complete before
3179        // continuing.
3180        setProcId(getClusterSchema().createNamespace(namespaceDescriptor, getNonceKey(), latch));
3181        latch.await();
3182        getMaster().getMasterCoprocessorHost().postCreateNamespace(namespaceDescriptor);
3183      }
3184
3185      @Override
3186      protected String getDescription() {
3187        return "CreateNamespaceProcedure";
3188      }
3189    });
3190  }
3191
3192  /**
3193   * Modify an existing Namespace.
3194   * @param nonceGroup Identifier for the source of the request, a client or process.
3195   * @param nonce A unique identifier for this operation from the client or process identified by
3196   * <code>nonceGroup</code> (the source must ensure each operation gets a unique id).
3197   * @return procedure id
3198   */
3199  long modifyNamespace(final NamespaceDescriptor newNsDescriptor, final long nonceGroup,
3200      final long nonce) throws IOException {
3201    checkInitialized();
3202
3203    TableName.isLegalNamespaceName(Bytes.toBytes(newNsDescriptor.getName()));
3204
3205    return MasterProcedureUtil.submitProcedure(new MasterProcedureUtil.NonceProcedureRunnable(this,
3206          nonceGroup, nonce) {
3207      @Override
3208      protected void run() throws IOException {
3209        NamespaceDescriptor oldNsDescriptor = getNamespace(newNsDescriptor.getName());
3210        getMaster().getMasterCoprocessorHost().preModifyNamespace(oldNsDescriptor, newNsDescriptor);
3211        // We need to wait for the procedure to potentially fail due to "prepare" sanity
3212        // checks. This will block only the beginning of the procedure. See HBASE-19953.
3213        ProcedurePrepareLatch latch = ProcedurePrepareLatch.createBlockingLatch();
3214        LOG.info(getClientIdAuditPrefix() + " modify " + newNsDescriptor);
3215        // Execute the operation synchronously - wait for the operation to complete before
3216        // continuing.
3217        setProcId(getClusterSchema().modifyNamespace(newNsDescriptor, getNonceKey(), latch));
3218        latch.await();
3219        getMaster().getMasterCoprocessorHost().postModifyNamespace(oldNsDescriptor,
3220          newNsDescriptor);
3221      }
3222
3223      @Override
3224      protected String getDescription() {
3225        return "ModifyNamespaceProcedure";
3226      }
3227    });
3228  }
3229
3230  /**
3231   * Delete an existing Namespace. Only empty Namespaces (no tables) can be removed.
3232   * @param nonceGroup Identifier for the source of the request, a client or process.
3233   * @param nonce A unique identifier for this operation from the client or process identified by
3234   * <code>nonceGroup</code> (the source must ensure each operation gets a unique id).
3235   * @return procedure id
3236   */
3237  long deleteNamespace(final String name, final long nonceGroup, final long nonce)
3238      throws IOException {
3239    checkInitialized();
3240
3241    return MasterProcedureUtil.submitProcedure(new MasterProcedureUtil.NonceProcedureRunnable(this,
3242          nonceGroup, nonce) {
3243      @Override
3244      protected void run() throws IOException {
3245        getMaster().getMasterCoprocessorHost().preDeleteNamespace(name);
3246        LOG.info(getClientIdAuditPrefix() + " delete " + name);
3247        // Execute the operation synchronously - wait for the operation to complete before
3248        // continuing.
3249        //
3250        // We need to wait for the procedure to potentially fail due to "prepare" sanity
3251        // checks. This will block only the beginning of the procedure. See HBASE-19953.
3252        ProcedurePrepareLatch latch = ProcedurePrepareLatch.createBlockingLatch();
3253        setProcId(submitProcedure(
3254              new DeleteNamespaceProcedure(procedureExecutor.getEnvironment(), name, latch)));
3255        latch.await();
3256        // Will not be invoked in the face of Exception thrown by the Procedure's execution
3257        getMaster().getMasterCoprocessorHost().postDeleteNamespace(name);
3258      }
3259
3260      @Override
3261      protected String getDescription() {
3262        return "DeleteNamespaceProcedure";
3263      }
3264    });
3265  }
3266
3267  /**
3268   * Get a Namespace
3269   * @param name Name of the Namespace
3270   * @return Namespace descriptor for <code>name</code>
3271   */
3272  NamespaceDescriptor getNamespace(String name) throws IOException {
3273    checkInitialized();
    if (this.cpHost != null) {
      this.cpHost.preGetNamespaceDescriptor(name);
    }
    NamespaceDescriptor nsd = this.clusterSchemaService.getNamespace(name);
    if (this.cpHost != null) {
      this.cpHost.postGetNamespaceDescriptor(nsd);
    }
3277    return nsd;
3278  }
3279
3280  /**
3281   * Get all Namespaces
3282   * @return All Namespace descriptors
3283   */
3284  List<NamespaceDescriptor> getNamespaces() throws IOException {
3285    checkInitialized();
3286    final List<NamespaceDescriptor> nsds = new ArrayList<>();
3287    if (cpHost != null) {
3288      cpHost.preListNamespaceDescriptors(nsds);
3289    }
3290    nsds.addAll(this.clusterSchemaService.getNamespaces());
3291    if (this.cpHost != null) {
3292      this.cpHost.postListNamespaceDescriptors(nsds);
3293    }
3294    return nsds;
3295  }
3296
3297  @Override
3298  public List<TableName> listTableNamesByNamespace(String name) throws IOException {
3299    checkInitialized();
3300    return listTableNames(name, null, true);
3301  }
3302
3303  @Override
3304  public List<TableDescriptor> listTableDescriptorsByNamespace(String name) throws IOException {
3305    checkInitialized();
3306    return listTableDescriptors(name, null, null, true);
3307  }
3308
3309  @Override
3310  public boolean abortProcedure(final long procId, final boolean mayInterruptIfRunning)
3311      throws IOException {
3312    if (cpHost != null) {
3313      cpHost.preAbortProcedure(this.procedureExecutor, procId);
3314    }
3315
3316    final boolean result = this.procedureExecutor.abort(procId, mayInterruptIfRunning);
3317
3318    if (cpHost != null) {
3319      cpHost.postAbortProcedure();
3320    }
3321
3322    return result;
3323  }
3324
3325  @Override
3326  public List<Procedure<?>> getProcedures() throws IOException {
3327    if (cpHost != null) {
3328      cpHost.preGetProcedures();
3329    }
3330
3331    @SuppressWarnings({ "unchecked", "rawtypes" })
3332    List<Procedure<?>> procList = (List) this.procedureExecutor.getProcedures();
3333
3334    if (cpHost != null) {
3335      cpHost.postGetProcedures(procList);
3336    }
3337
3338    return procList;
3339  }
3340
3341  @Override
3342  public List<LockedResource> getLocks() throws IOException {
3343    if (cpHost != null) {
3344      cpHost.preGetLocks();
3345    }
3346
3347    MasterProcedureScheduler procedureScheduler =
3348      procedureExecutor.getEnvironment().getProcedureScheduler();
3349
3350    final List<LockedResource> lockedResources = procedureScheduler.getLocks();
3351
3352    if (cpHost != null) {
3353      cpHost.postGetLocks(lockedResources);
3354    }
3355
3356    return lockedResources;
3357  }
3358
3359  /**
3360   * Returns the list of table descriptors that match the specified request
3361   * @param namespace the namespace to query, or null if querying for all
3362   * @param regex The regular expression to match against, or null if querying for all
3363   * @param tableNameList the list of table names, or null if querying for all
3364   * @param includeSysTables False to match only against userspace tables
3365   * @return the list of table descriptors
3366   */
3367  public List<TableDescriptor> listTableDescriptors(final String namespace, final String regex,
3368      final List<TableName> tableNameList, final boolean includeSysTables)
3369  throws IOException {
3370    List<TableDescriptor> htds = new ArrayList<>();
3371    if (cpHost != null) {
3372      cpHost.preGetTableDescriptors(tableNameList, htds, regex);
3373    }
3374    htds = getTableDescriptors(htds, namespace, regex, tableNameList, includeSysTables);
3375    if (cpHost != null) {
3376      cpHost.postGetTableDescriptors(tableNameList, htds, regex);
3377    }
3378    return htds;
3379  }
3380
3381  /**
3382   * Returns the list of table names that match the specified request
3383   * @param regex The regular expression to match against, or null if querying for all
3384   * @param namespace the namespace to query, or null if querying for all
3385   * @param includeSysTables False to match only against userspace tables
3386   * @return the list of table names
3387   */
3388  public List<TableName> listTableNames(final String namespace, final String regex,
3389      final boolean includeSysTables) throws IOException {
3390    List<TableDescriptor> htds = new ArrayList<>();
3391    if (cpHost != null) {
3392      cpHost.preGetTableNames(htds, regex);
3393    }
3394    htds = getTableDescriptors(htds, namespace, regex, null, includeSysTables);
3395    if (cpHost != null) {
3396      cpHost.postGetTableNames(htds, regex);
3397    }
3398    List<TableName> result = new ArrayList<>(htds.size());
    for (TableDescriptor htd : htds) {
      result.add(htd.getTableName());
    }
3400    return result;
3401  }
3402
3403  /**
   * @return list of table descriptors after filtering by regex and whether to include system
3405   *    tables, etc.
3406   * @throws IOException
3407   */
3408  private List<TableDescriptor> getTableDescriptors(final List<TableDescriptor> htds,
3409      final String namespace, final String regex, final List<TableName> tableNameList,
3410      final boolean includeSysTables)
3411  throws IOException {
3412    if (tableNameList == null || tableNameList.isEmpty()) {
3413      // request for all TableDescriptors
3414      Collection<TableDescriptor> allHtds;
3415      if (namespace != null && namespace.length() > 0) {
        // Check that the namespace exists; this will throw if it does not.
3417        this.clusterSchemaService.getNamespace(namespace);
3418        allHtds = tableDescriptors.getByNamespace(namespace).values();
3419      } else {
3420        allHtds = tableDescriptors.getAll().values();
3421      }
3422      for (TableDescriptor desc: allHtds) {
3423        if (tableStateManager.isTablePresent(desc.getTableName())
3424            && (includeSysTables || !desc.getTableName().isSystemTable())) {
3425          htds.add(desc);
3426        }
3427      }
3428    } else {
3429      for (TableName s: tableNameList) {
3430        if (tableStateManager.isTablePresent(s)) {
3431          TableDescriptor desc = tableDescriptors.get(s);
3432          if (desc != null) {
3433            htds.add(desc);
3434          }
3435        }
3436      }
3437    }
3438
3439    // Retains only those matched by regular expression.
    if (regex != null) {
      filterTablesByRegex(htds, Pattern.compile(regex));
    }
3441    return htds;
3442  }
3443
3444  /**
3445   * Removes the table descriptors that don't match the pattern.
3446   * @param descriptors list of table descriptors to filter
3447   * @param pattern the regex to use
3448   */
3449  private static void filterTablesByRegex(final Collection<TableDescriptor> descriptors,
3450      final Pattern pattern) {
3451    final String defaultNS = NamespaceDescriptor.DEFAULT_NAMESPACE_NAME_STR;
3452    Iterator<TableDescriptor> itr = descriptors.iterator();
3453    while (itr.hasNext()) {
3454      TableDescriptor htd = itr.next();
3455      String tableName = htd.getTableName().getNameAsString();
3456      boolean matched = pattern.matcher(tableName).matches();
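      // Tables in the default namespace may be matched either by bare name or by the
      // fully-qualified "default:tableName" form.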
3457      if (!matched && htd.getTableName().getNamespaceAsString().equals(defaultNS)) {
3458        matched = pattern.matcher(defaultNS + TableName.NAMESPACE_DELIM + tableName).matches();
3459      }
3460      if (!matched) {
3461        itr.remove();
3462      }
3463    }
3464  }
3465
3466  @Override
3467  public long getLastMajorCompactionTimestamp(TableName table) throws IOException {
3468    return getClusterMetrics(EnumSet.of(Option.LIVE_SERVERS))
3469        .getLastMajorCompactionTimestamp(table);
3470  }
3471
3472  @Override
3473  public long getLastMajorCompactionTimestampForRegion(byte[] regionName) throws IOException {
3474    return getClusterMetrics(EnumSet.of(Option.LIVE_SERVERS))
3475        .getLastMajorCompactionTimestamp(regionName);
3476  }
3477
3478  /**
3479   * Gets the mob file compaction state for a specific table.
   * Whether all the mob files are selected is known only during compaction execution, but
   * this statistic is gathered just before the compaction starts; since the compaction type
   * is hard to know at that point, rough statistics are used for mob file compaction. Only
   * two compaction states are available: CompactionState.MAJOR_AND_MINOR and
   * CompactionState.NONE.
   * @param tableName The current table name.
   * @return Whether the given table is undergoing mob file compaction now.
3486   */
3487  public CompactionState getMobCompactionState(TableName tableName) {
3488    AtomicInteger compactionsCount = mobCompactionStates.get(tableName);
3489    if (compactionsCount != null && compactionsCount.get() != 0) {
3490      return CompactionState.MAJOR_AND_MINOR;
3491    }
3492    return CompactionState.NONE;
3493  }
3494
3495  public void reportMobCompactionStart(TableName tableName) throws IOException {
3496    IdLock.Entry lockEntry = null;
3497    try {
3498      lockEntry = mobCompactionLock.getLockEntry(tableName.hashCode());
3499      AtomicInteger compactionsCount = mobCompactionStates.get(tableName);
3500      if (compactionsCount == null) {
3501        compactionsCount = new AtomicInteger(0);
3502        mobCompactionStates.put(tableName, compactionsCount);
3503      }
3504      compactionsCount.incrementAndGet();
3505    } finally {
3506      if (lockEntry != null) {
3507        mobCompactionLock.releaseLockEntry(lockEntry);
3508      }
3509    }
3510  }
3511
3512  public void reportMobCompactionEnd(TableName tableName) throws IOException {
3513    IdLock.Entry lockEntry = null;
3514    try {
3515      lockEntry = mobCompactionLock.getLockEntry(tableName.hashCode());
3516      AtomicInteger compactionsCount = mobCompactionStates.get(tableName);
3517      if (compactionsCount != null) {
3518        int count = compactionsCount.decrementAndGet();
3519        // remove the entry if the count is 0.
3520        if (count == 0) {
3521          mobCompactionStates.remove(tableName);
3522        }
3523      }
3524    } finally {
3525      if (lockEntry != null) {
3526        mobCompactionLock.releaseLockEntry(lockEntry);
3527      }
3528    }
3529  }
3530
3531  /**
3532   * Requests mob compaction.
   * @param tableName The table to compact.
   * @param columns The column families to compact.
   * @param allFiles Whether to add all mob files to the compaction.
3536   */
  public void requestMobCompaction(TableName tableName, List<ColumnFamilyDescriptor> columns,
      boolean allFiles) throws IOException {
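    // Delegate the request to the master's mob compaction thread.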
3539    mobCompactThread.requestMobCompaction(conf, fs, tableName, columns, allFiles);
3540  }
3541
3542  /**
3543   * Queries the state of the {@link LoadBalancerTracker}. If the balancer is not initialized,
3544   * false is returned.
3545   *
3546   * @return The state of the load balancer, or false if the load balancer isn't defined.
3547   */
3548  public boolean isBalancerOn() {
3549    return !isInMaintenanceMode()
3550        && loadBalancerTracker != null
3551        && loadBalancerTracker.isBalancerOn();
3552  }
3553
3554  /**
3555   * Queries the state of the {@link RegionNormalizerTracker}. If it's not initialized,
3556   * false is returned.
3557   */
3558  public boolean isNormalizerOn() {
3559    return !isInMaintenanceMode()
3560        && regionNormalizerTracker != null
3561        && regionNormalizerTracker.isNormalizerOn();
3562  }
3563
3564  /**
3565   * Queries the state of the {@link SplitOrMergeTracker}. If it is not initialized,
   * false is returned. If switchType is illegal, false is returned.
3567   * @param switchType see {@link org.apache.hadoop.hbase.client.MasterSwitchType}
3568   * @return The state of the switch
3569   */
3570  @Override
3571  public boolean isSplitOrMergeEnabled(MasterSwitchType switchType) {
3572    return !isInMaintenanceMode()
3573        && splitOrMergeTracker != null
3574        && splitOrMergeTracker.isSplitOrMergeEnabled(switchType);
3575  }
3576
3577  /**
3578   * Fetch the configured {@link LoadBalancer} class name. If none is set, a default is returned.
3579   *
3580   * @return The name of the {@link LoadBalancer} in use.
3581   */
3582  public String getLoadBalancerClassName() {
3583    return conf.get(HConstants.HBASE_MASTER_LOADBALANCER_CLASS, LoadBalancerFactory
3584        .getDefaultLoadBalancerClass().getName());
3585  }
3586
3587  /**
3588   * @return RegionNormalizerTracker instance
3589   */
3590  public RegionNormalizerTracker getRegionNormalizerTracker() {
3591    return regionNormalizerTracker;
3592  }
3593
3594  public SplitOrMergeTracker getSplitOrMergeTracker() {
3595    return splitOrMergeTracker;
3596  }
3597
3598  @Override
3599  public LoadBalancer getLoadBalancer() {
3600    return balancer;
3601  }
3602
3603  @Override
3604  public FavoredNodesManager getFavoredNodesManager() {
3605    return favoredNodesManager;
3606  }
3607
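  /**
   * Helper for the replication peer operations below: submits the procedure and waits on its
   * latch, so failures in the early "prepare" stage surface before the procedure id is returned.
   */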
3608  private long executePeerProcedure(ModifyPeerProcedure procedure) throws IOException {
3609    long procId = procedureExecutor.submitProcedure(procedure);
3610    procedure.getLatch().await();
3611    return procId;
3612  }
3613
3614  @Override
3615  public long addReplicationPeer(String peerId, ReplicationPeerConfig peerConfig, boolean enabled)
3616      throws ReplicationException, IOException {
3617    LOG.info(getClientIdAuditPrefix() + " creating replication peer, id=" + peerId + ", config=" +
3618      peerConfig + ", state=" + (enabled ? "ENABLED" : "DISABLED"));
3619    return executePeerProcedure(new AddPeerProcedure(peerId, peerConfig, enabled));
3620  }
3621
3622  @Override
3623  public long removeReplicationPeer(String peerId) throws ReplicationException, IOException {
3624    LOG.info(getClientIdAuditPrefix() + " removing replication peer, id=" + peerId);
3625    return executePeerProcedure(new RemovePeerProcedure(peerId));
3626  }
3627
3628  @Override
3629  public long enableReplicationPeer(String peerId) throws ReplicationException, IOException {
3630    LOG.info(getClientIdAuditPrefix() + " enable replication peer, id=" + peerId);
3631    return executePeerProcedure(new EnablePeerProcedure(peerId));
3632  }
3633
3634  @Override
3635  public long disableReplicationPeer(String peerId) throws ReplicationException, IOException {
3636    LOG.info(getClientIdAuditPrefix() + " disable replication peer, id=" + peerId);
3637    return executePeerProcedure(new DisablePeerProcedure(peerId));
3638  }
3639
3640  @Override
3641  public ReplicationPeerConfig getReplicationPeerConfig(String peerId)
3642      throws ReplicationException, IOException {
3643    if (cpHost != null) {
3644      cpHost.preGetReplicationPeerConfig(peerId);
3645    }
3646    LOG.info(getClientIdAuditPrefix() + " get replication peer config, id=" + peerId);
3647    ReplicationPeerConfig peerConfig = this.replicationPeerManager.getPeerConfig(peerId)
3648        .orElseThrow(() -> new ReplicationPeerNotFoundException(peerId));
3649    if (cpHost != null) {
3650      cpHost.postGetReplicationPeerConfig(peerId);
3651    }
3652    return peerConfig;
3653  }
3654
3655  @Override
3656  public long updateReplicationPeerConfig(String peerId, ReplicationPeerConfig peerConfig)
3657      throws ReplicationException, IOException {
3658    LOG.info(getClientIdAuditPrefix() + " update replication peer config, id=" + peerId +
3659      ", config=" + peerConfig);
3660    return executePeerProcedure(new UpdatePeerConfigProcedure(peerId, peerConfig));
3661  }
3662
3663  @Override
3664  public List<ReplicationPeerDescription> listReplicationPeers(String regex)
3665      throws ReplicationException, IOException {
3666    if (cpHost != null) {
3667      cpHost.preListReplicationPeers(regex);
3668    }
3669    LOG.debug("{} list replication peers, regex={}", getClientIdAuditPrefix(), regex);
3670    Pattern pattern = regex == null ? null : Pattern.compile(regex);
3671    List<ReplicationPeerDescription> peers =
3672      this.replicationPeerManager.listPeers(pattern);
3673    if (cpHost != null) {
3674      cpHost.postListReplicationPeers(regex);
3675    }
3676    return peers;
3677  }
3678
3679  /**
3680   * Mark region server(s) as decommissioned (previously called 'draining') to prevent additional
   * regions from getting assigned to them. Also unload the regions on the servers asynchronously.
3682   * @param servers Region servers to decommission.
3683   * @throws HBaseIOException
3684   */
3685  public void decommissionRegionServers(final List<ServerName> servers, final boolean offload)
3686      throws HBaseIOException {
3687    List<ServerName> serversAdded = new ArrayList<>(servers.size());
3688    // Place the decommission marker first.
3689    String parentZnode = getZooKeeper().getZNodePaths().drainingZNode;
3690    for (ServerName server : servers) {
3691      try {
3692        String node = ZNodePaths.joinZNode(parentZnode, server.getServerName());
3693        ZKUtil.createAndFailSilent(getZooKeeper(), node);
3694      } catch (KeeperException ke) {
3695        throw new HBaseIOException(
3696            this.zooKeeper.prefix("Unable to decommission '" + server.getServerName() + "'."), ke);
3697      }
3698      if (this.serverManager.addServerToDrainList(server)) {
3699        serversAdded.add(server);
      }
3701    }
3702    // Move the regions off the decommissioned servers.
3703    if (offload) {
3704      final List<ServerName> destServers = this.serverManager.createDestinationServersList();
3705      for (ServerName server : serversAdded) {
3706        final List<RegionInfo> regionsOnServer =
3707            this.assignmentManager.getRegionStates().getServerRegionInfoSet(server);
3708        for (RegionInfo hri : regionsOnServer) {
3709          ServerName dest = balancer.randomAssignment(hri, destServers);
3710          if (dest == null) {
3711            throw new HBaseIOException("Unable to determine a plan to move " + hri);
3712          }
3713          RegionPlan rp = new RegionPlan(hri, server, dest);
3714          this.assignmentManager.moveAsync(rp);
3715        }
3716      }
3717    }
3718  }
3719
3720  /**
   * List region servers marked as decommissioned (previously called 'draining'); such servers
   * do not get regions assigned to them.
3723   * @return List of decommissioned servers.
3724   */
3725  public List<ServerName> listDecommissionedRegionServers() {
3726    return this.serverManager.getDrainingServersList();
3727  }
3728
3729  /**
   * Remove the decommission marker (previously called 'draining') from a region server to allow
   * region assignments. Load regions onto the server asynchronously if a list of regions is given.
3732   * @param server Region server to remove decommission marker from.
3733   * @throws HBaseIOException
3734   */
3735  public void recommissionRegionServer(final ServerName server,
3736      final List<byte[]> encodedRegionNames) throws HBaseIOException {
3737    // Remove the server from decommissioned (draining) server list.
3738    String parentZnode = getZooKeeper().getZNodePaths().drainingZNode;
3739    String node = ZNodePaths.joinZNode(parentZnode, server.getServerName());
3740    try {
3741      ZKUtil.deleteNodeFailSilent(getZooKeeper(), node);
3742    } catch (KeeperException ke) {
3743      throw new HBaseIOException(
3744          this.zooKeeper.prefix("Unable to recommission '" + server.getServerName() + "'."), ke);
3745    }
3746    this.serverManager.removeServerFromDrainList(server);
3747
3748    // Load the regions onto the server if we are given a list of regions.
3749    if (encodedRegionNames == null || encodedRegionNames.isEmpty()) {
3750      return;
3751    }
3752    if (!this.serverManager.isServerOnline(server)) {
3753      return;
3754    }
3755    for (byte[] encodedRegionName : encodedRegionNames) {
3756      RegionState regionState =
3757          assignmentManager.getRegionStates().getRegionState(Bytes.toString(encodedRegionName));
3758      if (regionState == null) {
3759        LOG.warn("Unknown region " + Bytes.toStringBinary(encodedRegionName));
3760        continue;
3761      }
3762      RegionInfo hri = regionState.getRegion();
3763      if (server.equals(regionState.getServerName())) {
        LOG.info("Skipping move of region " + hri.getRegionNameAsString()
          + " because the region is already assigned to server " + server + ".");
3766        continue;
3767      }
3768      RegionPlan rp = new RegionPlan(hri, regionState.getServerName(), server);
3769      this.assignmentManager.moveAsync(rp);
3770    }
3771  }
3772
3773  @Override
3774  public LockManager getLockManager() {
3775    return lockManager;
3776  }
3777
3778  public QuotaObserverChore getQuotaObserverChore() {
3779    return this.quotaObserverChore;
3780  }
3781
3782  public SpaceQuotaSnapshotNotifier getSpaceQuotaSnapshotNotifier() {
3783    return this.spaceQuotaSnapshotNotifier;
3784  }
3785
3786  @SuppressWarnings("unchecked")
3787  private RemoteProcedure<MasterProcedureEnv, ?> getRemoteProcedure(long procId) {
3788    Procedure<?> procedure = procedureExecutor.getProcedure(procId);
3789    if (procedure == null) {
3790      return null;
3791    }
3792    assert procedure instanceof RemoteProcedure;
3793    return (RemoteProcedure<MasterProcedureEnv, ?>) procedure;
3794  }
3795
3796  public void remoteProcedureCompleted(long procId) {
3797    LOG.debug("Remote procedure done, pid={}", procId);
3798    RemoteProcedure<MasterProcedureEnv, ?> procedure = getRemoteProcedure(procId);
3799    if (procedure != null) {
3800      procedure.remoteOperationCompleted(procedureExecutor.getEnvironment());
3801    }
3802  }
3803
3804  public void remoteProcedureFailed(long procId, RemoteProcedureException error) {
3805    LOG.debug("Remote procedure failed, pid={}", procId, error);
3806    RemoteProcedure<MasterProcedureEnv, ?> procedure = getRemoteProcedure(procId);
3807    if (procedure != null) {
3808      procedure.remoteOperationFailed(procedureExecutor.getEnvironment(), error);
3809    }
3810  }
3811
3812  @Override
3813  public ReplicationPeerManager getReplicationPeerManager() {
3814    return replicationPeerManager;
3815  }
3816
3817  public HashMap<String, List<Pair<ServerName, ReplicationLoadSource>>>
3818      getReplicationLoad(ServerName[] serverNames) {
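    // Build a map from peer id to the (server, load) pairs reported for that peer; every
    // configured peer gets an entry, even if no server currently reports load for it.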
3819    List<ReplicationPeerDescription> peerList = this.getReplicationPeerManager().listPeers(null);
3820    if (peerList == null) {
3821      return null;
3822    }
3823    HashMap<String, List<Pair<ServerName, ReplicationLoadSource>>> replicationLoadSourceMap =
3824        new HashMap<>(peerList.size());
3825    peerList.stream()
3826        .forEach(peer -> replicationLoadSourceMap.put(peer.getPeerId(), new ArrayList<>()));
3827    for (ServerName serverName : serverNames) {
3828      List<ReplicationLoadSource> replicationLoadSources =
3829          getServerManager().getLoad(serverName).getReplicationLoadSourceList();
3830      for (ReplicationLoadSource replicationLoadSource : replicationLoadSources) {
3831        replicationLoadSourceMap.get(replicationLoadSource.getPeerID())
3832            .add(new Pair<>(serverName, replicationLoadSource));
3833      }
3834    }
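    // Within each peer, order the sources by descending replication lag (most lagged first).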
3835    for (List<Pair<ServerName, ReplicationLoadSource>> loads : replicationLoadSourceMap.values()) {
3836      if (loads.size() > 0) {
3837        loads.sort(Comparator.comparingLong(load -> (-1) * load.getSecond().getReplicationLag()));
3838      }
3839    }
3840    return replicationLoadSourceMap;
3841  }
3842
3843  /**
3844   * This method modifies the master's configuration in order to inject replication-related features
3845   */
3846  @VisibleForTesting
3847  public static void decorateMasterConfiguration(Configuration conf) {
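    // Append the replication log cleaner (and, when bulk-load replication is enabled, the
    // replication hfile cleaner) to the cleaner plugin lists so files still needed for
    // replication are not deleted. This assumes the default plugin lists from hbase-default.xml
    // are present, i.e. conf.get() does not return null here.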
3848    String plugins = conf.get(HBASE_MASTER_LOGCLEANER_PLUGINS);
3849    String cleanerClass = ReplicationLogCleaner.class.getCanonicalName();
3850    if (!plugins.contains(cleanerClass)) {
3851      conf.set(HBASE_MASTER_LOGCLEANER_PLUGINS, plugins + "," + cleanerClass);
3852    }
3853    if (ReplicationUtils.isReplicationForBulkLoadDataEnabled(conf)) {
3854      plugins = conf.get(HFileCleaner.MASTER_HFILE_CLEANER_PLUGINS);
3855      cleanerClass = ReplicationHFileCleaner.class.getCanonicalName();
3856      if (!plugins.contains(cleanerClass)) {
3857        conf.set(HFileCleaner.MASTER_HFILE_CLEANER_PLUGINS, plugins + "," + cleanerClass);
3858      }
3859    }
3860  }
3861
3862  @Override
3863  public Map<String, ReplicationStatus> getWalGroupsReplicationStatus() {
3864    if (!this.isOnline() || !LoadBalancer.isMasterCanHostUserRegions(conf)) {
3865      return new HashMap<>();
3866    }
3867    return super.getWalGroupsReplicationStatus();
3868  }
3869
3870  public HbckChore getHbckChore() {
3871    return this.hbckChore;
3872  }
3873
3874  public SnapshotQuotaObserverChore getSnapshotQuotaObserverChore() {
3875    return this.snapshotQuotaChore;
3876  }
3877}