/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.master;

import static org.apache.hadoop.hbase.HConstants.DEFAULT_HBASE_SPLIT_COORDINATED_BY_ZK;
import static org.apache.hadoop.hbase.HConstants.HBASE_MASTER_LOGCLEANER_PLUGINS;
import static org.apache.hadoop.hbase.HConstants.HBASE_SPLIT_WAL_COORDINATED_BY_ZK;
import static org.apache.hadoop.hbase.util.DNS.MASTER_HOSTNAME_KEY;

import java.io.IOException;
import java.io.InterruptedIOException;
import java.lang.reflect.Constructor;
import java.lang.reflect.InvocationTargetException;
import java.net.InetAddress;
import java.net.InetSocketAddress;
import java.net.UnknownHostException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.Comparator;
import java.util.EnumSet;
import java.util.HashMap;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.Optional;
import java.util.Set;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.Future;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.TimeoutException;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.regex.Pattern;
import java.util.stream.Collectors;
import javax.servlet.http.HttpServlet;
import org.apache.commons.lang3.StringUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.ChoreService;
import org.apache.hadoop.hbase.ClusterId;
import org.apache.hadoop.hbase.ClusterMetrics;
import org.apache.hadoop.hbase.ClusterMetrics.Option;
import org.apache.hadoop.hbase.ClusterMetricsBuilder;
import org.apache.hadoop.hbase.DoNotRetryIOException;
import org.apache.hadoop.hbase.HBaseIOException;
import org.apache.hadoop.hbase.HBaseInterfaceAudience;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.InvalidFamilyOperationException;
import org.apache.hadoop.hbase.MasterNotRunningException;
import org.apache.hadoop.hbase.MetaTableAccessor;
import org.apache.hadoop.hbase.NamespaceDescriptor;
import org.apache.hadoop.hbase.PleaseHoldException;
import org.apache.hadoop.hbase.RegionMetrics;
import org.apache.hadoop.hbase.ReplicationPeerNotFoundException;
import org.apache.hadoop.hbase.ServerMetrics;
import org.apache.hadoop.hbase.ServerName;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.TableNotDisabledException;
import org.apache.hadoop.hbase.TableNotFoundException;
import org.apache.hadoop.hbase.UnknownRegionException;
import org.apache.hadoop.hbase.client.ColumnFamilyDescriptor;
import org.apache.hadoop.hbase.client.CompactionState;
import org.apache.hadoop.hbase.client.MasterSwitchType;
import org.apache.hadoop.hbase.client.NormalizeTableFilterParams;
import org.apache.hadoop.hbase.client.RegionInfo;
import org.apache.hadoop.hbase.client.RegionInfoBuilder;
import org.apache.hadoop.hbase.client.RegionStatesCount;
import org.apache.hadoop.hbase.client.TableDescriptor;
import org.apache.hadoop.hbase.client.TableDescriptorBuilder;
import org.apache.hadoop.hbase.client.TableState;
import org.apache.hadoop.hbase.coprocessor.CoprocessorHost;
import org.apache.hadoop.hbase.exceptions.MasterStoppedException;
import org.apache.hadoop.hbase.executor.ExecutorType;
import org.apache.hadoop.hbase.favored.FavoredNodesManager;
import org.apache.hadoop.hbase.ipc.CoprocessorRpcUtils;
import org.apache.hadoop.hbase.ipc.RpcServer;
import org.apache.hadoop.hbase.ipc.ServerNotRunningYetException;
import org.apache.hadoop.hbase.log.HBaseMarkers;
import org.apache.hadoop.hbase.master.MasterRpcServices.BalanceSwitchMode;
import org.apache.hadoop.hbase.master.assignment.AssignmentManager;
import org.apache.hadoop.hbase.master.assignment.MergeTableRegionsProcedure;
import org.apache.hadoop.hbase.master.assignment.RegionStateNode;
import org.apache.hadoop.hbase.master.assignment.RegionStates;
import org.apache.hadoop.hbase.master.assignment.TransitRegionStateProcedure;
import org.apache.hadoop.hbase.master.balancer.BalancerChore;
import org.apache.hadoop.hbase.master.balancer.ClusterStatusChore;
import org.apache.hadoop.hbase.master.balancer.LoadBalancerFactory;
import org.apache.hadoop.hbase.master.cleaner.DirScanPool;
import org.apache.hadoop.hbase.master.cleaner.HFileCleaner;
import org.apache.hadoop.hbase.master.cleaner.LogCleaner;
import org.apache.hadoop.hbase.master.cleaner.ReplicationBarrierCleaner;
import org.apache.hadoop.hbase.master.cleaner.SnapshotCleanerChore;
import org.apache.hadoop.hbase.master.janitor.CatalogJanitor;
import org.apache.hadoop.hbase.master.locking.LockManager;
import org.apache.hadoop.hbase.master.normalizer.RegionNormalizerFactory;
import org.apache.hadoop.hbase.master.normalizer.RegionNormalizerManager;
import org.apache.hadoop.hbase.master.procedure.CreateTableProcedure;
import org.apache.hadoop.hbase.master.procedure.DeleteNamespaceProcedure;
import org.apache.hadoop.hbase.master.procedure.DeleteTableProcedure;
import org.apache.hadoop.hbase.master.procedure.DisableTableProcedure;
import org.apache.hadoop.hbase.master.procedure.EnableTableProcedure;
import org.apache.hadoop.hbase.master.procedure.InitMetaProcedure;
import org.apache.hadoop.hbase.master.procedure.MasterProcedureConstants;
import org.apache.hadoop.hbase.master.procedure.MasterProcedureEnv;
import org.apache.hadoop.hbase.master.procedure.MasterProcedureScheduler;
import org.apache.hadoop.hbase.master.procedure.MasterProcedureUtil;
import org.apache.hadoop.hbase.master.procedure.MasterProcedureUtil.NonceProcedureRunnable;
import org.apache.hadoop.hbase.master.procedure.ModifyTableProcedure;
import org.apache.hadoop.hbase.master.procedure.ProcedurePrepareLatch;
import org.apache.hadoop.hbase.master.procedure.ProcedureSyncWait;
import org.apache.hadoop.hbase.master.procedure.ReopenTableRegionsProcedure;
import org.apache.hadoop.hbase.master.procedure.ServerCrashProcedure;
import org.apache.hadoop.hbase.master.procedure.TruncateTableProcedure;
import org.apache.hadoop.hbase.master.region.MasterRegion;
import org.apache.hadoop.hbase.master.region.MasterRegionFactory;
import org.apache.hadoop.hbase.master.replication.AbstractPeerProcedure;
import org.apache.hadoop.hbase.master.replication.AddPeerProcedure;
import org.apache.hadoop.hbase.master.replication.DisablePeerProcedure;
import org.apache.hadoop.hbase.master.replication.EnablePeerProcedure;
import org.apache.hadoop.hbase.master.replication.RemovePeerProcedure;
import org.apache.hadoop.hbase.master.replication.ReplicationPeerManager;
import org.apache.hadoop.hbase.master.replication.SyncReplicationReplayWALManager;
import org.apache.hadoop.hbase.master.replication.TransitPeerSyncReplicationStateProcedure;
import org.apache.hadoop.hbase.master.replication.UpdatePeerConfigProcedure;
import org.apache.hadoop.hbase.master.slowlog.SlowLogMasterService;
import org.apache.hadoop.hbase.master.snapshot.SnapshotManager;
import org.apache.hadoop.hbase.master.zksyncer.MasterAddressSyncer;
import org.apache.hadoop.hbase.master.zksyncer.MetaLocationSyncer;
import org.apache.hadoop.hbase.mob.MobFileCleanerChore;
import org.apache.hadoop.hbase.mob.MobFileCompactionChore;
import org.apache.hadoop.hbase.monitoring.MemoryBoundedLogMessageBuffer;
import org.apache.hadoop.hbase.monitoring.MonitoredTask;
import org.apache.hadoop.hbase.monitoring.TaskMonitor;
import org.apache.hadoop.hbase.procedure.MasterProcedureManagerHost;
import org.apache.hadoop.hbase.procedure.flush.MasterFlushTableProcedureManager;
import org.apache.hadoop.hbase.procedure2.LockedResource;
import org.apache.hadoop.hbase.procedure2.Procedure;
import org.apache.hadoop.hbase.procedure2.ProcedureEvent;
import org.apache.hadoop.hbase.procedure2.ProcedureExecutor;
import org.apache.hadoop.hbase.procedure2.RemoteProcedureDispatcher.RemoteProcedure;
import org.apache.hadoop.hbase.procedure2.RemoteProcedureException;
import org.apache.hadoop.hbase.procedure2.store.ProcedureStore;
import org.apache.hadoop.hbase.procedure2.store.ProcedureStore.ProcedureStoreListener;
import org.apache.hadoop.hbase.procedure2.store.region.RegionProcedureStore;
import org.apache.hadoop.hbase.quotas.MasterQuotaManager;
import org.apache.hadoop.hbase.quotas.MasterQuotasObserver;
import org.apache.hadoop.hbase.quotas.QuotaObserverChore;
import org.apache.hadoop.hbase.quotas.QuotaTableUtil;
import org.apache.hadoop.hbase.quotas.QuotaUtil;
import org.apache.hadoop.hbase.quotas.SnapshotQuotaObserverChore;
import org.apache.hadoop.hbase.quotas.SpaceQuotaSnapshot;
import org.apache.hadoop.hbase.quotas.SpaceQuotaSnapshot.SpaceQuotaStatus;
import org.apache.hadoop.hbase.quotas.SpaceQuotaSnapshotNotifier;
import org.apache.hadoop.hbase.quotas.SpaceQuotaSnapshotNotifierFactory;
import org.apache.hadoop.hbase.quotas.SpaceViolationPolicy;
import org.apache.hadoop.hbase.regionserver.HRegionServer;
import org.apache.hadoop.hbase.regionserver.RSRpcServices;
import org.apache.hadoop.hbase.replication.ReplicationException;
import org.apache.hadoop.hbase.replication.ReplicationLoadSource;
import org.apache.hadoop.hbase.replication.ReplicationPeerConfig;
import org.apache.hadoop.hbase.replication.ReplicationPeerDescription;
import org.apache.hadoop.hbase.replication.ReplicationUtils;
import org.apache.hadoop.hbase.replication.SyncReplicationState;
import org.apache.hadoop.hbase.replication.master.ReplicationHFileCleaner;
import org.apache.hadoop.hbase.replication.master.ReplicationLogCleaner;
import org.apache.hadoop.hbase.replication.regionserver.ReplicationStatus;
import org.apache.hadoop.hbase.rsgroup.RSGroupAdminEndpoint;
import org.apache.hadoop.hbase.rsgroup.RSGroupBasedLoadBalancer;
import org.apache.hadoop.hbase.rsgroup.RSGroupInfoManager;
import org.apache.hadoop.hbase.rsgroup.RSGroupUtil;
import org.apache.hadoop.hbase.security.AccessDeniedException;
import org.apache.hadoop.hbase.security.SecurityConstants;
import org.apache.hadoop.hbase.security.UserProvider;
import org.apache.hadoop.hbase.trace.TraceUtil;
import org.apache.hadoop.hbase.util.Addressing;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.FutureUtils;
import org.apache.hadoop.hbase.util.HBaseFsck;
import org.apache.hadoop.hbase.util.HFileArchiveUtil;
import org.apache.hadoop.hbase.util.IdLock;
import org.apache.hadoop.hbase.util.ModifyRegionUtils;
import org.apache.hadoop.hbase.util.Pair;
import org.apache.hadoop.hbase.util.RetryCounter;
import org.apache.hadoop.hbase.util.RetryCounterFactory;
import org.apache.hadoop.hbase.util.TableDescriptorChecker;
import org.apache.hadoop.hbase.util.Threads;
import org.apache.hadoop.hbase.util.VersionInfo;
import org.apache.hadoop.hbase.zookeeper.LoadBalancerTracker;
import org.apache.hadoop.hbase.zookeeper.MasterAddressTracker;
import org.apache.hadoop.hbase.zookeeper.RegionNormalizerTracker;
import org.apache.hadoop.hbase.zookeeper.SnapshotCleanupTracker;
import org.apache.hadoop.hbase.zookeeper.ZKClusterId;
import org.apache.hadoop.hbase.zookeeper.ZKUtil;
import org.apache.hadoop.hbase.zookeeper.ZKWatcher;
import org.apache.hadoop.hbase.zookeeper.ZNodePaths;
import org.apache.yetus.audience.InterfaceAudience;
import org.apache.zookeeper.KeeperException;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import org.apache.hbase.thirdparty.com.google.common.collect.Lists;
import org.apache.hbase.thirdparty.com.google.common.collect.Maps;
import org.apache.hbase.thirdparty.com.google.common.collect.Sets;
import org.apache.hbase.thirdparty.com.google.common.io.Closeables;
import org.apache.hbase.thirdparty.com.google.protobuf.Descriptors;
import org.apache.hbase.thirdparty.com.google.protobuf.Service;
import org.apache.hbase.thirdparty.org.eclipse.jetty.server.Server;
import org.apache.hbase.thirdparty.org.eclipse.jetty.server.ServerConnector;
import org.apache.hbase.thirdparty.org.eclipse.jetty.servlet.ServletHolder;
import org.apache.hbase.thirdparty.org.eclipse.jetty.webapp.WebAppContext;

import org.apache.hadoop.hbase.shaded.protobuf.RequestConverter;
import org.apache.hadoop.hbase.shaded.protobuf.generated.AdminProtos.GetRegionInfoResponse;
import org.apache.hadoop.hbase.shaded.protobuf.generated.SnapshotProtos.SnapshotDescription;

/**
 * HMaster is the "master server" for HBase. An HBase cluster has one active master. If many masters
 * are started, all compete. Whichever wins goes on to run the cluster. All others park themselves
 * in their constructor until master or cluster shutdown or until the active master loses its lease
 * in zookeeper. Thereafter, all running masters jostle to take over the master role.
 * <p/>
 * The Master can be asked to shut down the cluster. See {@link #shutdown()}. In this case it will
 * tell all regionservers to go down and then wait on them all reporting in that they are down.
 * This master will then shut itself down.
 * <p/>
 * You can also shutdown just this master. Call {@link #stopMaster()}.
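 * <p/>
 * In a standard deployment the master is launched from the command line, e.g. {@code hbase
 * master start}, which lands in {@link #main(String[])}. The snippet below is an illustrative
 * sketch of doing the same programmatically:
 * <pre>
 * // Same entry point the bin/hbase script uses; configuration is picked up from the classpath.
 * new HMasterCommandLine(HMaster.class).doMain(new String[] { "start" });
 * </pre>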
 * @see org.apache.zookeeper.Watcher
 */
@InterfaceAudience.LimitedPrivate(HBaseInterfaceAudience.TOOLS)
@SuppressWarnings("deprecation")
public class HMaster extends HRegionServer implements MasterServices {

  private static final Logger LOG = LoggerFactory.getLogger(HMaster.class);

  // MASTER is the name of the webapp and the attribute name used for stuffing this
  // instance into the web context.
  public static final String MASTER = "master";

  // Manager and zk listener for master election
  private final ActiveMasterManager activeMasterManager;
  // Region server tracker
  private RegionServerTracker regionServerTracker;
  // Draining region server tracker
  private DrainingServerTracker drainingServerTracker;
  // Tracker for load balancer state
  LoadBalancerTracker loadBalancerTracker;
  // Tracker for meta location, if a client ZK quorum is specified
  private MetaLocationSyncer metaLocationSyncer;
  // Tracker for active master location, if a client ZK quorum is specified
  @InterfaceAudience.Private
  MasterAddressSyncer masterAddressSyncer;
  // Tracker for auto snapshot cleanup state
  SnapshotCleanupTracker snapshotCleanupTracker;

  // Tracker for split and merge state
  private SplitOrMergeTracker splitOrMergeTracker;

  private ClusterSchemaService clusterSchemaService;

  public static final String HBASE_MASTER_WAIT_ON_SERVICE_IN_SECONDS =
    "hbase.master.wait.on.service.seconds";
  public static final int DEFAULT_HBASE_MASTER_WAIT_ON_SERVICE_IN_SECONDS = 5 * 60;

  public static final String HBASE_MASTER_CLEANER_INTERVAL = "hbase.master.cleaner.interval";

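  // Units: milliseconds; the default below amounts to a ten-minute cleaner interval.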
  public static final int DEFAULT_HBASE_MASTER_CLEANER_INTERVAL = 600 * 1000;

  // Metrics for the HMaster
  final MetricsMaster metricsMaster;
  // file system manager for the master FS operations
  private MasterFileSystem fileSystemManager;
  private MasterWalManager walManager;

  // Manager for procedure-based WAL splitting; null when WAL splitting is coordinated via zk.
  // SplitWALManager will replace SplitLogManager and MasterWalManager, which means the zk-based
  // WAL splitting code will be useless after we switch to the procedure-based one. Our eventual
  // goal is to remove all the zk-based WAL splitting code.
  private SplitWALManager splitWALManager;

  // server manager to deal with region server info
  private volatile ServerManager serverManager;

  // manager of assignment nodes in zookeeper
  private AssignmentManager assignmentManager;

  /**
   * Cache for the meta region replica's locations. Also tracks their changes to avoid stale
   * cache entries.
   */
  private final MetaRegionLocationCache metaRegionLocationCache;

  private RSGroupInfoManager rsGroupInfoManager;

  // manager of replication
  private ReplicationPeerManager replicationPeerManager;

  private SyncReplicationReplayWALManager syncReplicationReplayWALManager;

  // buffer for "fatal error" notices from region servers
  // in the cluster. This is only used for assisting
  // operations/debugging.
  MemoryBoundedLogMessageBuffer rsFatals;

  // flag set after we become the active master (used for testing)
  private volatile boolean activeMaster = false;

  // flag set after we complete initialization once active
  private final ProcedureEvent<?> initialized = new ProcedureEvent<>("master initialized");

  // flag set after master services are started,
  // initialization may not have completed yet.
  volatile boolean serviceStarted = false;

  // Maximum time we should run balancer for
  private final int maxBalancingTime;
  // Maximum percent of regions in transition when balancing
  private final double maxRitPercent;

  private final LockManager lockManager = new LockManager(this);

  private RSGroupBasedLoadBalancer balancer;
  private BalancerChore balancerChore;
  private RegionNormalizerManager regionNormalizerManager;
  private ClusterStatusChore clusterStatusChore;
  private ClusterStatusPublisher clusterStatusPublisherChore = null;
  private SnapshotCleanerChore snapshotCleanerChore = null;

  private HbckChore hbckChore;
  CatalogJanitor catalogJanitorChore;
  private DirScanPool cleanerPool;
  private LogCleaner logCleaner;
  private HFileCleaner hfileCleaner;
  private ReplicationBarrierCleaner replicationBarrierCleaner;
  private MobFileCleanerChore mobFileCleanerChore;
  private MobFileCompactionChore mobFileCompactionChore;
  // used to synchronize the mobCompactionStates
  private final IdLock mobCompactionLock = new IdLock();
  // Saves the information of mob compactions in tables: the key is the table name, the value is
  // the number of compactions in that table.
  private Map<TableName, AtomicInteger> mobCompactionStates = Maps.newConcurrentMap();

  MasterCoprocessorHost cpHost;

  private final boolean preLoadTableDescriptors;

  // Time stamp for when this master became active
  private long masterActiveTime;

  // Time stamp for when this master finished initialization as the active master
  private long masterFinishedInitializationTime;

  Map<String, Service> coprocessorServiceHandlers = Maps.newHashMap();

  // monitor for snapshot of hbase tables
  SnapshotManager snapshotManager;
  // monitor for distributed procedures
  private MasterProcedureManagerHost mpmHost;

  private RegionsRecoveryChore regionsRecoveryChore = null;

  private RegionsRecoveryConfigManager regionsRecoveryConfigManager = null;
  // it is assigned after the 'initialized' guard is set to true, so it should be volatile
  private volatile MasterQuotaManager quotaManager;
  private SpaceQuotaSnapshotNotifier spaceQuotaSnapshotNotifier;
  private QuotaObserverChore quotaObserverChore;
  private SnapshotQuotaObserverChore snapshotQuotaChore;

  private ProcedureExecutor<MasterProcedureEnv> procedureExecutor;
  private ProcedureStore procedureStore;

  // the master local storage to store procedure data, etc.
  private MasterRegion masterRegion;

  // handle table states
  private TableStateManager tableStateManager;

  /** jetty server for master to redirect requests to regionserver infoServer */
  private Server masterJettyServer;

  // Determine if we should do normal startup or minimal "single-user" mode with no region
  // servers and no user tables. Useful for repair and recovery of hbase:meta.
  private final boolean maintenanceMode;
  static final String MAINTENANCE_MODE = "hbase.master.maintenance_mode";
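  // Maintenance mode can be requested either via the configuration or via a JVM system
  // property; for example (illustrative):
  //   conf.setBoolean("hbase.master.maintenance_mode", true);   // hbase-site.xml route
  //   java -Dhbase.master.maintenance_mode=true ...             // system-property route
  // (Boolean.getBoolean in the constructor reads the system property.)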

  // Cached clusterId on standby masters to serve clusterId requests from clients.
  private final CachedClusterId cachedClusterId;

  /**
   * Initializes the HMaster. The steps are as follows:
   * <p>
   * <ol>
   * <li>Initialize the local HRegionServer.</li>
   * <li>Start the ActiveMasterManager.</li>
   * </ol>
   * <p>
   * Remaining steps of initialization occur in
   * {@link #finishActiveMasterInitialization(MonitoredTask)} after the master becomes the
   * active one.
   */
  public HMaster(final Configuration conf) throws IOException {
    super(conf);
    TraceUtil.initTracer(conf);
    try {
      if (conf.getBoolean(MAINTENANCE_MODE, false)) {
        LOG.info("Detected {}=true via configuration.", MAINTENANCE_MODE);
        maintenanceMode = true;
      } else if (Boolean.getBoolean(MAINTENANCE_MODE)) {
        LOG.info("Detected {}=true via system properties.", MAINTENANCE_MODE);
        maintenanceMode = true;
      } else {
        maintenanceMode = false;
      }
      this.rsFatals = new MemoryBoundedLogMessageBuffer(
          conf.getLong("hbase.master.buffer.for.rs.fatals", 1 * 1024 * 1024));
      LOG.info("hbase.rootdir={}, hbase.cluster.distributed={}", getDataRootDir(),
          this.conf.getBoolean(HConstants.CLUSTER_DISTRIBUTED, false));

      // Disable usage of meta replicas in the master
      this.conf.setBoolean(HConstants.USE_META_REPLICAS, false);

      decorateMasterConfiguration(this.conf);

      // Hack! Maps DFSClient => Master for logs.  HDFS made this
      // config param for task trackers, but we can piggyback off of it.
      if (this.conf.get("mapreduce.task.attempt.id") == null) {
        this.conf.set("mapreduce.task.attempt.id", "hb_m_" + this.serverName.toString());
      }

      this.metricsMaster = new MetricsMaster(new MetricsMasterWrapperImpl(this));

      // preload table descriptor at startup
      this.preLoadTableDescriptors = conf.getBoolean("hbase.master.preload.tabledescriptors", true);

      this.maxBalancingTime = getMaxBalancingTime();
      this.maxRitPercent = conf.getDouble(HConstants.HBASE_MASTER_BALANCER_MAX_RIT_PERCENT,
          HConstants.DEFAULT_HBASE_MASTER_BALANCER_MAX_RIT_PERCENT);

      // Do we publish the status?
      boolean shouldPublish = conf.getBoolean(HConstants.STATUS_PUBLISHED,
          HConstants.STATUS_PUBLISHED_DEFAULT);
      Class<? extends ClusterStatusPublisher.Publisher> publisherClass =
          conf.getClass(ClusterStatusPublisher.STATUS_PUBLISHER_CLASS,
              ClusterStatusPublisher.DEFAULT_STATUS_PUBLISHER_CLASS,
              ClusterStatusPublisher.Publisher.class);

      if (shouldPublish) {
        if (publisherClass == null) {
          LOG.warn(HConstants.STATUS_PUBLISHED + " is true, but " +
              ClusterStatusPublisher.STATUS_PUBLISHER_CLASS +
              " is not set - not publishing status");
        } else {
          clusterStatusPublisherChore = new ClusterStatusPublisher(this, conf, publisherClass);
          LOG.debug("Created {}", this.clusterStatusPublisherChore);
          getChoreService().scheduleChore(clusterStatusPublisherChore);
        }
      }

      this.metaRegionLocationCache = new MetaRegionLocationCache(this.zooKeeper);
      this.activeMasterManager = createActiveMasterManager(zooKeeper, serverName, this);

      cachedClusterId = new CachedClusterId(this, conf);
    } catch (Throwable t) {
      // Make sure we log the exception. HMaster is often started via reflection and the
      // cause of failed startup is lost.
      LOG.error("Failed construction of Master", t);
      throw t;
    }
  }

  /**
   * Protected so that custom implementations in tests can override the default
   * ActiveMasterManager implementation.
   */
  protected ActiveMasterManager createActiveMasterManager(ZKWatcher zk, ServerName sn,
      org.apache.hadoop.hbase.Server server) throws InterruptedIOException {
    return new ActiveMasterManager(zk, sn, server);
  }

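  // The hostname the master reports can be pinned via "hbase.master.hostname"
  // (DNS.MASTER_HOSTNAME_KEY), e.g. conf.set("hbase.master.hostname", "master1.example.com");
  // when unset, the hostname is resolved the usual way by the server startup machinery.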
  @Override
  protected String getUseThisHostnameInstead(Configuration conf) {
    return conf.get(MASTER_HOSTNAME_KEY);
  }

  // Main run loop. Calls through to the regionserver run loop AFTER becoming active Master; will
  // block in here until then.
  @Override
  public void run() {
    try {
      Threads.setDaemonThreadRunning(new Thread(() -> {
        try {
          int infoPort = putUpJettyServer();
          startActiveMasterManager(infoPort);
        } catch (Throwable t) {
          // Make sure we log the exception.
          String error = "Failed to become Active Master";
          LOG.error(error, t);
          // Abort should have been called already.
          if (!isAborted()) {
            abort(error, t);
          }
        }
      }), getName() + ":becomeActiveMaster");
      // Fall in here even if we have been aborted. Need to run the shutdown services and
      // the super run call will do this for us.
      super.run();
    } finally {
      if (this.clusterSchemaService != null) {
        // If on way out, then we are no longer active master.
        this.clusterSchemaService.stopAsync();
        try {
          this.clusterSchemaService.awaitTerminated(
              getConfiguration().getInt(HBASE_MASTER_WAIT_ON_SERVICE_IN_SECONDS,
              DEFAULT_HBASE_MASTER_WAIT_ON_SERVICE_IN_SECONDS), TimeUnit.SECONDS);
        } catch (TimeoutException te) {
          LOG.warn("Failed shutdown of clusterSchemaService", te);
        }
      }
      this.activeMaster = false;
    }
  }

  // Returns the actual infoPort; -1 means the info server is disabled.
  private int putUpJettyServer() throws IOException {
    if (!conf.getBoolean("hbase.master.infoserver.redirect", true)) {
      return -1;
    }
    final int infoPort = conf.getInt("hbase.master.info.port.orig",
      HConstants.DEFAULT_MASTER_INFOPORT);
    // -1 is for disabling info server, so no redirecting
    if (infoPort < 0 || infoServer == null) {
      return -1;
    }
    if (infoPort == infoServer.getPort()) {
      return infoPort;
    }
    final String addr = conf.get("hbase.master.info.bindAddress", "0.0.0.0");
    if (!Addressing.isLocalAddress(InetAddress.getByName(addr))) {
      String msg =
          "Failed to start redirecting jetty server. Address " + addr
              + " does not belong to this host. Correct configuration parameter: "
              + "hbase.master.info.bindAddress";
      LOG.error(msg);
      throw new IOException(msg);
    }

    // TODO I'm pretty sure we could just add another binding to the InfoServer run by
    // the RegionServer and have it run the RedirectServlet instead of standing up
    // a second entire stack here.
    masterJettyServer = new Server();
    final ServerConnector connector = new ServerConnector(masterJettyServer);
    connector.setHost(addr);
    connector.setPort(infoPort);
    masterJettyServer.addConnector(connector);
    masterJettyServer.setStopAtShutdown(true);

    final String redirectHostname =
        StringUtils.isBlank(useThisHostnameInstead) ? null : useThisHostnameInstead;

    final MasterRedirectServlet redirect = new MasterRedirectServlet(infoServer, redirectHostname);
    final WebAppContext context =
        new WebAppContext(null, "/", null, null, null, null, WebAppContext.NO_SESSIONS);
    context.addServlet(new ServletHolder(redirect), "/*");
    context.setServer(masterJettyServer);

    try {
      masterJettyServer.start();
    } catch (Exception e) {
      throw new IOException("Failed to start redirecting jetty server", e);
    }
    return connector.getLocalPort();
  }

  /**
   * For compatibility: if login with the regionserver credentials fails, try the master ones.
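   * <p/>
   * The master-side credentials come from {@code hbase.master.keytab.file} and
   * {@code hbase.master.kerberos.principal} (see {@link SecurityConstants}).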
   */
  @Override
  protected void login(UserProvider user, String host) throws IOException {
    try {
      super.login(user, host);
    } catch (IOException ie) {
      user.login(SecurityConstants.MASTER_KRB_KEYTAB_FILE,
              SecurityConstants.MASTER_KRB_PRINCIPAL, host);
    }
  }

  /**
   * If configured to put regions on the active master, wait until this master becomes the
   * active one; otherwise, loop until the server is stopped or aborted.
   */
  @Override
  protected void waitForMasterActive() {
    if (maintenanceMode) {
      return;
    }
    boolean tablesOnMaster = LoadBalancer.isTablesOnMaster(conf);
    while (!(tablesOnMaster && activeMaster) && !isStopped() && !isAborted()) {
      sleeper.sleep();
    }
  }

  @InterfaceAudience.Private
  public MasterRpcServices getMasterRpcServices() {
    return (MasterRpcServices) rpcServices;
  }

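  // Flips the balancer on or off (ASYNC variant; see MasterRpcServices.BalanceSwitchMode) and
  // returns the previous state; roughly what the shell's balance_switch command does.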
  public boolean balanceSwitch(final boolean b) throws IOException {
    return getMasterRpcServices().switchBalancer(b, BalanceSwitchMode.ASYNC);
  }

  @Override
  protected String getProcessName() {
    return MASTER;
  }

  @Override
  protected boolean canCreateBaseZNode() {
    return true;
  }

  @Override
  protected boolean canUpdateTableDescriptor() {
    return true;
  }

  @Override
  protected boolean cacheTableDescriptor() {
    return true;
  }

  @Override
  protected RSRpcServices createRpcServices() throws IOException {
    return new MasterRpcServices(this);
  }

  @Override
  protected void configureInfoServer() {
    infoServer.addUnprivilegedServlet("master-status", "/master-status", MasterStatusServlet.class);
    infoServer.setAttribute(MASTER, this);
    if (LoadBalancer.isTablesOnMaster(conf)) {
      super.configureInfoServer();
    }
  }

  @Override
  protected Class<? extends HttpServlet> getDumpServlet() {
    return MasterDumpServlet.class;
  }

  @Override
  public MetricsMaster getMasterMetrics() {
    return metricsMaster;
  }

  /**
   * Initialize all ZK based system trackers. But do not include {@link RegionServerTracker};
   * it should have already been initialized along with {@link ServerManager}.
   */
  private void initializeZKBasedSystemTrackers()
      throws IOException, KeeperException, ReplicationException {
    this.balancer = new RSGroupBasedLoadBalancer();
    this.balancer.setConf(conf);
    this.loadBalancerTracker = new LoadBalancerTracker(zooKeeper, this);
    this.loadBalancerTracker.start();

    this.regionNormalizerManager =
      RegionNormalizerFactory.createNormalizerManager(conf, zooKeeper, this);
    this.configurationManager.registerObserver(regionNormalizerManager);
    this.regionNormalizerManager.start();

    this.splitOrMergeTracker = new SplitOrMergeTracker(zooKeeper, conf, this);
    this.splitOrMergeTracker.start();

    // This is for backwards compatibility. We do not need the CP for rs group anymore, but if
    // the user wants to load it, we need to enable rs group.
    String[] cpClasses = conf.getStrings(MasterCoprocessorHost.MASTER_COPROCESSOR_CONF_KEY);
    if (cpClasses != null) {
      for (String cpClass : cpClasses) {
        if (RSGroupAdminEndpoint.class.getName().equals(cpClass)) {
          RSGroupUtil.enableRSGroup(conf);
          break;
        }
      }
    }
    this.rsGroupInfoManager = RSGroupInfoManager.create(this);

    this.replicationPeerManager = ReplicationPeerManager.create(zooKeeper, conf, clusterId);

    this.drainingServerTracker = new DrainingServerTracker(zooKeeper, this, this.serverManager);
    this.drainingServerTracker.start();

    this.snapshotCleanupTracker = new SnapshotCleanupTracker(zooKeeper, this);
    this.snapshotCleanupTracker.start();

    String clientQuorumServers = conf.get(HConstants.CLIENT_ZOOKEEPER_QUORUM);
    boolean clientZkObserverMode = conf.getBoolean(HConstants.CLIENT_ZOOKEEPER_OBSERVER_MODE,
      HConstants.DEFAULT_CLIENT_ZOOKEEPER_OBSERVER_MODE);
    if (clientQuorumServers != null && !clientZkObserverMode) {
      // we need to take care of the ZK information synchronization
      // if the given client ZK nodes are not observer nodes
      ZKWatcher clientZkWatcher = new ZKWatcher(conf,
          getProcessName() + ":" + rpcServices.getSocketAddress().getPort() + "-clientZK", this,
          false, true);
      this.metaLocationSyncer = new MetaLocationSyncer(zooKeeper, clientZkWatcher, this);
      this.metaLocationSyncer.start();
      this.masterAddressSyncer = new MasterAddressSyncer(zooKeeper, clientZkWatcher, this);
      this.masterAddressSyncer.start();
      // setting the cluster id is a one-time effort
      ZKClusterId.setClusterId(clientZkWatcher, fileSystemManager.getClusterId());
    }

    // Set the cluster as up.  If new RSs, they'll be waiting on this before
    // going ahead with their startup.
    boolean wasUp = this.clusterStatusTracker.isClusterUp();
    if (!wasUp) {
      this.clusterStatusTracker.setClusterUp();
    }

    LOG.info("Active/primary master=" + this.serverName +
        ", sessionid=0x" +
        Long.toHexString(this.zooKeeper.getRecoverableZooKeeper().getSessionId()) +
        ", setting cluster-up flag (Was=" + wasUp + ")");

    // create/initialize the snapshot manager and other procedure managers
    this.snapshotManager = new SnapshotManager();
    this.mpmHost = new MasterProcedureManagerHost();
    this.mpmHost.register(this.snapshotManager);
    this.mpmHost.register(new MasterFlushTableProcedureManager());
    this.mpmHost.loadProcedures(conf);
    this.mpmHost.initialize(this, this.metricsMaster);
  }

  // Will be overridden in tests to inject a customized AssignmentManager
  @InterfaceAudience.Private
  protected AssignmentManager createAssignmentManager(MasterServices master) {
    return new AssignmentManager(master);
  }

  /**
   * Finish initialization of HMaster after becoming the primary master.
   * <p/>
   * The startup order is a bit complicated but very important, do not change it unless you know
   * what you are doing.
   * <ol>
   * <li>Initialize file system based components - file system manager, wal manager, table
   * descriptors, etc</li>
   * <li>Publish cluster id</li>
   * <li>Here comes the most complicated part - initialize server manager, assignment manager and
   * region server tracker
   * <ol type='i'>
   * <li>Create server manager</li>
   * <li>Create procedure executor, load the procedures, but do not start workers. We will start it
   * later after we finish scheduling SCPs to avoid scheduling duplicated SCPs for the same
   * server</li>
   * <li>Create assignment manager and start it, load the meta region state, but do not load data
   * from meta region</li>
   * <li>Start region server tracker, construct the online servers set and find out dead servers
   * and schedule SCP for them. The online servers will be constructed by scanning zk, and we will
   * also scan the wal directory to find out possible live region servers, and the differences
   * between these two sets are the dead servers</li>
   * </ol>
   * </li>
   * <li>If this is a new deploy, schedule an InitMetaProcedure to initialize meta</li>
   * <li>Start necessary service threads - balancer, catalog janitor, executor services, and also
   * the procedure executor, etc. Notice that the balancer must be created first as assignment
   * manager may use it when assigning regions.</li>
   * <li>Wait for meta to be initialized if necessary, start table state manager.</li>
   * <li>Wait for enough region servers to check-in</li>
   * <li>Let assignment manager load data from meta and construct region states</li>
   * <li>Start all other things such as chore services, etc</li>
   * </ol>
   * <p/>
   * Notice that now we will not schedule a special procedure to make meta online (unless this is
   * the first deploy, where meta has not been created yet); we will rely on SCP to bring meta
   * online.
   */
  private void finishActiveMasterInitialization(MonitoredTask status) throws IOException,
          InterruptedException, KeeperException, ReplicationException {
    /*
     * We are active master now... go initialize components we need to run.
     */
    status.setStatus("Initializing Master file system");

    this.masterActiveTime = System.currentTimeMillis();
    // TODO: Do this using Dependency Injection, using PicoContainer, Guice or Spring.

    // always initialize the MemStoreLAB as we use a region to store data in master now, see
    // masterRegion.
    initializeMemStoreChunkCreator();
    this.fileSystemManager = new MasterFileSystem(conf);
    this.walManager = new MasterWalManager(this);

    // warm-up HTDs cache on master initialization
    if (preLoadTableDescriptors) {
      status.setStatus("Pre-loading table descriptors");
      this.tableDescriptors.getAll();
    }

    // Publish cluster ID; set it in Master too. The superclass RegionServer does this later but
    // only after it has checked in with the Master. At least a few tests ask Master for clusterId
    // before it has called its run method and before RegionServer has done the reportForDuty.
    ClusterId clusterId = fileSystemManager.getClusterId();
    status.setStatus("Publishing Cluster ID " + clusterId + " in ZooKeeper");
    ZKClusterId.setClusterId(this.zooKeeper, fileSystemManager.getClusterId());
    this.clusterId = clusterId.toString();

    // Precaution. Put in place the old hbck1 lock file to fence out old hbase1s running their
    // hbck1s against an hbase2 cluster; it could do damage. To skip this behavior, set
    // hbase.write.hbck1.lock.file to false.
    if (this.conf.getBoolean("hbase.write.hbck1.lock.file", true)) {
      Pair<Path, FSDataOutputStream> result = null;
      try {
        result = HBaseFsck.checkAndMarkRunningHbck(this.conf,
            HBaseFsck.createLockRetryCounterFactory(this.conf).create());
      } finally {
        if (result != null) {
          Closeables.close(result.getSecond(), true);
        }
      }
    }

    status.setStatus("Initialize ServerManager and schedule SCP for crashed servers");
    // The below two managers must be created before loading procedures, as they will be used
    // during loading.
    this.serverManager = createServerManager(this);
    this.syncReplicationReplayWALManager = new SyncReplicationReplayWALManager(this);
    if (!conf.getBoolean(HBASE_SPLIT_WAL_COORDINATED_BY_ZK,
      DEFAULT_HBASE_SPLIT_COORDINATED_BY_ZK)) {
      this.splitWALManager = new SplitWALManager(this);
    }

    // initialize master local region
    masterRegion = MasterRegionFactory.create(this);
    createProcedureExecutor();
    Map<Class<?>, List<Procedure<MasterProcedureEnv>>> procsByType =
      procedureExecutor.getActiveProceduresNoCopy().stream()
        .collect(Collectors.groupingBy(p -> p.getClass()));
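    // procsByType lets the recovery steps below pull all replayed procedures of one class
    // (e.g. TransitRegionStateProcedure, ServerCrashProcedure) out in a single lookup.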

    // Create Assignment Manager
    this.assignmentManager = createAssignmentManager(this);
    this.assignmentManager.start();
    // TODO: TRSP can perform as the sub procedure for other procedures, so even if it is marked as
    // completed, it could still be in the procedure list. This is a bit strange but is another
    // story, need to verify the implementation for ProcedureExecutor and ProcedureStore.
    List<TransitRegionStateProcedure> ritList =
      procsByType.getOrDefault(TransitRegionStateProcedure.class, Collections.emptyList()).stream()
        .filter(p -> !p.isFinished()).map(p -> (TransitRegionStateProcedure) p)
        .collect(Collectors.toList());
    this.assignmentManager.setupRIT(ritList);

    // Start RegionServerTracker with listing of servers found with existing SCPs -- these should
    // be registered in the deadServers set -- and with the list of servernames out on the
    // filesystem that COULD BE 'alive' (we'll schedule SCPs for each and let SCP figure it out).
    // We also pass dirs that are already 'splitting'... so we can do some checks down in tracker.
    // TODO: Generate the splitting and live Set in one pass instead of two as we currently do.
    this.regionServerTracker = new RegionServerTracker(zooKeeper, this, this.serverManager);
    this.regionServerTracker.start(
      procsByType.getOrDefault(ServerCrashProcedure.class, Collections.emptyList()).stream()
        .map(p -> (ServerCrashProcedure) p).map(p -> p.getServerName()).collect(Collectors.toSet()),
      walManager.getLiveServersFromWALDir(), walManager.getSplittingServersFromWALDir());
    // This manager must be accessed AFTER hbase:meta is confirmed online.
    this.tableStateManager = new TableStateManager(this);

    status.setStatus("Initializing ZK system trackers");
    initializeZKBasedSystemTrackers();
    status.setStatus("Loading last flushed sequence id of regions");
    try {
      this.serverManager.loadLastFlushedSequenceIds();
    } catch (IOException e) {
      LOG.info("Failed to load last flushed sequence id of regions"
          + " from file system", e);
    }
    // Set ourselves as active Master now that our claim has succeeded up in zk.
    this.activeMaster = true;

    // Start the Zombie master detector after setting master as active, see HBASE-21535
    Thread zombieDetector = new Thread(new MasterInitializationMonitor(this),
        "ActiveMasterInitializationMonitor-" + System.currentTimeMillis());
    zombieDetector.setDaemon(true);
    zombieDetector.start();

    if (!maintenanceMode) {
      // Add the Observer that deletes quotas on table deletion before starting all CPs by
      // default with quota support, unless the user specifically asks not to load this Observer.
      if (QuotaUtil.isQuotaEnabled(conf)) {
        updateConfigurationForQuotasObserver(conf);
      }
      // initialize master side coprocessors before we start handling requests
      status.setStatus("Initializing master coprocessors");
      this.cpHost = new MasterCoprocessorHost(this, this.conf);
    }

    // Checking if meta needs initializing.
    status.setStatus("Initializing meta table if this is a new deploy");
    InitMetaProcedure initMetaProc = null;
    // Print out state of hbase:meta on startup; helps debugging.
    if (!this.assignmentManager.getRegionStates().hasTableRegionStates(TableName.META_TABLE_NAME)) {
      Optional<InitMetaProcedure> optProc = procedureExecutor.getProcedures().stream()
        .filter(p -> p instanceof InitMetaProcedure).map(o -> (InitMetaProcedure) o).findAny();
      initMetaProc = optProc.orElseGet(() -> {
        // schedule an init meta procedure if meta has not been deployed yet
        InitMetaProcedure temp = new InitMetaProcedure();
        procedureExecutor.submitProcedure(temp);
        return temp;
      });
    }

    // initialize load balancer
    this.balancer.setMasterServices(this);
    this.balancer.setClusterMetrics(getClusterMetricsWithoutCoprocessor());
    this.balancer.initialize();

    // start up all service threads.
    status.setStatus("Initializing master service threads");
    startServiceThreads();
    // wait for meta to be initialized after we start the procedure executor
    if (initMetaProc != null) {
      initMetaProc.await();
    }
    // Wake up this server to check in
    sleeper.skipSleepCycle();

    // Wait for region servers to report in.
    // With this as part of master initialization, it precludes our being able to start a single
    // server that is both Master and RegionServer. Needs more thought. TODO.
    String statusStr = "Wait for region servers to report in";
    status.setStatus(statusStr);
    LOG.info(Objects.toString(status));
    waitForRegionServers(status);

    // Check if master is shutting down because of an issue initializing regionservers or the
    // balancer.
    if (isStopped()) {
      return;
    }

    status.setStatus("Starting assignment manager");
    // FIRST HBASE:META READ!!!!
    // The below cannot make progress w/o hbase:meta being online.
    // This is the FIRST attempt at going to hbase:meta. Meta on-lining is going on in background
    // as procedures run -- in particular SCPs for crashed servers... One should put up hbase:meta
    // if it is down. It may take a while to come online. So, wait here until meta is for sure
    // available. That's what waitForMetaOnline does.
    if (!waitForMetaOnline()) {
      return;
    }
    this.assignmentManager.joinCluster();
    // The below depends on hbase:meta being online.
    this.assignmentManager.processOfflineRegions();
    // this must be called after the above processOfflineRegions to prevent race
    this.assignmentManager.wakeMetaLoadedEvent();

    // For migrating from a version without HBASE-25099, and also for honoring the configuration
    // first.
    if (conf.get(HConstants.META_REPLICAS_NUM) != null) {
      int replicasNumInConf =
        conf.getInt(HConstants.META_REPLICAS_NUM, HConstants.DEFAULT_META_REPLICA_NUM);
      TableDescriptor metaDesc = tableDescriptors.get(TableName.META_TABLE_NAME);
      if (metaDesc.getRegionReplication() != replicasNumInConf) {
        // it is possible that we already have some replicas before upgrading, so we must set the
        // region replication number in meta TableDescriptor directly first, without creating a
        // ModifyTableProcedure, otherwise it may cause a double assign for the meta replicas.
        int existingReplicasCount =
          assignmentManager.getRegionStates().getRegionsOfTable(TableName.META_TABLE_NAME).size();
        if (existingReplicasCount > metaDesc.getRegionReplication()) {
          LOG.info("Update replica count of hbase:meta from {} (in TableDescriptor)" +
            " to {} (existing ZNodes)", metaDesc.getRegionReplication(), existingReplicasCount);
          metaDesc = TableDescriptorBuilder.newBuilder(metaDesc)
            .setRegionReplication(existingReplicasCount).build();
          tableDescriptors.update(metaDesc);
        }
        // check again, and issue a ModifyTableProcedure if needed
        if (metaDesc.getRegionReplication() != replicasNumInConf) {
          LOG.info(
            "The {} config is {} while the replica count in TableDescriptor is {}" +
              " for hbase:meta, altering...",
            HConstants.META_REPLICAS_NUM, replicasNumInConf, metaDesc.getRegionReplication());
          procedureExecutor.submitProcedure(new ModifyTableProcedure(
            procedureExecutor.getEnvironment(), TableDescriptorBuilder.newBuilder(metaDesc)
              .setRegionReplication(replicasNumInConf).build(),
            null, metaDesc, false));
        }
      }
    }
    // Initialize after meta is up as the below scans meta
    if (getFavoredNodesManager() != null && !maintenanceMode) {
      SnapshotOfRegionAssignmentFromMeta snapshotOfRegionAssignment =
          new SnapshotOfRegionAssignmentFromMeta(getConnection());
      snapshotOfRegionAssignment.initialize();
      getFavoredNodesManager().initialize(snapshotOfRegionAssignment);
    }

    // set cluster status again after user regions are assigned
    this.balancer.setClusterMetrics(getClusterMetricsWithoutCoprocessor());

    // Start balancer and meta catalog janitor after meta and regions have been assigned.
    status.setStatus("Starting balancer and catalog janitor");
    this.clusterStatusChore = new ClusterStatusChore(this, balancer);
    getChoreService().scheduleChore(clusterStatusChore);
    this.balancerChore = new BalancerChore(this);
    getChoreService().scheduleChore(balancerChore);
    if (regionNormalizerManager != null) {
      getChoreService().scheduleChore(regionNormalizerManager.getRegionNormalizerChore());
    }
    this.catalogJanitorChore = new CatalogJanitor(this);
    getChoreService().scheduleChore(catalogJanitorChore);
    this.hbckChore = new HbckChore(this);
    getChoreService().scheduleChore(hbckChore);
    this.serverManager.startChore();

    // Only for rolling upgrade, where we need to migrate the data in the namespace table to the
    // meta table.
    if (!waitForNamespaceOnline()) {
      return;
    }
    status.setStatus("Starting cluster schema service");
    initClusterSchemaService();

    if (this.cpHost != null) {
      try {
        this.cpHost.preMasterInitialization();
      } catch (IOException e) {
        LOG.error("Coprocessor preMasterInitialization() hook failed", e);
      }
    }

    status.markComplete("Initialization successful");
    LOG.info(String.format("Master has completed initialization %.3fsec",
       (System.currentTimeMillis() - masterActiveTime) / 1000.0f));
    this.masterFinishedInitializationTime = System.currentTimeMillis();
    configurationManager.registerObserver(this.balancer);
    configurationManager.registerObserver(this.cleanerPool);
    configurationManager.registerObserver(this.hfileCleaner);
    configurationManager.registerObserver(this.logCleaner);
    configurationManager.registerObserver(this.regionsRecoveryConfigManager);
    // Set master as 'initialized'.
    setInitialized(true);

    if (maintenanceMode) {
      LOG.info("Detected repair mode, skipping final initialization steps.");
      return;
    }

    assignmentManager.checkIfShouldMoveSystemRegionAsync();
    status.setStatus("Starting quota manager");
    initQuotaManager();
    if (QuotaUtil.isQuotaEnabled(conf)) {
      // Create the quota snapshot notifier
      spaceQuotaSnapshotNotifier = createQuotaSnapshotNotifier();
      spaceQuotaSnapshotNotifier.initialize(getConnection());
      this.quotaObserverChore = new QuotaObserverChore(this, getMasterMetrics());
      // Start the chore to read the region FS space reports and act on them
      getChoreService().scheduleChore(quotaObserverChore);

      this.snapshotQuotaChore = new SnapshotQuotaObserverChore(this, getMasterMetrics());
      // Start the chore to read snapshots and add their usage to table/NS quotas
      getChoreService().scheduleChore(snapshotQuotaChore);
    }
    final SlowLogMasterService slowLogMasterService = new SlowLogMasterService(conf, this);
    slowLogMasterService.init();

    // Clear dead servers that have the same host name and port as an online server, because we
    // do not remove a dead server matching an RS that tries to check in before master
    // initialization completes. See HBASE-5916.
    this.serverManager.clearDeadServersWithSameHostNameAndPortOfOnlineServer();

    // Check and set the znode ACLs if needed in case we are overtaking a non-secure configuration
    status.setStatus("Checking ZNode ACLs");
    zooKeeper.checkAndSetZNodeAcls();

    status.setStatus("Initializing MOB Cleaner");
    initMobCleaner();

    status.setStatus("Calling postStartMaster coprocessors");
    if (this.cpHost != null) {
      // don't let cp initialization errors kill the master
      try {
        this.cpHost.postStartMaster();
      } catch (IOException ioe) {
        LOG.error("Coprocessor postStartMaster() hook failed", ioe);
      }
    }

    zombieDetector.interrupt();

    /*
     * After the master has started up, let's do balancer post-startup initialization. Since this
     * runs in the activeMasterManager thread, it should be fine.
     */
    long start = System.currentTimeMillis();
    this.balancer.postMasterStartupInitialize();
    if (LOG.isDebugEnabled()) {
      LOG.debug("Balancer post startup initialization complete, took " + (
          (System.currentTimeMillis() - start) / 1000) + " seconds");
    }
  }

  /**
   * Check hbase:meta is up and ready for reading. For use during Master startup only.
   * @return True if meta is UP and online and startup can progress. Otherwise, meta is not online
   *   and we will hold here until operator intervention.
   */
  @InterfaceAudience.Private
  public boolean waitForMetaOnline() {
    return isRegionOnline(RegionInfoBuilder.FIRST_META_REGIONINFO);
  }

  /**
   * @return True if region is online and scannable, else false if an error or shutdown (otherwise
   *   we just block in here holding up all forward progress).
   */
  private boolean isRegionOnline(RegionInfo ri) {
    RetryCounter rc = null;
    while (!isStopped()) {
      RegionState rs = this.assignmentManager.getRegionStates().getRegionState(ri);
      if (rs.isOpened()) {
        if (this.getServerManager().isServerOnline(rs.getServerName())) {
          return true;
        }
      }
      // Region is not OPEN.
      Optional<Procedure<MasterProcedureEnv>> optProc = this.procedureExecutor.getProcedures()
          .stream().filter(p -> p instanceof ServerCrashProcedure).findAny();
      // TODO: Add a page to refguide on how to do repair. Have this log message point to it.
      // Page will talk about loss of edits, how to schedule at least the meta WAL recovery, and
      // then how to assign including how to break region lock if one held.
      LOG.warn("{} is NOT online; state={}; ServerCrashProcedures={}. Master startup cannot " +
          "progress, in holding-pattern until region onlined.",
          ri.getRegionNameAsString(), rs, optProc.isPresent());
      // Check once-a-minute.
      if (rc == null) {
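        // Assumed parameter order: RetryCounterFactory(maxAttempts, sleepIntervalMs,
        // maxSleepTimeMs), i.e. back off from one second up to the one-minute cap.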
        rc = new RetryCounterFactory(Integer.MAX_VALUE, 1000, 60_000).create();
      }
      Threads.sleep(rc.getBackoffTimeAndIncrementAttempts());
    }
    return false;
  }

  /**
   * Check hbase:namespace table is assigned. If not, startup will hang looking for the ns table.
   * <p/>
   * This is for rolling upgrades; later we will migrate the data in the ns table to the ns family
   * of the meta table. And if this is a new cluster, this method will return immediately as there
   * will be no namespace table/region.
   * @return True if namespace table is up/online.
   */
  private boolean waitForNamespaceOnline() throws IOException {
    TableState nsTableState =
      MetaTableAccessor.getTableState(getConnection(), TableName.NAMESPACE_TABLE_NAME);
    if (nsTableState == null || nsTableState.isDisabled()) {
      // this means we have already migrated the data and disabled or deleted the namespace table,
      // or this is a new deploy which does not have a namespace table from the beginning.
      return true;
    }
    List<RegionInfo> ris =
      this.assignmentManager.getRegionStates().getRegionsOfTable(TableName.NAMESPACE_TABLE_NAME);
    if (ris.isEmpty()) {
      // maybe this will not happen any more, but anyway, it does no harm to add a check here...
      return true;
    }
    // Else there are namespace regions up in meta. Ensure they are assigned before we go on.
    for (RegionInfo ri : ris) {
      if (!isRegionOnline(ri)) {
        return false;
      }
    }
    return true;
  }

  /**
   * Adds the {@code MasterQuotasObserver} to the list of configured Master observers to
   * automatically remove quotas for a table when that table is deleted.
   */
  @InterfaceAudience.Private
  public void updateConfigurationForQuotasObserver(Configuration conf) {
    // We're configured to not delete quotas on table deletion, so we don't need to add the
    // observer.
    if (!conf.getBoolean(
          MasterQuotasObserver.REMOVE_QUOTA_ON_TABLE_DELETE,
          MasterQuotasObserver.REMOVE_QUOTA_ON_TABLE_DELETE_DEFAULT)) {
      return;
    }
1198    String[] masterCoprocs = conf.getStrings(CoprocessorHost.MASTER_COPROCESSOR_CONF_KEY);
1199    final int length = null == masterCoprocs ? 0 : masterCoprocs.length;
1200    String[] updatedCoprocs = new String[length + 1];
1201    if (length > 0) {
1202      System.arraycopy(masterCoprocs, 0, updatedCoprocs, 0, masterCoprocs.length);
1203    }
1204    updatedCoprocs[length] = MasterQuotasObserver.class.getName();
1205    conf.setStrings(CoprocessorHost.MASTER_COPROCESSOR_CONF_KEY, updatedCoprocs);
1206  }
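
  /*
   * A minimal sketch of the effect, assuming CoprocessorHost.MASTER_COPROCESSOR_CONF_KEY
   * resolves to "hbase.coprocessor.master.classes" and using a hypothetical observer name:
   *
   *   Configuration conf = HBaseConfiguration.create();
   *   conf.setStrings("hbase.coprocessor.master.classes", "com.example.FooMasterObserver");
   *   master.updateConfigurationForQuotasObserver(conf);
   *   // conf now lists "com.example.FooMasterObserver" followed by
   *   // MasterQuotasObserver.class.getName(), enabling quota removal on table delete.
   */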
1207
1208  private void initMobCleaner() {
1209    this.mobFileCleanerChore = new MobFileCleanerChore(this);
1210    getChoreService().scheduleChore(mobFileCleanerChore);
1211    this.mobFileCompactionChore = new MobFileCompactionChore(this);
1212    getChoreService().scheduleChore(mobFileCompactionChore);
1213  }
1214
1215  /**
1216   * <p>
1217   * Create a {@link ServerManager} instance.
1218   * </p>
1219   * <p>
1220   * Will be overridden in tests.
1221   * </p>
1222   */
1223  @InterfaceAudience.Private
1224  protected ServerManager createServerManager(final MasterServices master) throws IOException {
1225    // We put this out here in a method so can do a Mockito.spy and stub it out
1226    // w/ a mocked up ServerManager.
1227    setupClusterConnection();
1228    return new ServerManager(master);
1229  }
1230
1231  private void waitForRegionServers(final MonitoredTask status)
1232      throws IOException, InterruptedException {
1233    this.serverManager.waitForRegionServers(status);
1234  }
1235
1236  // Will be overridden in tests
1237  @InterfaceAudience.Private
1238  protected void initClusterSchemaService() throws IOException, InterruptedException {
1239    this.clusterSchemaService = new ClusterSchemaServiceImpl(this);
1240    this.clusterSchemaService.startAsync();
1241    try {
1242      this.clusterSchemaService.awaitRunning(getConfiguration().getInt(
1243        HBASE_MASTER_WAIT_ON_SERVICE_IN_SECONDS,
1244        DEFAULT_HBASE_MASTER_WAIT_ON_SERVICE_IN_SECONDS), TimeUnit.SECONDS);
1245    } catch (TimeoutException toe) {
1246      throw new IOException("Timed out starting ClusterSchemaService", toe);
1247    }
1248  }
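
  /*
   * A tuning sketch, assuming HBASE_MASTER_WAIT_ON_SERVICE_IN_SECONDS resolves to
   * "hbase.master.wait.on.service.seconds": lowering it bounds how long startup waits for the
   * ClusterSchemaService before the await above times out and surfaces as an IOException.
   *
   *   conf.setInt("hbase.master.wait.on.service.seconds", 30); // hypothetical value
   */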
1249
1250  private void initQuotaManager() throws IOException {
1251    MasterQuotaManager quotaManager = new MasterQuotaManager(this);
1252    quotaManager.start();
1253    this.quotaManager = quotaManager;
1254  }
1255
1256  private SpaceQuotaSnapshotNotifier createQuotaSnapshotNotifier() {
1257    SpaceQuotaSnapshotNotifier notifier =
1258        SpaceQuotaSnapshotNotifierFactory.getInstance().create(getConfiguration());
1259    return notifier;
1260  }
1261
1262  boolean isCatalogJanitorEnabled() {
1263    return catalogJanitorChore != null && catalogJanitorChore.getEnabled();
1264  }
1265
1266  boolean isCleanerChoreEnabled() {
1267    boolean hfileCleanerFlag = true, logCleanerFlag = true;
1268
1269    if (hfileCleaner != null) {
1270      hfileCleanerFlag = hfileCleaner.getEnabled();
1271    }
1272
1273    if (logCleaner != null) {
1274      logCleanerFlag = logCleaner.getEnabled();
1275    }
1276
1277    return (hfileCleanerFlag && logCleanerFlag);
1278  }
1279
1280  @Override
1281  public ServerManager getServerManager() {
1282    return this.serverManager;
1283  }
1284
1285  @Override
1286  public MasterFileSystem getMasterFileSystem() {
1287    return this.fileSystemManager;
1288  }
1289
1290  @Override
1291  public MasterWalManager getMasterWalManager() {
1292    return this.walManager;
1293  }
1294
1295  @Override
1296  public SplitWALManager getSplitWALManager() {
1297    return splitWALManager;
1298  }
1299
1300  @Override
1301  public TableStateManager getTableStateManager() {
1302    return tableStateManager;
1303  }
1304
1305  /*
1306   * Start up all services. If any of these threads gets an unhandled exception
1307   * then they just die with a logged message. This should be fine because
1308   * in general, we do not expect the master to get such unhandled exceptions
1309   * as OOMEs; it should be lightly loaded. See what HRegionServer does if we
1310   * need to install an unhandled-exception handler.
1311   */
1312  private void startServiceThreads() throws IOException {
1313    // Start the executor service pools
1314    this.executorService.startExecutorService(ExecutorType.MASTER_OPEN_REGION, conf.getInt(
1315      HConstants.MASTER_OPEN_REGION_THREADS, HConstants.MASTER_OPEN_REGION_THREADS_DEFAULT));
1316    this.executorService.startExecutorService(ExecutorType.MASTER_CLOSE_REGION, conf.getInt(
1317      HConstants.MASTER_CLOSE_REGION_THREADS, HConstants.MASTER_CLOSE_REGION_THREADS_DEFAULT));
1318    this.executorService.startExecutorService(ExecutorType.MASTER_SERVER_OPERATIONS,
1319      conf.getInt(HConstants.MASTER_SERVER_OPERATIONS_THREADS,
1320        HConstants.MASTER_SERVER_OPERATIONS_THREADS_DEFAULT));
1321    this.executorService.startExecutorService(ExecutorType.MASTER_META_SERVER_OPERATIONS,
1322      conf.getInt(HConstants.MASTER_META_SERVER_OPERATIONS_THREADS,
1323        HConstants.MASTER_META_SERVER_OPERATIONS_THREADS_DEFAULT));
1324    this.executorService.startExecutorService(ExecutorType.M_LOG_REPLAY_OPS, conf.getInt(
1325      HConstants.MASTER_LOG_REPLAY_OPS_THREADS, HConstants.MASTER_LOG_REPLAY_OPS_THREADS_DEFAULT));
1326    this.executorService.startExecutorService(ExecutorType.MASTER_SNAPSHOT_OPERATIONS, conf.getInt(
1327      SnapshotManager.SNAPSHOT_POOL_THREADS_KEY, SnapshotManager.SNAPSHOT_POOL_THREADS_DEFAULT));
1328
1329    // We depend on there being only one instance of this executor running
1330    // at a time. To do concurrency, would need fencing of enable/disable of
1331    // tables.
1332    // Any time changing this maxThreads to > 1, pls see the comment at
1333    // AccessController#postCompletedCreateTableAction
1334    this.executorService.startExecutorService(ExecutorType.MASTER_TABLE_OPERATIONS, 1);
1335    startProcedureExecutor();
1336
1337    // Create cleaner thread pool
1338    cleanerPool = new DirScanPool(conf);
1339    Map<String, Object> params = new HashMap<>();
1340    params.put(MASTER, this);
1341    // Start log cleaner thread
1342    int cleanerInterval =
1343      conf.getInt(HBASE_MASTER_CLEANER_INTERVAL, DEFAULT_HBASE_MASTER_CLEANER_INTERVAL);
1344    this.logCleaner = new LogCleaner(cleanerInterval, this, conf, getMasterWalManager()
1345      .getFileSystem(), getMasterWalManager().getOldLogDir(), cleanerPool, params);
1346    getChoreService().scheduleChore(logCleaner);
1347
1348    // start the hfile archive cleaner thread
1349    Path archiveDir = HFileArchiveUtil.getArchivePath(conf);
1350    this.hfileCleaner = new HFileCleaner(cleanerInterval, this, conf,
1351      getMasterFileSystem().getFileSystem(), archiveDir, cleanerPool, params);
1352    getChoreService().scheduleChore(hfileCleaner);
1353
1354    // Regions Reopen based on very high storeFileRefCount is considered enabled
1355    // only if hbase.regions.recovery.store.file.ref.count has value > 0
1356    final int maxStoreFileRefCount = conf.getInt(
1357      HConstants.STORE_FILE_REF_COUNT_THRESHOLD,
1358      HConstants.DEFAULT_STORE_FILE_REF_COUNT_THRESHOLD);
1359    if (maxStoreFileRefCount > 0) {
1360      this.regionsRecoveryChore = new RegionsRecoveryChore(this, conf, this);
1361      getChoreService().scheduleChore(this.regionsRecoveryChore);
1362    } else {
1363      LOG.info("Reopening regions with very high storeFileRefCount is disabled. " +
1364          "Provide threshold value > 0 for {} to enable it.",
1365        HConstants.STORE_FILE_REF_COUNT_THRESHOLD);
1366    }
1367
1368    this.regionsRecoveryConfigManager = new RegionsRecoveryConfigManager(this);
1369
1370    replicationBarrierCleaner = new ReplicationBarrierCleaner(conf, this, getConnection(),
1371      replicationPeerManager);
1372    getChoreService().scheduleChore(replicationBarrierCleaner);
1373
1374    final boolean isSnapshotChoreEnabled = this.snapshotCleanupTracker
1375        .isSnapshotCleanupEnabled();
1376    this.snapshotCleanerChore = new SnapshotCleanerChore(this, conf, getSnapshotManager());
1377    if (isSnapshotChoreEnabled) {
1378      getChoreService().scheduleChore(this.snapshotCleanerChore);
1379    } else {
1380      if (LOG.isTraceEnabled()) {
1381        LOG.trace("Snapshot Cleaner Chore is disabled. Not starting up the chore.");
1382      }
1383    }
1384    serviceStarted = true;
1385    if (LOG.isTraceEnabled()) {
1386      LOG.trace("Started service threads");
1387    }
1388  }
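
  /*
   * A tuning sketch for the cleaner chores scheduled above, assuming
   * HBASE_MASTER_CLEANER_INTERVAL resolves to "hbase.master.cleaner.interval" and that its
   * value is in milliseconds:
   *
   *   // Run the log and hfile cleaners every ten minutes (hypothetical value).
   *   conf.setInt("hbase.master.cleaner.interval", 10 * 60 * 1000);
   */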
1389
1390  @Override
1391  protected void stopServiceThreads() {
1392    if (masterJettyServer != null) {
1393      LOG.info("Stopping master jetty server");
1394      try {
1395        masterJettyServer.stop();
1396      } catch (Exception e) {
1397        LOG.error("Failed to stop master jetty server", e);
1398      }
1399    }
1400    stopChores();
1401
1402    super.stopServiceThreads();
1403    if (cleanerPool != null) {
1404      cleanerPool.shutdownNow();
1405      cleanerPool = null;
1406    }
1407
1408    LOG.debug("Stopping service threads");
1409
1410    // stop procedure executor prior to other services such as server manager and assignment
1411    // manager, as these services are important for some running procedures. See HBASE-24117 for
1412    // example.
1413    stopProcedureExecutor();
1414
1415    if (regionNormalizerManager != null) {
1416      regionNormalizerManager.stop();
1417    }
1418    if (this.quotaManager != null) {
1419      this.quotaManager.stop();
1420    }
1421
1422    if (this.activeMasterManager != null) {
1423      this.activeMasterManager.stop();
1424    }
1425    if (this.serverManager != null) {
1426      this.serverManager.stop();
1427    }
1428    if (this.assignmentManager != null) {
1429      this.assignmentManager.stop();
1430    }
1431
1432    if (masterRegion != null) {
1433      masterRegion.close(isAborted());
1434    }
1435    if (this.walManager != null) {
1436      this.walManager.stop();
1437    }
1438    if (this.fileSystemManager != null) {
1439      this.fileSystemManager.stop();
1440    }
1441    if (this.mpmHost != null) {
1442      this.mpmHost.stop("server shutting down.");
1443    }
1444    if (this.regionServerTracker != null) {
1445      this.regionServerTracker.stop();
1446    }
1447  }
1448
1449  private void createProcedureExecutor() throws IOException {
1450    MasterProcedureEnv procEnv = new MasterProcedureEnv(this);
1451    procedureStore = new RegionProcedureStore(this, masterRegion,
1452      new MasterProcedureEnv.FsUtilsLeaseRecovery(this));
1453    procedureStore.registerListener(new ProcedureStoreListener() {
1454
1455      @Override
1456      public void abortProcess() {
1457        abort("The Procedure Store lost the lease", null);
1458      }
1459    });
1460    MasterProcedureScheduler procedureScheduler = procEnv.getProcedureScheduler();
1461    procedureExecutor = new ProcedureExecutor<>(conf, procEnv, procedureStore, procedureScheduler);
1462    configurationManager.registerObserver(procEnv);
1463
1464    int cpus = Runtime.getRuntime().availableProcessors();
1465    final int numThreads = conf.getInt(MasterProcedureConstants.MASTER_PROCEDURE_THREADS, Math.max(
1466      (cpus > 0 ? cpus / 4 : 0), MasterProcedureConstants.DEFAULT_MIN_MASTER_PROCEDURE_THREADS));
1467    final boolean abortOnCorruption =
1468      conf.getBoolean(MasterProcedureConstants.EXECUTOR_ABORT_ON_CORRUPTION,
1469        MasterProcedureConstants.DEFAULT_EXECUTOR_ABORT_ON_CORRUPTION);
1470    procedureStore.start(numThreads);
1471    // Just initialize it but do not start the workers, we will start the workers later by calling
1472    // startProcedureExecutor. See the javadoc for finishActiveMasterInitialization for more
1473    // details.
1474    procedureExecutor.init(numThreads, abortOnCorruption);
1475    if (!procEnv.getRemoteDispatcher().start()) {
1476      throw new HBaseIOException("Failed start of remote dispatcher");
1477    }
1478  }
1479
1480  private void startProcedureExecutor() throws IOException {
1481    procedureExecutor.startWorkers();
1482  }
1483
1484  /**
1485   * Turn on/off Snapshot Cleanup Chore
1486   *
1487   * @param on indicates whether Snapshot Cleanup Chore is to be run
1488   */
1489  void switchSnapshotCleanup(final boolean on, final boolean synchronous) {
1490    if (synchronous) {
1491      synchronized (this.snapshotCleanerChore) {
1492        switchSnapshotCleanup(on);
1493      }
1494    } else {
1495      switchSnapshotCleanup(on);
1496    }
1497  }
1498
1499  private void switchSnapshotCleanup(final boolean on) {
1500    try {
1501      snapshotCleanupTracker.setSnapshotCleanupEnabled(on);
1502      if (on) {
1503        if (!getChoreService().isChoreScheduled(this.snapshotCleanerChore)) {
1504          getChoreService().scheduleChore(this.snapshotCleanerChore);
1505        }
1506      } else {
1507        getChoreService().cancelChore(this.snapshotCleanerChore);
1508      }
1509    } catch (KeeperException e) {
1510      LOG.error("Error updating snapshot cleanup mode to {}", on, e);
1511    }
1512  }
1513
1515  private void stopProcedureExecutor() {
1516    if (procedureExecutor != null) {
1517      configurationManager.deregisterObserver(procedureExecutor.getEnvironment());
1518      procedureExecutor.getEnvironment().getRemoteDispatcher().stop();
1519      procedureExecutor.stop();
1520      procedureExecutor.join();
1521      procedureExecutor = null;
1522    }
1523
1524    if (procedureStore != null) {
1525      procedureStore.stop(isAborted());
1526      procedureStore = null;
1527    }
1528  }
1529
1530  private void stopChores() {
1531    ChoreService choreService = getChoreService();
1532    if (choreService != null) {
1533      choreService.cancelChore(this.mobFileCleanerChore);
1534      choreService.cancelChore(this.mobFileCompactionChore);
1535      choreService.cancelChore(this.balancerChore);
1536      if (regionNormalizerManager != null) {
1537        choreService.cancelChore(regionNormalizerManager.getRegionNormalizerChore());
1538      }
1539      choreService.cancelChore(this.clusterStatusChore);
1540      choreService.cancelChore(this.catalogJanitorChore);
1541      choreService.cancelChore(this.clusterStatusPublisherChore);
1542      choreService.cancelChore(this.snapshotQuotaChore);
1543      choreService.cancelChore(this.logCleaner);
1544      choreService.cancelChore(this.hfileCleaner);
1545      choreService.cancelChore(this.replicationBarrierCleaner);
1546      choreService.cancelChore(this.snapshotCleanerChore);
1547      choreService.cancelChore(this.hbckChore);
1548      choreService.cancelChore(this.regionsRecoveryChore);
1549    }
1550  }
1551
1552  /**
1553   * @return The remote side's InetAddress
1554   */
1555  InetAddress getRemoteInetAddress(final int port,
1556      final long serverStartCode) throws UnknownHostException {
1557    // Do it out here in its own little method so can fake an address when
1558    // mocking up in tests.
1559    InetAddress ia = RpcServer.getRemoteIp();
1560
1561    // The call could be from the local regionserver,
1562    // in which case, there is no remote address.
1563    if (ia == null && serverStartCode == startcode) {
1564      InetSocketAddress isa = rpcServices.getSocketAddress();
1565      if (isa != null && isa.getPort() == port) {
1566        ia = isa.getAddress();
1567      }
1568    }
1569    return ia;
1570  }
1571
1572  /**
1573   * @return Maximum time, in milliseconds, that the balancer should be allowed to run
1574   */
1575  private int getMaxBalancingTime() {
1576    // If max balancing time isn't set, default it to the balancer period.
1577    int maxBalancingTime = getConfiguration().getInt(HConstants.HBASE_BALANCER_MAX_BALANCING,
1578      getConfiguration()
1579        .getInt(HConstants.HBASE_BALANCER_PERIOD, HConstants.DEFAULT_HBASE_BALANCER_PERIOD));
1580    return maxBalancingTime;
1581  }
1582
1583  /**
1584   * @return Maximum number of regions in transition
1585   */
1586  private int getMaxRegionsInTransition() {
1587    int numRegions = this.assignmentManager.getRegionStates().getRegionAssignments().size();
1588    return Math.max((int) Math.floor(numRegions * this.maxRitPercent), 1);
1589  }
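
  /*
   * Worked example: with 1000 assigned regions and maxRitPercent = 0.01, at most
   * floor(1000 * 0.01) = 10 regions may be in transition at once; the Math.max(..., 1) clamp
   * guarantees at least one (e.g. 50 regions at 0.01 gives floor(0.5) = 0, clamped to 1).
   */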
1590
1591  /**
1592   * It first sleeps until the next balance plan start time. Meanwhile, it throttles by the max
1593   * number of regions in transition to protect availability.
1594   * @param nextBalanceStartTime The next balance plan start time
1595   * @param maxRegionsInTransition max number of regions in transition
1596   * @param cutoffTime when to exit balancer
1597   */
1598  private void balanceThrottling(long nextBalanceStartTime, int maxRegionsInTransition,
1599      long cutoffTime) {
1600    boolean interrupted = false;
1601
1602    // Sleep until the next balance plan start time.
1603    // But if there are zero regions in transition, we can skip the sleep to speed up.
1604    while (!interrupted && System.currentTimeMillis() < nextBalanceStartTime
1605        && this.assignmentManager.getRegionStates().hasRegionsInTransition()) {
1606      try {
1607        Thread.sleep(100);
1608      } catch (InterruptedException ie) {
1609        interrupted = true;
1610      }
1611    }
1612
1613    // Throttle by the max number of regions in transition.
1614    while (!interrupted
1615        && maxRegionsInTransition > 0
1616        && this.assignmentManager.getRegionStates().getRegionsInTransitionCount()
1617        >= maxRegionsInTransition && System.currentTimeMillis() <= cutoffTime) {
1618      try {
1619        // sleep if the number of regions in transition exceeds the limit
1620        Thread.sleep(100);
1621      } catch (InterruptedException ie) {
1622        interrupted = true;
1623      }
1624    }
1625
1626    if (interrupted) Thread.currentThread().interrupt();
1627  }
1628
1629  public boolean balance() throws IOException {
1630    return balance(false);
1631  }
1632
1633  /**
1634   * Checks master state before initiating action over region topology.
1635   * @param action the name of the action under consideration, for logging.
1636   * @return {@code true} when the caller should exit early, {@code false} otherwise.
1637   */
1638  @Override
1639  public boolean skipRegionManagementAction(final String action) {
1640    // Note: this method could be `default` on MasterServices but for logging.
1641    if (!isInitialized()) {
1642      LOG.debug("Master has not been initialized, don't run {}.", action);
1643      return true;
1644    }
1645    if (this.getServerManager().isClusterShutdown()) {
1646      LOG.info("Cluster is shutting down, don't run {}.", action);
1647      return true;
1648    }
1649    if (isInMaintenanceMode()) {
1650      LOG.info("Master is in maintenance mode, don't run {}.", action);
1651      return true;
1652    }
1653    return false;
1654  }
1655
1656  public boolean balance(boolean force) throws IOException {
1657    if (loadBalancerTracker == null || !loadBalancerTracker.isBalancerOn()) {
1658      return false;
1659    }
1660    if (skipRegionManagementAction("balancer")) {
1661      return false;
1662    }
1663
1664    synchronized (this.balancer) {
1665      // Only allow one balance run at a time.
1666      if (this.assignmentManager.hasRegionsInTransition()) {
1667        List<RegionStateNode> regionsInTransition = assignmentManager.getRegionsInTransition();
1668        // if hbase:meta region is in transition, result of assignment cannot be recorded
1669        // ignore the force flag in that case
1670        boolean metaInTransition = assignmentManager.isMetaRegionInTransition();
1671        String prefix = force && !metaInTransition ? "R" : "Not r";
1672        List<RegionStateNode> toPrint = regionsInTransition;
1673        int max = 5;
1674        boolean truncated = false;
1675        if (regionsInTransition.size() > max) {
1676          toPrint = regionsInTransition.subList(0, max);
1677          truncated = true;
1678        }
1679        LOG.info(prefix + "unning balancer because " + regionsInTransition.size() +
1680          " region(s) in transition: " + toPrint + (truncated ? " (truncated list)" : ""));
1681        if (!force || metaInTransition) return false;
1682      }
1683      if (this.serverManager.areDeadServersInProgress()) {
1684        LOG.info("Not running balancer because processing dead regionserver(s): " +
1685          this.serverManager.getDeadServers());
1686        return false;
1687      }
1688
1689      if (this.cpHost != null) {
1690        try {
1691          if (this.cpHost.preBalance()) {
1692            LOG.debug("Coprocessor bypassing balancer request");
1693            return false;
1694          }
1695        } catch (IOException ioe) {
1696          LOG.error("Error invoking master coprocessor preBalance()", ioe);
1697          return false;
1698        }
1699      }
1700
1701      Map<TableName, Map<ServerName, List<RegionInfo>>> assignments =
1702        this.assignmentManager.getRegionStates()
1703          .getAssignmentsForBalancer(tableStateManager, this.serverManager.getOnlineServersList());
1704      for (Map<ServerName, List<RegionInfo>> serverMap : assignments.values()) {
1705        serverMap.keySet().removeAll(this.serverManager.getDrainingServersList());
1706      }
1707
1708      // Give the balancer the current cluster state.
1709      this.balancer.setClusterMetrics(getClusterMetricsWithoutCoprocessor());
1710
1711      List<RegionPlan> plans = this.balancer.balanceCluster(assignments);
1712
1713      if (skipRegionManagementAction("balancer")) {
1714        // make one last check that the cluster isn't shutting down before proceeding.
1715        return false;
1716      }
1717
1718      List<RegionPlan> sucRPs = executeRegionPlansWithThrottling(plans);
1719
1720      if (this.cpHost != null) {
1721        try {
1722          this.cpHost.postBalance(sucRPs);
1723        } catch (IOException ioe) {
1724          // balancing already succeeded so don't change the result
1725          LOG.error("Error invoking master coprocessor postBalance()", ioe);
1726        }
1727      }
1728    }
1729    // If LoadBalancer did not generate any plans, it means the cluster is already balanced.
1730    // Return true indicating a success.
1731    return true;
1732  }
1733
1734  /**
1735   * Execute region plans with throttling
1736   * @param plans to execute
1737   * @return succeeded plans
1738   */
1739  public List<RegionPlan> executeRegionPlansWithThrottling(List<RegionPlan> plans) {
1740    List<RegionPlan> successRegionPlans = new ArrayList<>();
1741    int maxRegionsInTransition = getMaxRegionsInTransition();
1742    long balanceStartTime = System.currentTimeMillis();
1743    long cutoffTime = balanceStartTime + this.maxBalancingTime;
1744    int rpCount = 0;  // number of RegionPlans balanced so far
1745    if (plans != null && !plans.isEmpty()) {
1746      int balanceInterval = this.maxBalancingTime / plans.size();
1747      LOG.info("Balancer plans size is " + plans.size() + ", the balance interval is "
1748          + balanceInterval + " ms, and the max number regions in transition is "
1749          + maxRegionsInTransition);
1750
1751      for (RegionPlan plan: plans) {
1752        LOG.info("balance " + plan);
1753        // TODO: bulk assign
1754        try {
1755          this.assignmentManager.moveAsync(plan);
1756        } catch (HBaseIOException hioe) {
1757          // Ignore failed plans here, to avoid aborting the whole set of balance plans;
1758          // later calls of balance() can pick up the failed and skipped plans.
1759          LOG.warn("Failed balance plan {}, skipping...", plan, hioe);
1760        }
1761        // rpCount records plans processed; it does not care whether a plan succeeds
1762        rpCount++;
1763        successRegionPlans.add(plan);
1764
1765        if (this.maxBalancingTime > 0) {
1766          balanceThrottling(balanceStartTime + rpCount * balanceInterval, maxRegionsInTransition,
1767            cutoffTime);
1768        }
1769
1770        // if performing next balance exceeds cutoff time, exit the loop
1771        if (this.maxBalancingTime > 0 && rpCount < plans.size()
1772          && System.currentTimeMillis() > cutoffTime) {
1773          // TODO: After balance, there should not be a cutoff time (keeping it as
1774          // a security net for now)
1775          LOG.debug("No more balancing till next balance run; maxBalanceTime="
1776              + this.maxBalancingTime);
1777          break;
1778        }
1779      }
1780    }
1781    return successRegionPlans;
1782  }
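
  /*
   * Worked example of the throttling arithmetic above: with maxBalancingTime = 300,000 ms and
   * 10 plans, balanceInterval = 300,000 / 10 = 30,000 ms, so plan i is not started before
   * balanceStartTime + i * 30s; balanceThrottling additionally blocks while the number of
   * regions in transition is at or above getMaxRegionsInTransition(), until cutoffTime
   * (balanceStartTime + 300,000 ms) is reached.
   */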
1783
1784  @Override
1785  public RegionNormalizerManager getRegionNormalizerManager() {
1786    return regionNormalizerManager;
1787  }
1788
1789  @Override
1790  public boolean normalizeRegions(
1791    final NormalizeTableFilterParams ntfp,
1792    final boolean isHighPriority
1793  ) throws IOException {
1794    if (regionNormalizerManager == null || !regionNormalizerManager.isNormalizerOn()) {
1795      LOG.debug("Region normalization is disabled, don't run region normalizer.");
1796      return false;
1797    }
1798    if (skipRegionManagementAction("region normalizer")) {
1799      return false;
1800    }
1801    if (assignmentManager.hasRegionsInTransition()) {
1802      return false;
1803    }
1804
1805    final Set<TableName> matchingTables = getTableDescriptors(new LinkedList<>(),
1806      ntfp.getNamespace(), ntfp.getRegex(), ntfp.getTableNames(), false)
1807      .stream()
1808      .map(TableDescriptor::getTableName)
1809      .collect(Collectors.toSet());
1810    final Set<TableName> allEnabledTables =
1811      tableStateManager.getTablesInStates(TableState.State.ENABLED);
1812    final List<TableName> targetTables =
1813      new ArrayList<>(Sets.intersection(matchingTables, allEnabledTables));
1814    Collections.shuffle(targetTables);
1815    return regionNormalizerManager.normalizeRegions(targetTables, isHighPriority);
1816  }
1817
1818  /**
1819   * @return Client info for use as prefix on an audit log string; who did an action
1820   */
1821  @Override
1822  public String getClientIdAuditPrefix() {
1823    return "Client=" + RpcServer.getRequestUserName().orElse(null)
1824        + "/" + RpcServer.getRemoteAddress().orElse(null);
1825  }
1826
1827  /**
1828   * Switch for the background CatalogJanitor thread.
1829   * Used for testing. The thread will continue to run; it will just be a no-op
1830   * if disabled.
1831   * @param b If false, the catalog janitor won't do anything.
1832   */
1833  public void setCatalogJanitorEnabled(final boolean b) {
1834    this.catalogJanitorChore.setEnabled(b);
1835  }
1836
1837  @Override
1838  public long mergeRegions(
1839      final RegionInfo[] regionsToMerge,
1840      final boolean forcible,
1841      final long ng,
1842      final long nonce) throws IOException {
1843    checkInitialized();
1844
1845    if (!isSplitOrMergeEnabled(MasterSwitchType.MERGE)) {
1846      String regionsStr = Arrays.deepToString(regionsToMerge);
1847      LOG.warn("Merge switch is off! Skipping merge of " + regionsStr);
1848      throw new DoNotRetryIOException("Merge of " + regionsStr +
1849          " failed because merge switch is off");
1850    }
1851
1852    final String mergeRegionsStr = Arrays.stream(regionsToMerge).map(RegionInfo::getEncodedName)
1853      .collect(Collectors.joining(", "));
1854    return MasterProcedureUtil.submitProcedure(new NonceProcedureRunnable(this, ng, nonce) {
1855      @Override
1856      protected void run() throws IOException {
1857        getMaster().getMasterCoprocessorHost().preMergeRegions(regionsToMerge);
1858        String aid = getClientIdAuditPrefix();
1859        LOG.info("{} merge regions {}", aid, mergeRegionsStr);
1860        submitProcedure(new MergeTableRegionsProcedure(procedureExecutor.getEnvironment(),
1861            regionsToMerge, forcible));
1862        getMaster().getMasterCoprocessorHost().postMergeRegions(regionsToMerge);
1863      }
1864
1865      @Override
1866      protected String getDescription() {
1867        return "MergeTableProcedure";
1868      }
1869    });
1870  }
1871
1872  @Override
1873  public long splitRegion(final RegionInfo regionInfo, final byte[] splitRow,
1874      final long nonceGroup, final long nonce)
1875      throws IOException {
1876    checkInitialized();
1877
1878    if (!isSplitOrMergeEnabled(MasterSwitchType.SPLIT)) {
1879      LOG.warn("Split switch is off! Skipping split of " + regionInfo);
1880      throw new DoNotRetryIOException("Split region " + regionInfo.getRegionNameAsString() +
1881          " failed due to split switch off");
1882    }
1883
1884    return MasterProcedureUtil.submitProcedure(
1885        new MasterProcedureUtil.NonceProcedureRunnable(this, nonceGroup, nonce) {
1886      @Override
1887      protected void run() throws IOException {
1888        getMaster().getMasterCoprocessorHost().preSplitRegion(regionInfo.getTable(), splitRow);
1889        LOG.info(getClientIdAuditPrefix() + " split " + regionInfo.getRegionNameAsString());
1890
1891        // Execute the operation asynchronously
1892        submitProcedure(getAssignmentManager().createSplitProcedure(regionInfo, splitRow));
1893      }
1894
1895      @Override
1896      protected String getDescription() {
1897        return "SplitTableProcedure";
1898      }
1899    });
1900  }
1901
1902  private void warmUpRegion(ServerName server, RegionInfo region) {
1903    FutureUtils.addListener(asyncClusterConnection.getRegionServerAdmin(server)
1904      .warmupRegion(RequestConverter.buildWarmupRegionRequest(region)), (r, e) -> {
1905        if (e != null) {
1906          LOG.warn("Failed to warm up region {} on server {}", region, server, e);
1907        }
1908      });
1909  }
1910
1911  // Public so it can be accessed by tests. Blocks until the move is done.
1912  // Replace with an async implementation from which you can get
1913  // a success/failure result.
1914  @InterfaceAudience.Private
1915  public void move(final byte[] encodedRegionName, byte[] destServerName) throws IOException {
1916    RegionState regionState = assignmentManager.getRegionStates().
1917      getRegionState(Bytes.toString(encodedRegionName));
1918
1919    RegionInfo hri;
1920    if (regionState != null) {
1921      hri = regionState.getRegion();
1922    } else {
1923      throw new UnknownRegionException(Bytes.toStringBinary(encodedRegionName));
1924    }
1925
1926    ServerName dest;
1927    List<ServerName> exclude = hri.getTable().isSystemTable() ?
1928      assignmentManager.getExcludedServersForSystemTable() : new ArrayList<>(1);
1929    if (destServerName != null &&
1930        exclude.contains(ServerName.valueOf(Bytes.toString(destServerName)))) {
1931      LOG.info(Bytes.toString(encodedRegionName) + " cannot move to " +
1932        Bytes.toString(destServerName) + " because the server is in the exclude list");
1933      destServerName = null;
1934    }
1935    if (destServerName == null || destServerName.length == 0) {
1936      LOG.info("Passed destination server name is null/empty so " +
1937        "choosing a server at random");
1938      exclude.add(regionState.getServerName());
1939      final List<ServerName> destServers = serverManager.createDestinationServersList(exclude);
1940      dest = balancer.randomAssignment(hri, destServers);
1941      if (dest == null) {
1942        LOG.debug("Unable to determine a plan to assign " + hri);
1943        return;
1944      }
1945    } else {
1946      ServerName candidate = ServerName.valueOf(Bytes.toString(destServerName));
1947      dest = balancer.randomAssignment(hri, Lists.newArrayList(candidate));
1948      if (dest == null) {
1949        LOG.debug("Unable to determine a plan to assign " + hri);
1950        return;
1951      }
1952      // TODO: deal with table on master for rs group.
1953      if (dest.equals(serverName)) {
1954        // To avoid unnecessary region moving later by balancer. Don't put user
1955        // regions on master.
1956        LOG.debug("Skipping move of region " + hri.getRegionNameAsString() +
1957          " to avoid unnecessary region moving later by load balancer," +
1958          " because it should not be on master");
1959        return;
1960      }
1961    }
1962
1963    if (dest.equals(regionState.getServerName())) {
1964      LOG.debug("Skipping move of region " + hri.getRegionNameAsString()
1965        + " because region already assigned to the same server " + dest + ".");
1966      return;
1967    }
1968
1969    // Now we can do the move
1970    RegionPlan rp = new RegionPlan(hri, regionState.getServerName(), dest);
1971    assert rp.getDestination() != null: rp.toString() + " " + dest;
1972
1973    try {
1974      checkInitialized();
1975      if (this.cpHost != null) {
1976        this.cpHost.preMove(hri, rp.getSource(), rp.getDestination());
1977      }
1978
1979      TransitRegionStateProcedure proc =
1980        this.assignmentManager.createMoveRegionProcedure(rp.getRegionInfo(), rp.getDestination());
1981      // Warmup the region on the destination before initiating the move.
1982      // A region server could reject the close request because it either does not
1983      // have the specified region or the region is being split.
1984      warmUpRegion(rp.getDestination(), hri);
1985
1986      LOG.info(getClientIdAuditPrefix() + " move " + rp + ", running balancer");
1987      Future<byte[]> future = ProcedureSyncWait.submitProcedure(this.procedureExecutor, proc);
1988      try {
1989        // Is this going to work? Will we throw exception on error?
1990        // TODO: CompletableFuture rather than this stunted Future.
1991        future.get();
1992      } catch (InterruptedException | ExecutionException e) {
1993        throw new HBaseIOException(e);
1994      }
1995      if (this.cpHost != null) {
1996        this.cpHost.postMove(hri, rp.getSource(), rp.getDestination());
1997      }
1998    } catch (IOException ioe) {
1999      if (ioe instanceof HBaseIOException) {
2000        throw (HBaseIOException)ioe;
2001      }
2002      throw new HBaseIOException(ioe);
2003    }
2004  }
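
  /*
   * A minimal caller sketch for move(); the encoded region name and server below are
   * hypothetical:
   *
   *   byte[] encoded = Bytes.toBytes("d3a4a2f4f7a2c3b1e5d6a7b8c9d0e1f2");
   *   byte[] dest = Bytes.toBytes("host1.example.com,16020,1600000000000");
   *   master.move(encoded, dest);  // blocks until the move procedure completes
   *   master.move(encoded, null);  // a null/empty destination picks a server at random
   */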
2005
2006  @Override
2007  public long createTable(final TableDescriptor tableDescriptor, final byte[][] splitKeys,
2008      final long nonceGroup, final long nonce) throws IOException {
2009    checkInitialized();
2010    TableDescriptor desc = getMasterCoprocessorHost().preCreateTableRegionsInfos(tableDescriptor);
2011    if (desc == null) {
2012      throw new IOException("Creation for " + tableDescriptor + " is canceled by CP");
2013    }
2014    String namespace = desc.getTableName().getNamespaceAsString();
2015    this.clusterSchemaService.getNamespace(namespace);
2016
2017    RegionInfo[] newRegions = ModifyRegionUtils.createRegionInfos(desc, splitKeys);
2018    TableDescriptorChecker.sanityCheck(conf, desc);
2019
2020    return MasterProcedureUtil
2021      .submitProcedure(new MasterProcedureUtil.NonceProcedureRunnable(this, nonceGroup, nonce) {
2022        @Override
2023        protected void run() throws IOException {
2024          getMaster().getMasterCoprocessorHost().preCreateTable(desc, newRegions);
2025
2026          LOG.info(getClientIdAuditPrefix() + " create " + desc);
2027
2028          // TODO: We can handle/merge duplicate requests, and differentiate the case of
2029          // TableExistsException by saying if the schema is the same or not.
2030          //
2031          // We need to wait for the procedure to potentially fail due to "prepare" sanity
2032          // checks. This will block only the beginning of the procedure. See HBASE-19953.
2033          ProcedurePrepareLatch latch = ProcedurePrepareLatch.createBlockingLatch();
2034          submitProcedure(
2035            new CreateTableProcedure(procedureExecutor.getEnvironment(), desc, newRegions, latch));
2036          latch.await();
2037
2038          getMaster().getMasterCoprocessorHost().postCreateTable(desc, newRegions);
2039        }
2040
2041        @Override
2042        protected String getDescription() {
2043          return "CreateTableProcedure";
2044        }
2045      });
2046  }
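
  /*
   * A minimal caller sketch for createTable, with hypothetical table/family names and split
   * keys; nonceGroup/nonce would normally come from the client's nonce generator:
   *
   *   TableDescriptor td = TableDescriptorBuilder.newBuilder(TableName.valueOf("example_table"))
   *     .setColumnFamily(ColumnFamilyDescriptorBuilder.of("f")).build();
   *   byte[][] splits = { Bytes.toBytes("m") };  // two regions: [start, m) and [m, end)
   *   long procId = master.createTable(td, splits, HConstants.NO_NONCE, HConstants.NO_NONCE);
   */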
2047
2048  @Override
2049  public long createSystemTable(final TableDescriptor tableDescriptor) throws IOException {
2050    if (isStopped()) {
2051      throw new MasterNotRunningException();
2052    }
2053
2054    TableName tableName = tableDescriptor.getTableName();
2055    if (!(tableName.isSystemTable())) {
2056      throw new IllegalArgumentException(
2057        "Only system table creation can use this createSystemTable API");
2058    }
2059
2060    RegionInfo[] newRegions = ModifyRegionUtils.createRegionInfos(tableDescriptor, null);
2061
2062    LOG.info(getClientIdAuditPrefix() + " create " + tableDescriptor);
2063
2064    // This special create table is invoked locally on the master. Since there is no RPC, there
2065    // is no need to use a nonce to detect duplicated calls.
2066    long procId = this.procedureExecutor.submitProcedure(
2067      new CreateTableProcedure(procedureExecutor.getEnvironment(), tableDescriptor, newRegions));
2068
2069    return procId;
2070  }
2071
2072  private void startActiveMasterManager(int infoPort) throws KeeperException {
2073    String backupZNode = ZNodePaths.joinZNode(
2074      zooKeeper.getZNodePaths().backupMasterAddressesZNode, serverName.toString());
2075    /*
2076     * Add a ZNode for ourselves in the backup master directory since we
2077     * may not become the active master. If we do not, we want the actual
2078     * active master to know we are a backup master, so that it won't assign
2079     * regions to us if so configured.
2080     *
2081     * If we become the active master later, ActiveMasterManager will delete
2082     * this node explicitly. If we crash before then, ZooKeeper will delete
2083     * this node for us since it is ephemeral.
2084     */
2085    LOG.info("Adding backup master ZNode " + backupZNode);
2086    if (!MasterAddressTracker.setMasterAddress(zooKeeper, backupZNode, serverName, infoPort)) {
2087      LOG.warn("Failed create of " + backupZNode + " by " + serverName);
2088    }
2089    this.activeMasterManager.setInfoPort(infoPort);
2090    int timeout = conf.getInt(HConstants.ZK_SESSION_TIMEOUT, HConstants.DEFAULT_ZK_SESSION_TIMEOUT);
2091    // If we're a backup master, stall until a primary writes its address.
2092    if (conf.getBoolean(HConstants.MASTER_TYPE_BACKUP, HConstants.DEFAULT_MASTER_TYPE_BACKUP)) {
2093      LOG.debug("HMaster started in backup mode. Stalling until master znode is written.");
2094      // This will only be a minute or so while the cluster starts up,
2095      // so don't worry about setting watches on the parent znode
2096      while (!activeMasterManager.hasActiveMaster()) {
2097        LOG.debug("Waiting for master address and cluster state znode to be written.");
2098        Threads.sleep(timeout);
2099      }
2100    }
2101    MonitoredTask status = TaskMonitor.get().createStatus("Master startup");
2102    status.setDescription("Master startup");
2103    try {
2104      if (activeMasterManager.blockUntilBecomingActiveMaster(timeout, status)) {
2105        finishActiveMasterInitialization(status);
2106      }
2107    } catch (Throwable t) {
2108      status.setStatus("Failed to become active: " + t.getMessage());
2109      LOG.error(HBaseMarkers.FATAL, "Failed to become active master", t);
2110      // HBASE-5680: Likely hadoop23 vs hadoop 20.x/1.x incompatibility
2111      if (t instanceof NoClassDefFoundError && t.getMessage().
2112          contains("org/apache/hadoop/hdfs/protocol/HdfsConstants$SafeModeAction")) {
2113        // improved error message for this special case
2114        abort("HBase is having a problem with its Hadoop jars.  You may need to recompile " +
2115          "HBase against Hadoop version " + org.apache.hadoop.util.VersionInfo.getVersion() +
2116          " or change your hadoop jars to start properly", t);
2117      } else {
2118        abort("Unhandled exception. Starting shutdown.", t);
2119      }
2120    } finally {
2121      status.cleanup();
2122    }
2123  }
2124
2125  private static boolean isCatalogTable(final TableName tableName) {
2126    return tableName.equals(TableName.META_TABLE_NAME);
2127  }
2128
2129  @Override
2130  public long deleteTable(
2131      final TableName tableName,
2132      final long nonceGroup,
2133      final long nonce) throws IOException {
2134    checkInitialized();
2135
2136    return MasterProcedureUtil.submitProcedure(
2137        new MasterProcedureUtil.NonceProcedureRunnable(this, nonceGroup, nonce) {
2138      @Override
2139      protected void run() throws IOException {
2140        getMaster().getMasterCoprocessorHost().preDeleteTable(tableName);
2141
2142        LOG.info(getClientIdAuditPrefix() + " delete " + tableName);
2143
2144        // TODO: We can handle/merge duplicate request
2145        //
2146        // We need to wait for the procedure to potentially fail due to "prepare" sanity
2147        // checks. This will block only the beginning of the procedure. See HBASE-19953.
2148        ProcedurePrepareLatch latch = ProcedurePrepareLatch.createBlockingLatch();
2149        submitProcedure(new DeleteTableProcedure(procedureExecutor.getEnvironment(),
2150            tableName, latch));
2151        latch.await();
2152
2153        getMaster().getMasterCoprocessorHost().postDeleteTable(tableName);
2154      }
2155
2156      @Override
2157      protected String getDescription() {
2158        return "DeleteTableProcedure";
2159      }
2160    });
2161  }
2162
2163  @Override
2164  public long truncateTable(
2165      final TableName tableName,
2166      final boolean preserveSplits,
2167      final long nonceGroup,
2168      final long nonce) throws IOException {
2169    checkInitialized();
2170
2171    return MasterProcedureUtil.submitProcedure(
2172        new MasterProcedureUtil.NonceProcedureRunnable(this, nonceGroup, nonce) {
2173      @Override
2174      protected void run() throws IOException {
2175        getMaster().getMasterCoprocessorHost().preTruncateTable(tableName);
2176
2177        LOG.info(getClientIdAuditPrefix() + " truncate " + tableName);
2178        ProcedurePrepareLatch latch = ProcedurePrepareLatch.createLatch(2, 0);
2179        submitProcedure(new TruncateTableProcedure(procedureExecutor.getEnvironment(),
2180            tableName, preserveSplits, latch));
2181        latch.await();
2182
2183        getMaster().getMasterCoprocessorHost().postTruncateTable(tableName);
2184      }
2185
2186      @Override
2187      protected String getDescription() {
2188        return "TruncateTableProcedure";
2189      }
2190    });
2191  }
2192
2193  @Override
2194  public long addColumn(final TableName tableName, final ColumnFamilyDescriptor column,
2195      final long nonceGroup, final long nonce) throws IOException {
2196    checkInitialized();
2197    checkTableExists(tableName);
2198
2199    return modifyTable(tableName, new TableDescriptorGetter() {
2200
2201      @Override
2202      public TableDescriptor get() throws IOException {
2203        TableDescriptor old = getTableDescriptors().get(tableName);
2204        if (old.hasColumnFamily(column.getName())) {
2205          throw new InvalidFamilyOperationException("Column family '" + column.getNameAsString()
2206              + "' in table '" + tableName + "' already exists so cannot be added");
2207        }
2208
2209        return TableDescriptorBuilder.newBuilder(old).setColumnFamily(column).build();
2210      }
2211    }, nonceGroup, nonce, true);
2212  }
2213
2214  /**
2215   * Implement to return TableDescriptor after pre-checks
2216   */
2217  protected interface TableDescriptorGetter {
2218    TableDescriptor get() throws IOException;
2219  }
2220
2221  @Override
2222  public long modifyColumn(final TableName tableName, final ColumnFamilyDescriptor descriptor,
2223      final long nonceGroup, final long nonce) throws IOException {
2224    checkInitialized();
2225    checkTableExists(tableName);
2226    return modifyTable(tableName, new TableDescriptorGetter() {
2227
2228      @Override
2229      public TableDescriptor get() throws IOException {
2230        TableDescriptor old = getTableDescriptors().get(tableName);
2231        if (!old.hasColumnFamily(descriptor.getName())) {
2232          throw new InvalidFamilyOperationException("Family '" + descriptor.getNameAsString()
2233              + "' does not exist, so it cannot be modified");
2234        }
2235
2236        return TableDescriptorBuilder.newBuilder(old).modifyColumnFamily(descriptor).build();
2237      }
2238    }, nonceGroup, nonce, true);
2239  }
2240
2241  @Override
2242  public long deleteColumn(final TableName tableName, final byte[] columnName,
2243      final long nonceGroup, final long nonce) throws IOException {
2244    checkInitialized();
2245    checkTableExists(tableName);
2246
2247    return modifyTable(tableName, new TableDescriptorGetter() {
2248
2249      @Override
2250      public TableDescriptor get() throws IOException {
2251        TableDescriptor old = getTableDescriptors().get(tableName);
2252
2253        if (!old.hasColumnFamily(columnName)) {
2254          throw new InvalidFamilyOperationException("Family '" + Bytes.toString(columnName)
2255              + "' does not exist, so it cannot be deleted");
2256        }
2257        if (old.getColumnFamilyCount() == 1) {
2258          throw new InvalidFamilyOperationException("Family '" + Bytes.toString(columnName)
2259              + "' is the only column family in the table, so it cannot be deleted");
2260        }
2261        return TableDescriptorBuilder.newBuilder(old).removeColumnFamily(columnName).build();
2262      }
2263    }, nonceGroup, nonce, true);
2264  }
2265
2266  @Override
2267  public long enableTable(final TableName tableName, final long nonceGroup, final long nonce)
2268      throws IOException {
2269    checkInitialized();
2270
2271    return MasterProcedureUtil.submitProcedure(
2272        new MasterProcedureUtil.NonceProcedureRunnable(this, nonceGroup, nonce) {
2273      @Override
2274      protected void run() throws IOException {
2275        getMaster().getMasterCoprocessorHost().preEnableTable(tableName);
2276
2277        // Normally, it would make sense for this authorization check to exist inside
2278        // AccessController, but because the authorization check is done based on internal state
2279        // (rather than explicit permissions) we'll do the check here instead of in the
2280        // coprocessor.
2281        MasterQuotaManager quotaManager = getMasterQuotaManager();
2282        if (quotaManager != null) {
2283          if (quotaManager.isQuotaInitialized()) {
2284            SpaceQuotaSnapshot currSnapshotOfTable =
2285              QuotaTableUtil.getCurrentSnapshotFromQuotaTable(getConnection(), tableName);
2286            if (currSnapshotOfTable != null) {
2287              SpaceQuotaStatus quotaStatus = currSnapshotOfTable.getQuotaStatus();
2288              if (quotaStatus.isInViolation()
2289                  && SpaceViolationPolicy.DISABLE == quotaStatus.getPolicy().orElse(null)) {
2290                throw new AccessDeniedException("Enabling the table '" + tableName
2291                  + "' is disallowed due to a violated space quota.");
2292              }
2293            }
2294          } else if (LOG.isTraceEnabled()) {
2295            LOG.trace("Unable to check for space quotas as the MasterQuotaManager is not enabled");
2296          }
2297        }
2298
2299        LOG.info(getClientIdAuditPrefix() + " enable " + tableName);
2300
2301        // Execute the operation asynchronously - client will check the progress of the operation
2302        // In case the request is from a <1.1 client, before returning
2303        // we want to make sure that the table is prepared to be
2304        // enabled (the table is locked and the table state is set).
2305        // Note: if the procedure throws exception, we will catch it and rethrow.
2306        final ProcedurePrepareLatch prepareLatch = ProcedurePrepareLatch.createLatch();
2307        submitProcedure(new EnableTableProcedure(procedureExecutor.getEnvironment(),
2308            tableName, prepareLatch));
2309        prepareLatch.await();
2310
2311        getMaster().getMasterCoprocessorHost().postEnableTable(tableName);
2312      }
2313
2314      @Override
2315      protected String getDescription() {
2316        return "EnableTableProcedure";
2317      }
2318    });
2319  }
2320
2321  @Override
2322  public long disableTable(final TableName tableName, final long nonceGroup, final long nonce)
2323      throws IOException {
2324    checkInitialized();
2325
2326    return MasterProcedureUtil.submitProcedure(
2327        new MasterProcedureUtil.NonceProcedureRunnable(this, nonceGroup, nonce) {
2328      @Override
2329      protected void run() throws IOException {
2330        getMaster().getMasterCoprocessorHost().preDisableTable(tableName);
2331
2332        LOG.info(getClientIdAuditPrefix() + " disable " + tableName);
2333
2334        // Execute the operation asynchronously - client will check the progress of the operation
2335        // In case the request is from a <1.1 client, before returning
2336        // we want to make sure that the table is prepared to be
2337        // disabled (the table is locked and the table state is set).
2338        // Note: if the procedure throws exception, we will catch it and rethrow.
2339        //
2340        // We need to wait for the procedure to potentially fail due to "prepare" sanity
2341        // checks. This will block only the beginning of the procedure. See HBASE-19953.
2342        final ProcedurePrepareLatch prepareLatch = ProcedurePrepareLatch.createBlockingLatch();
2343        submitProcedure(new DisableTableProcedure(procedureExecutor.getEnvironment(),
2344            tableName, false, prepareLatch));
2345        prepareLatch.await();
2346
2347        getMaster().getMasterCoprocessorHost().postDisableTable(tableName);
2348      }
2349
2350      @Override
2351      protected String getDescription() {
2352        return "DisableTableProcedure";
2353      }
2354    });
2355  }
2356
2357  private long modifyTable(final TableName tableName,
2358      final TableDescriptorGetter newDescriptorGetter, final long nonceGroup, final long nonce,
2359      final boolean shouldCheckDescriptor) throws IOException {
2360    return MasterProcedureUtil
2361        .submitProcedure(new MasterProcedureUtil.NonceProcedureRunnable(this, nonceGroup, nonce) {
2362          @Override
2363          protected void run() throws IOException {
2364            TableDescriptor oldDescriptor = getMaster().getTableDescriptors().get(tableName);
2365            TableDescriptor newDescriptor = getMaster().getMasterCoprocessorHost()
2366                .preModifyTable(tableName, oldDescriptor, newDescriptorGetter.get());
2367            TableDescriptorChecker.sanityCheck(conf, newDescriptor);
2368            LOG.info("{} modify table {} from {} to {}", getClientIdAuditPrefix(), tableName,
2369                oldDescriptor, newDescriptor);
2370
2371            // Execute the operation synchronously - wait for the operation completes before
2372            // continuing.
2373            //
2374            // We need to wait for the procedure to potentially fail due to "prepare" sanity
2375            // checks. This will block only the beginning of the procedure. See HBASE-19953.
2376            ProcedurePrepareLatch latch = ProcedurePrepareLatch.createBlockingLatch();
2377            submitProcedure(new ModifyTableProcedure(procedureExecutor.getEnvironment(),
2378                newDescriptor, latch, oldDescriptor, shouldCheckDescriptor));
2379            latch.await();
2380
2381            getMaster().getMasterCoprocessorHost().postModifyTable(tableName, oldDescriptor,
2382              newDescriptor);
2383          }
2384
2385          @Override
2386          protected String getDescription() {
2387            return "ModifyTableProcedure";
2388          }
2389        });
2390
2391  }
2392
2393  @Override
2394  public long modifyTable(final TableName tableName, final TableDescriptor newDescriptor,
2395      final long nonceGroup, final long nonce) throws IOException {
2396    checkInitialized();
2397    return modifyTable(tableName, new TableDescriptorGetter() {
2398      @Override
2399      public TableDescriptor get() throws IOException {
2400        return newDescriptor;
2401      }
2402    }, nonceGroup, nonce, false);
2403
2404  }
2405
2406  public long restoreSnapshot(final SnapshotDescription snapshotDesc,
2407      final long nonceGroup, final long nonce, final boolean restoreAcl) throws IOException {
2408    checkInitialized();
2409    getSnapshotManager().checkSnapshotSupport();
2410
2411    // Ensure namespace exists. Will throw exception if non-known NS.
2412    final TableName dstTable = TableName.valueOf(snapshotDesc.getTable());
2413    getClusterSchema().getNamespace(dstTable.getNamespaceAsString());
2414
2415    return MasterProcedureUtil.submitProcedure(
2416        new MasterProcedureUtil.NonceProcedureRunnable(this, nonceGroup, nonce) {
2417      @Override
2418      protected void run() throws IOException {
2419        setProcId(
2420          getSnapshotManager().restoreOrCloneSnapshot(snapshotDesc, getNonceKey(), restoreAcl));
2421      }
2422
2423      @Override
2424      protected String getDescription() {
2425        return "RestoreSnapshotProcedure";
2426      }
2427    });
2428  }
2429
2430  private void checkTableExists(final TableName tableName)
2431    throws IOException, TableNotFoundException {
2432    if (!tableDescriptors.exists(tableName)) {
2433      throw new TableNotFoundException(tableName);
2434    }
2435  }
2436
2437  @Override
2438  public void checkTableModifiable(final TableName tableName)
2439      throws IOException, TableNotFoundException, TableNotDisabledException {
2440    if (isCatalogTable(tableName)) {
2441      throw new IOException("Can't modify catalog tables");
2442    }
2443    checkTableExists(tableName);
2444    TableState ts = getTableStateManager().getTableState(tableName);
2445    if (!ts.isDisabled()) {
2446      throw new TableNotDisabledException("Not DISABLED; " + ts);
2447    }
2448  }
2449
2450  public ClusterMetrics getClusterMetricsWithoutCoprocessor() throws InterruptedIOException {
2451    return getClusterMetricsWithoutCoprocessor(EnumSet.allOf(Option.class));
2452  }
2453
2454  public ClusterMetrics getClusterMetricsWithoutCoprocessor(EnumSet<Option> options)
2455      throws InterruptedIOException {
2456    ClusterMetricsBuilder builder = ClusterMetricsBuilder.newBuilder();
2457    // Given that HBase 1.x clients can't submit the request with Options,
2458    // we return all information to the client if the list of Options is empty.
2459    if (options.isEmpty()) {
2460      options = EnumSet.allOf(Option.class);
2461    }
2462
2463    for (Option opt : options) {
2464      switch (opt) {
2465        case HBASE_VERSION: builder.setHBaseVersion(VersionInfo.getVersion()); break;
2466        case CLUSTER_ID: builder.setClusterId(getClusterId()); break;
2467        case MASTER: builder.setMasterName(getServerName()); break;
2468        case BACKUP_MASTERS: builder.setBackerMasterNames(getBackupMasters()); break;
2469        case LIVE_SERVERS: {
2470          if (serverManager != null) {
            builder.setLiveServerMetrics(serverManager.getOnlineServers().entrySet().stream()
              .collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue)));
2473          }
2474          break;
2475        }
2476        case DEAD_SERVERS: {
2477          if (serverManager != null) {
2478            builder.setDeadServerNames(new ArrayList<>(
2479              serverManager.getDeadServers().copyServerNames()));
2480          }
2481          break;
2482        }
2483        case MASTER_COPROCESSORS: {
2484          if (cpHost != null) {
2485            builder.setMasterCoprocessorNames(Arrays.asList(getMasterCoprocessors()));
2486          }
2487          break;
2488        }
2489        case REGIONS_IN_TRANSITION: {
2490          if (assignmentManager != null) {
2491            builder.setRegionsInTransition(assignmentManager.getRegionStates()
2492                .getRegionsStateInTransition());
2493          }
2494          break;
2495        }
2496        case BALANCER_ON: {
2497          if (loadBalancerTracker != null) {
2498            builder.setBalancerOn(loadBalancerTracker.isBalancerOn());
2499          }
2500          break;
2501        }
2502        case MASTER_INFO_PORT: {
2503          if (infoServer != null) {
2504            builder.setMasterInfoPort(infoServer.getPort());
2505          }
2506          break;
2507        }
2508        case SERVERS_NAME: {
2509          if (serverManager != null) {
2510            builder.setServerNames(serverManager.getOnlineServersList());
2511          }
2512          break;
2513        }
2514        case TABLE_TO_REGIONS_COUNT: {
2515          if (isActiveMaster() && isInitialized() && assignmentManager != null) {
2516            try {
2517              Map<TableName, RegionStatesCount> tableRegionStatesCountMap = new HashMap<>();
2518              Map<String, TableDescriptor> tableDescriptorMap = getTableDescriptors().getAll();
2519              for (TableDescriptor tableDescriptor : tableDescriptorMap.values()) {
2520                TableName tableName = tableDescriptor.getTableName();
2521                RegionStatesCount regionStatesCount = assignmentManager
2522                  .getRegionStatesCount(tableName);
2523                tableRegionStatesCountMap.put(tableName, regionStatesCount);
2524              }
2525              builder.setTableRegionStatesCount(tableRegionStatesCountMap);
2526            } catch (IOException e) {
              LOG.error("Error while populating TABLE_TO_REGIONS_COUNT for Cluster Metrics.", e);
2528            }
2529          }
2530          break;
2531        }
2532      }
2533    }
2534    return builder.build();
2535  }
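
  /**
   * Illustrative sketch (not part of the public API): callers that only need a subset of the
   * cluster state can request just those Options and skip more expensive ones such as
   * TABLE_TO_REGIONS_COUNT. The <code>master</code> reference is assumed to be an in-process
   * HMaster.
   */
  private static ClusterMetrics exampleTrimmedMetrics(HMaster master)
      throws InterruptedIOException {
    // Only the live-server map and the master name are populated in the result.
    return master.getClusterMetricsWithoutCoprocessor(
      EnumSet.of(Option.LIVE_SERVERS, Option.MASTER));
  }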
2536
  /**
   * @return cluster metrics
   */
2540  public ClusterMetrics getClusterMetrics() throws IOException {
2541    return getClusterMetrics(EnumSet.allOf(Option.class));
2542  }
2543
2544  public ClusterMetrics getClusterMetrics(EnumSet<Option> options) throws IOException {
2545    if (cpHost != null) {
2546      cpHost.preGetClusterMetrics();
2547    }
2548    ClusterMetrics status = getClusterMetricsWithoutCoprocessor(options);
2549    if (cpHost != null) {
2550      cpHost.postGetClusterMetrics(status);
2551    }
2552    return status;
2553  }
2554
2555  List<ServerName> getBackupMasters() {
2556    return activeMasterManager.getBackupMasters();
2557  }
2558
2559  /**
2560   * The set of loaded coprocessors is stored in a static set. Since it's
2561   * statically allocated, it does not require that HMaster's cpHost be
2562   * initialized prior to accessing it.
2563   * @return a String representation of the set of names of the loaded coprocessors.
2564   */
2565  public static String getLoadedCoprocessors() {
2566    return CoprocessorHost.getLoadedCoprocessors().toString();
2567  }
2568
2569  /**
2570   * @return timestamp in millis when HMaster was started.
2571   */
2572  public long getMasterStartTime() {
2573    return startcode;
2574  }
2575
2576  /**
2577   * @return timestamp in millis when HMaster became the active master.
2578   */
2579  public long getMasterActiveTime() {
2580    return masterActiveTime;
2581  }
2582
2583  /**
2584   * @return timestamp in millis when HMaster finished becoming the active master
2585   */
2586  public long getMasterFinishedInitializationTime() {
2587    return masterFinishedInitializationTime;
2588  }
2589
2590  public int getNumWALFiles() {
2591    return 0;
2592  }
2593
2594  public ProcedureStore getProcedureStore() {
2595    return procedureStore;
2596  }
2597
2598  public int getRegionServerInfoPort(final ServerName sn) {
2599    int port = this.serverManager.getInfoPort(sn);
2600    return port == 0 ? conf.getInt(HConstants.REGIONSERVER_INFO_PORT,
2601      HConstants.DEFAULT_REGIONSERVER_INFOPORT) : port;
2602  }
2603
2604  @Override
2605  public String getRegionServerVersion(ServerName sn) {
    // Returns "0.0.0" if the server is not online, to prevent moving system regions to a
    // region server of unknown version.
2608    return this.serverManager.getVersion(sn);
2609  }
2610
2611  @Override
2612  public void checkIfShouldMoveSystemRegionAsync() {
2613    assignmentManager.checkIfShouldMoveSystemRegionAsync();
2614  }
2615
2616  /**
2617   * @return array of coprocessor SimpleNames.
2618   */
2619  public String[] getMasterCoprocessors() {
2620    Set<String> masterCoprocessors = getMasterCoprocessorHost().getCoprocessors();
2621    return masterCoprocessors.toArray(new String[masterCoprocessors.size()]);
2622  }
2623
2624  @Override
2625  public void abort(String reason, Throwable cause) {
2626    if (!setAbortRequested() || isStopped()) {
2627      LOG.debug("Abort called but aborted={}, stopped={}", isAborted(), isStopped());
2628      return;
2629    }
2630    if (cpHost != null) {
2631      // HBASE-4014: dump a list of loaded coprocessors.
2632      LOG.error(HBaseMarkers.FATAL, "Master server abort: loaded coprocessors are: " +
2633          getLoadedCoprocessors());
2634    }
2635    String msg = "***** ABORTING master " + this + ": " + reason + " *****";
2636    if (cause != null) {
2637      LOG.error(HBaseMarkers.FATAL, msg, cause);
2638    } else {
2639      LOG.error(HBaseMarkers.FATAL, msg);
2640    }
2641
2642    try {
2643      stopMaster();
2644    } catch (IOException e) {
2645      LOG.error("Exception occurred while stopping master", e);
2646    }
2647  }
2648
2649  @Override
2650  public ZKWatcher getZooKeeper() {
2651    return zooKeeper;
2652  }
2653
2654  @Override
2655  public MasterCoprocessorHost getMasterCoprocessorHost() {
2656    return cpHost;
2657  }
2658
2659  @Override
2660  public MasterQuotaManager getMasterQuotaManager() {
2661    return quotaManager;
2662  }
2663
2664  @Override
2665  public ProcedureExecutor<MasterProcedureEnv> getMasterProcedureExecutor() {
2666    return procedureExecutor;
2667  }
2668
2669  @Override
2670  public ServerName getServerName() {
2671    return this.serverName;
2672  }
2673
2674  @Override
2675  public AssignmentManager getAssignmentManager() {
2676    return this.assignmentManager;
2677  }
2678
2679  @Override
2680  public CatalogJanitor getCatalogJanitor() {
2681    return this.catalogJanitorChore;
2682  }
2683
2684  public MemoryBoundedLogMessageBuffer getRegionServerFatalLogBuffer() {
2685    return rsFatals;
2686  }
2687
2688  /**
2689   * Shutdown the cluster.
2690   * Master runs a coordinated stop of all RegionServers and then itself.
2691   */
2692  public void shutdown() throws IOException {
2693    if (cpHost != null) {
2694      cpHost.preShutdown();
2695    }
2696
    // Tell the ServerManager that cluster shutdown has been called. This makes it so that when
    // the Master is the last running server, it'll stop itself. Next, we broadcast the cluster
    // shutdown by setting the cluster status as down. RegionServers will notice this change in
    // state and will start shutting themselves down. When the last one has exited, the Master
    // can go down.
2701    if (this.serverManager != null) {
2702      this.serverManager.shutdownCluster();
2703    }
2704    if (this.clusterStatusTracker != null) {
2705      try {
2706        this.clusterStatusTracker.setClusterDown();
2707      } catch (KeeperException e) {
2708        LOG.error("ZooKeeper exception trying to set cluster as down in ZK", e);
2709      }
2710    }
2711    // Stop the procedure executor. Will stop any ongoing assign, unassign, server crash etc.,
2712    // processing so we can go down.
2713    if (this.procedureExecutor != null) {
2714      this.procedureExecutor.stop();
2715    }
    // Shutdown our cluster connection. This will kill any hosted RPCs that might be going on;
    // this is what we want especially if the Master is in startup phase doing call outs to
    // hbase:meta, etc. when cluster is down. Without this connection close, we'd have to wait
    // on the rpc to timeout.
2720    if (this.asyncClusterConnection != null) {
2721      this.asyncClusterConnection.close();
2722    }
2723  }
2724
2725  public void stopMaster() throws IOException {
2726    if (cpHost != null) {
2727      cpHost.preStopMaster();
2728    }
2729    stop("Stopped by " + Thread.currentThread().getName());
2730  }
2731
2732  @Override
2733  public void stop(String msg) {
2734    if (!isStopped()) {
2735      super.stop(msg);
2736      if (this.activeMasterManager != null) {
2737        this.activeMasterManager.stop();
2738      }
2739    }
2740  }
2741
2742  @InterfaceAudience.Private
2743  protected void checkServiceStarted() throws ServerNotRunningYetException {
2744    if (!serviceStarted) {
2745      throw new ServerNotRunningYetException("Server is not running yet");
2746    }
2747  }
2748
2749  void checkInitialized() throws PleaseHoldException, ServerNotRunningYetException,
2750      MasterNotRunningException, MasterStoppedException {
2751    checkServiceStarted();
2752    if (!isInitialized()) {
2753      throw new PleaseHoldException("Master is initializing");
2754    }
2755    if (isStopped()) {
2756      throw new MasterStoppedException();
2757    }
2758  }
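
  /**
   * Illustrative sketch: the guard above is the standard preamble of admin operations. Until
   * initialization completes, callers receive PleaseHoldException and are expected to retry.
   * This hypothetical method exists only to show the pattern.
   */
  private void exampleGuardedAdminOp() throws IOException {
    checkInitialized(); // throws until the master is fully initialized
    // ... the actual admin operation would run here ...
  }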
2759
2760  /**
2761   * Report whether this master is currently the active master or not.
2762   * If not active master, we are parked on ZK waiting to become active.
2763   *
2764   * This method is used for testing.
2765   *
2766   * @return true if active master, false if not.
2767   */
2768  @Override
2769  public boolean isActiveMaster() {
2770    return activeMaster;
2771  }
2772
2773  /**
2774   * Report whether this master has completed with its initialization and is
2775   * ready.  If ready, the master is also the active master.  A standby master
2776   * is never ready.
2777   *
2778   * This method is used for testing.
2779   *
2780   * @return true if master is ready to go, false if not.
2781   */
2782  @Override
2783  public boolean isInitialized() {
2784    return initialized.isReady();
2785  }
2786
  /**
   * Report whether this master has started.
   *
   * This method is used for testing.
   *
   * @return true if this master's services have started, false if not.
   */
  @Override
2796  public boolean isOnline() {
2797    return serviceStarted;
2798  }
2799
2800  /**
2801   * Report whether this master is in maintenance mode.
2802   *
2803   * @return true if master is in maintenanceMode
2804   */
2805  @Override
2806  public boolean isInMaintenanceMode() {
2807    return maintenanceMode;
2808  }
2809
2810  @InterfaceAudience.Private
2811  public void setInitialized(boolean isInitialized) {
2812    procedureExecutor.getEnvironment().setEventReady(initialized, isInitialized);
2813  }
2814
2815  @Override
2816  public ProcedureEvent<?> getInitializedEvent() {
2817    return initialized;
2818  }
2819
2820  /**
2821   * Compute the average load across all region servers.
2822   * Currently, this uses a very naive computation - just uses the number of
2823   * regions being served, ignoring stats about number of requests.
2824   * @return the average load
2825   */
2826  public double getAverageLoad() {
2827    if (this.assignmentManager == null) {
2828      return 0;
2829    }
2830
2831    RegionStates regionStates = this.assignmentManager.getRegionStates();
2832    if (regionStates == null) {
2833      return 0;
2834    }
2835    return regionStates.getAverageLoad();
2836  }
2837
2838  @Override
2839  public boolean registerService(Service instance) {
2840    /*
2841     * No stacking of instances is allowed for a single service name
2842     */
2843    Descriptors.ServiceDescriptor serviceDesc = instance.getDescriptorForType();
2844    String serviceName = CoprocessorRpcUtils.getServiceName(serviceDesc);
2845    if (coprocessorServiceHandlers.containsKey(serviceName)) {
      LOG.error("Coprocessor service " + serviceName +
        " already registered, rejecting request from " + instance);
2849      return false;
2850    }
2851
2852    coprocessorServiceHandlers.put(serviceName, instance);
2853    if (LOG.isDebugEnabled()) {
      LOG.debug("Registered master coprocessor service: service=" + serviceName);
2855    }
2856    return true;
2857  }
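
  /**
   * Illustrative sketch: registering the same coprocessor Service twice under one service name
   * is rejected; only the first registration is kept. The {@code service} argument is assumed
   * to be any endpoint implementation.
   */
  private boolean exampleRegisterTwice(Service service) {
    boolean first = registerService(service); // true, name now registered
    boolean again = registerService(service); // false, duplicate rejected
    return first && !again;
  }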
2858
  /**
   * Utility for constructing an instance of the passed HMaster class.
   * @param masterClass the HMaster class (or subclass) to instantiate
   * @param conf the configuration handed to the constructor
   * @return HMaster instance.
   */
2864  public static HMaster constructMaster(Class<? extends HMaster> masterClass,
2865      final Configuration conf)  {
2866    try {
2867      Constructor<? extends HMaster> c = masterClass.getConstructor(Configuration.class);
2868      return c.newInstance(conf);
2869    } catch(Exception e) {
2870      Throwable error = e;
2871      if (e instanceof InvocationTargetException &&
2872          ((InvocationTargetException)e).getTargetException() != null) {
2873        error = ((InvocationTargetException)e).getTargetException();
2874      }
      throw new RuntimeException(
        "Failed construction of Master: " + masterClass.toString() + ".", error);
2877    }
2878  }
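
  /**
   * Usage sketch for {@link #constructMaster(Class, Configuration)}: the stock HMaster is
   * instantiated here, but any subclass exposing a {@code (Configuration)} constructor is
   * constructed the same way.
   */
  private static HMaster exampleConstructMaster(Configuration conf) {
    return constructMaster(HMaster.class, conf);
  }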
2879
2880  /**
2881   * @see org.apache.hadoop.hbase.master.HMasterCommandLine
2882   */
2883  public static void main(String [] args) {
2884    LOG.info("STARTING service " + HMaster.class.getSimpleName());
2885    VersionInfo.logVersion();
2886    new HMasterCommandLine(HMaster.class).doMain(args);
2887  }
2888
2889  public HFileCleaner getHFileCleaner() {
2890    return this.hfileCleaner;
2891  }
2892
2893  public LogCleaner getLogCleaner() {
2894    return this.logCleaner;
2895  }
2896
2897  /**
2898   * @return the underlying snapshot manager
2899   */
2900  @Override
2901  public SnapshotManager getSnapshotManager() {
2902    return this.snapshotManager;
2903  }
2904
2905  /**
2906   * @return the underlying MasterProcedureManagerHost
2907   */
2908  @Override
2909  public MasterProcedureManagerHost getMasterProcedureManagerHost() {
2910    return mpmHost;
2911  }
2912
2913  @Override
2914  public ClusterSchema getClusterSchema() {
2915    return this.clusterSchemaService;
2916  }
2917
2918  /**
2919   * Create a new Namespace.
2920   * @param namespaceDescriptor descriptor for new Namespace
2921   * @param nonceGroup Identifier for the source of the request, a client or process.
2922   * @param nonce A unique identifier for this operation from the client or process identified by
2923   * <code>nonceGroup</code> (the source must ensure each operation gets a unique id).
2924   * @return procedure id
2925   */
2926  long createNamespace(final NamespaceDescriptor namespaceDescriptor, final long nonceGroup,
2927      final long nonce) throws IOException {
2928    checkInitialized();
2929
2930    TableName.isLegalNamespaceName(Bytes.toBytes(namespaceDescriptor.getName()));
2931
2932    return MasterProcedureUtil.submitProcedure(new MasterProcedureUtil.NonceProcedureRunnable(this,
2933          nonceGroup, nonce) {
2934      @Override
2935      protected void run() throws IOException {
2936        getMaster().getMasterCoprocessorHost().preCreateNamespace(namespaceDescriptor);
2937        // We need to wait for the procedure to potentially fail due to "prepare" sanity
2938        // checks. This will block only the beginning of the procedure. See HBASE-19953.
2939        ProcedurePrepareLatch latch = ProcedurePrepareLatch.createBlockingLatch();
2940        LOG.info(getClientIdAuditPrefix() + " creating " + namespaceDescriptor);
2941        // Execute the operation synchronously - wait for the operation to complete before
2942        // continuing.
2943        setProcId(getClusterSchema().createNamespace(namespaceDescriptor, getNonceKey(), latch));
2944        latch.await();
2945        getMaster().getMasterCoprocessorHost().postCreateNamespace(namespaceDescriptor);
2946      }
2947
2948      @Override
2949      protected String getDescription() {
2950        return "CreateNamespaceProcedure";
2951      }
2952    });
2953  }
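
  /**
   * Illustrative sketch of driving the namespace DDL path directly. The namespace name
   * "example_ns" is hypothetical; HConstants.NO_NONCE disables nonce-based deduplication,
   * which real clients normally supply from their nonce generator so retries collapse
   * server-side.
   */
  private long exampleCreateNamespace() throws IOException {
    NamespaceDescriptor ns = NamespaceDescriptor.create("example_ns").build();
    return createNamespace(ns, HConstants.NO_NONCE, HConstants.NO_NONCE);
  }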
2954
2955  /**
2956   * Modify an existing Namespace.
2957   * @param nonceGroup Identifier for the source of the request, a client or process.
2958   * @param nonce A unique identifier for this operation from the client or process identified by
2959   * <code>nonceGroup</code> (the source must ensure each operation gets a unique id).
2960   * @return procedure id
2961   */
2962  long modifyNamespace(final NamespaceDescriptor newNsDescriptor, final long nonceGroup,
2963      final long nonce) throws IOException {
2964    checkInitialized();
2965
2966    TableName.isLegalNamespaceName(Bytes.toBytes(newNsDescriptor.getName()));
2967
2968    return MasterProcedureUtil.submitProcedure(new MasterProcedureUtil.NonceProcedureRunnable(this,
2969          nonceGroup, nonce) {
2970      @Override
2971      protected void run() throws IOException {
2972        NamespaceDescriptor oldNsDescriptor = getNamespace(newNsDescriptor.getName());
2973        getMaster().getMasterCoprocessorHost().preModifyNamespace(oldNsDescriptor, newNsDescriptor);
2974        // We need to wait for the procedure to potentially fail due to "prepare" sanity
2975        // checks. This will block only the beginning of the procedure. See HBASE-19953.
2976        ProcedurePrepareLatch latch = ProcedurePrepareLatch.createBlockingLatch();
2977        LOG.info(getClientIdAuditPrefix() + " modify " + newNsDescriptor);
2978        // Execute the operation synchronously - wait for the operation to complete before
2979        // continuing.
2980        setProcId(getClusterSchema().modifyNamespace(newNsDescriptor, getNonceKey(), latch));
2981        latch.await();
2982        getMaster().getMasterCoprocessorHost().postModifyNamespace(oldNsDescriptor,
2983          newNsDescriptor);
2984      }
2985
2986      @Override
2987      protected String getDescription() {
2988        return "ModifyNamespaceProcedure";
2989      }
2990    });
2991  }
2992
2993  /**
2994   * Delete an existing Namespace. Only empty Namespaces (no tables) can be removed.
2995   * @param nonceGroup Identifier for the source of the request, a client or process.
2996   * @param nonce A unique identifier for this operation from the client or process identified by
2997   * <code>nonceGroup</code> (the source must ensure each operation gets a unique id).
2998   * @return procedure id
2999   */
3000  long deleteNamespace(final String name, final long nonceGroup, final long nonce)
3001      throws IOException {
3002    checkInitialized();
3003
3004    return MasterProcedureUtil.submitProcedure(new MasterProcedureUtil.NonceProcedureRunnable(this,
3005          nonceGroup, nonce) {
3006      @Override
3007      protected void run() throws IOException {
3008        getMaster().getMasterCoprocessorHost().preDeleteNamespace(name);
3009        LOG.info(getClientIdAuditPrefix() + " delete " + name);
3010        // Execute the operation synchronously - wait for the operation to complete before
3011        // continuing.
3012        //
3013        // We need to wait for the procedure to potentially fail due to "prepare" sanity
3014        // checks. This will block only the beginning of the procedure. See HBASE-19953.
3015        ProcedurePrepareLatch latch = ProcedurePrepareLatch.createBlockingLatch();
3016        setProcId(submitProcedure(
3017              new DeleteNamespaceProcedure(procedureExecutor.getEnvironment(), name, latch)));
3018        latch.await();
        // This will not be invoked if the Procedure's execution throws an Exception.
3020        getMaster().getMasterCoprocessorHost().postDeleteNamespace(name);
3021      }
3022
3023      @Override
3024      protected String getDescription() {
3025        return "DeleteNamespaceProcedure";
3026      }
3027    });
3028  }
3029
3030  /**
3031   * Get a Namespace
3032   * @param name Name of the Namespace
3033   * @return Namespace descriptor for <code>name</code>
3034   */
3035  NamespaceDescriptor getNamespace(String name) throws IOException {
3036    checkInitialized();
    if (this.cpHost != null) {
      this.cpHost.preGetNamespaceDescriptor(name);
    }
    NamespaceDescriptor nsd = this.clusterSchemaService.getNamespace(name);
    if (this.cpHost != null) {
      this.cpHost.postGetNamespaceDescriptor(nsd);
    }
3040    return nsd;
3041  }
3042
3043  /**
3044   * Get all Namespaces
3045   * @return All Namespace descriptors
3046   */
3047  List<NamespaceDescriptor> getNamespaces() throws IOException {
3048    checkInitialized();
3049    final List<NamespaceDescriptor> nsds = new ArrayList<>();
3050    if (cpHost != null) {
3051      cpHost.preListNamespaceDescriptors(nsds);
3052    }
3053    nsds.addAll(this.clusterSchemaService.getNamespaces());
3054    if (this.cpHost != null) {
3055      this.cpHost.postListNamespaceDescriptors(nsds);
3056    }
3057    return nsds;
3058  }
3059
3060  /**
3061   * List namespace names
3062   * @return All namespace names
3063   */
3064  public List<String> listNamespaces() throws IOException {
3065    checkInitialized();
3066    List<String> namespaces = new ArrayList<>();
3067    if (cpHost != null) {
3068      cpHost.preListNamespaces(namespaces);
3069    }
3070    for (NamespaceDescriptor namespace : clusterSchemaService.getNamespaces()) {
3071      namespaces.add(namespace.getName());
3072    }
3073    if (cpHost != null) {
3074      cpHost.postListNamespaces(namespaces);
3075    }
3076    return namespaces;
3077  }
3078
3079  @Override
3080  public List<TableName> listTableNamesByNamespace(String name) throws IOException {
3081    checkInitialized();
3082    return listTableNames(name, null, true);
3083  }
3084
3085  @Override
3086  public List<TableDescriptor> listTableDescriptorsByNamespace(String name) throws IOException {
3087    checkInitialized();
3088    return listTableDescriptors(name, null, null, true);
3089  }
3090
3091  @Override
3092  public boolean abortProcedure(final long procId, final boolean mayInterruptIfRunning)
3093      throws IOException {
3094    if (cpHost != null) {
3095      cpHost.preAbortProcedure(this.procedureExecutor, procId);
3096    }
3097
3098    final boolean result = this.procedureExecutor.abort(procId, mayInterruptIfRunning);
3099
3100    if (cpHost != null) {
3101      cpHost.postAbortProcedure();
3102    }
3103
3104    return result;
3105  }
3106
3107  @Override
3108  public List<Procedure<?>> getProcedures() throws IOException {
3109    if (cpHost != null) {
3110      cpHost.preGetProcedures();
3111    }
3112
3113    @SuppressWarnings({ "unchecked", "rawtypes" })
3114    List<Procedure<?>> procList = (List) this.procedureExecutor.getProcedures();
3115
3116    if (cpHost != null) {
3117      cpHost.postGetProcedures(procList);
3118    }
3119
3120    return procList;
3121  }
3122
3123  @Override
3124  public List<LockedResource> getLocks() throws IOException {
3125    if (cpHost != null) {
3126      cpHost.preGetLocks();
3127    }
3128
3129    MasterProcedureScheduler procedureScheduler =
3130      procedureExecutor.getEnvironment().getProcedureScheduler();
3131
3132    final List<LockedResource> lockedResources = procedureScheduler.getLocks();
3133
3134    if (cpHost != null) {
3135      cpHost.postGetLocks(lockedResources);
3136    }
3137
3138    return lockedResources;
3139  }
3140
3141  /**
3142   * Returns the list of table descriptors that match the specified request
3143   * @param namespace the namespace to query, or null if querying for all
3144   * @param regex The regular expression to match against, or null if querying for all
3145   * @param tableNameList the list of table names, or null if querying for all
3146   * @param includeSysTables False to match only against userspace tables
3147   * @return the list of table descriptors
3148   */
  public List<TableDescriptor> listTableDescriptors(final String namespace, final String regex,
      final List<TableName> tableNameList, final boolean includeSysTables) throws IOException {
3152    List<TableDescriptor> htds = new ArrayList<>();
3153    if (cpHost != null) {
3154      cpHost.preGetTableDescriptors(tableNameList, htds, regex);
3155    }
3156    htds = getTableDescriptors(htds, namespace, regex, tableNameList, includeSysTables);
3157    if (cpHost != null) {
3158      cpHost.postGetTableDescriptors(tableNameList, htds, regex);
3159    }
3160    return htds;
3161  }
3162
3163  /**
3164   * Returns the list of table names that match the specified request
3165   * @param regex The regular expression to match against, or null if querying for all
3166   * @param namespace the namespace to query, or null if querying for all
3167   * @param includeSysTables False to match only against userspace tables
3168   * @return the list of table names
3169   */
3170  public List<TableName> listTableNames(final String namespace, final String regex,
3171      final boolean includeSysTables) throws IOException {
3172    List<TableDescriptor> htds = new ArrayList<>();
3173    if (cpHost != null) {
3174      cpHost.preGetTableNames(htds, regex);
3175    }
3176    htds = getTableDescriptors(htds, namespace, regex, null, includeSysTables);
3177    if (cpHost != null) {
3178      cpHost.postGetTableNames(htds, regex);
3179    }
    List<TableName> result = new ArrayList<>(htds.size());
    for (TableDescriptor htd : htds) {
      result.add(htd.getTableName());
    }
3182    return result;
3183  }
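
  /**
   * Illustrative sketch: list user-space table names only. The regex here is a hypothetical
   * filter; for default-namespace tables it is matched against the bare table name (see
   * filterTablesByRegex below).
   */
  private List<TableName> exampleListUserTables() throws IOException {
    // All user tables whose (default-namespace) name starts with "web".
    return listTableNames(null, "web.*", false);
  }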
3184
  /**
   * Return a list of table descriptors after applying any provided filter parameters. Note
   * that the user-facing description of this filter logic is presented on the class-level
   * javadoc of {@link NormalizeTableFilterParams}.
   */
  private List<TableDescriptor> getTableDescriptors(final List<TableDescriptor> htds,
      final String namespace, final String regex, final List<TableName> tableNameList,
      final boolean includeSysTables) throws IOException {
3194    if (tableNameList == null || tableNameList.isEmpty()) {
3195      // request for all TableDescriptors
3196      Collection<TableDescriptor> allHtds;
3197      if (namespace != null && namespace.length() > 0) {
3198        // Do a check on the namespace existence. Will fail if does not exist.
3199        this.clusterSchemaService.getNamespace(namespace);
3200        allHtds = tableDescriptors.getByNamespace(namespace).values();
3201      } else {
3202        allHtds = tableDescriptors.getAll().values();
3203      }
3204      for (TableDescriptor desc: allHtds) {
3205        if (tableStateManager.isTablePresent(desc.getTableName())
3206            && (includeSysTables || !desc.getTableName().isSystemTable())) {
3207          htds.add(desc);
3208        }
3209      }
3210    } else {
3211      for (TableName s: tableNameList) {
3212        if (tableStateManager.isTablePresent(s)) {
3213          TableDescriptor desc = tableDescriptors.get(s);
3214          if (desc != null) {
3215            htds.add(desc);
3216          }
3217        }
3218      }
3219    }
3220
    // Retains only those matched by regular expression.
    if (regex != null) {
      filterTablesByRegex(htds, Pattern.compile(regex));
    }
3223    return htds;
3224  }
3225
3226  /**
3227   * Removes the table descriptors that don't match the pattern.
3228   * @param descriptors list of table descriptors to filter
3229   * @param pattern the regex to use
3230   */
3231  private static void filterTablesByRegex(final Collection<TableDescriptor> descriptors,
3232      final Pattern pattern) {
3233    final String defaultNS = NamespaceDescriptor.DEFAULT_NAMESPACE_NAME_STR;
3234    Iterator<TableDescriptor> itr = descriptors.iterator();
3235    while (itr.hasNext()) {
3236      TableDescriptor htd = itr.next();
3237      String tableName = htd.getTableName().getNameAsString();
3238      boolean matched = pattern.matcher(tableName).matches();
3239      if (!matched && htd.getTableName().getNamespaceAsString().equals(defaultNS)) {
3240        matched = pattern.matcher(defaultNS + TableName.NAMESPACE_DELIM + tableName).matches();
3241      }
3242      if (!matched) {
3243        itr.remove();
3244      }
3245    }
3246  }
3247
3248  @Override
3249  public long getLastMajorCompactionTimestamp(TableName table) throws IOException {
3250    return getClusterMetrics(EnumSet.of(Option.LIVE_SERVERS))
3251        .getLastMajorCompactionTimestamp(table);
3252  }
3253
3254  @Override
3255  public long getLastMajorCompactionTimestampForRegion(byte[] regionName) throws IOException {
3256    return getClusterMetrics(EnumSet.of(Option.LIVE_SERVERS))
3257        .getLastMajorCompactionTimestamp(regionName);
3258  }
3259
  /**
   * Gets the mob file compaction state for a specific table.
   * Whether all the mob files are selected is only known during compaction execution, but the
   * statistic is taken just before the compaction starts, so it is hard to know the compaction
   * type at that time. Rough statistics are therefore used for mob file compactions: only two
   * compaction states are reported, CompactionState.MAJOR_AND_MINOR and CompactionState.NONE.
   * @param tableName The current table name.
   * @return Whether the given table is undergoing a mob file compaction right now.
   */
3269  public GetRegionInfoResponse.CompactionState getMobCompactionState(TableName tableName) {
3270    AtomicInteger compactionsCount = mobCompactionStates.get(tableName);
3271    if (compactionsCount != null && compactionsCount.get() != 0) {
3272      return GetRegionInfoResponse.CompactionState.MAJOR_AND_MINOR;
3273    }
3274    return GetRegionInfoResponse.CompactionState.NONE;
3275  }
3276
3277  public void reportMobCompactionStart(TableName tableName) throws IOException {
3278    IdLock.Entry lockEntry = null;
3279    try {
3280      lockEntry = mobCompactionLock.getLockEntry(tableName.hashCode());
3281      AtomicInteger compactionsCount = mobCompactionStates.get(tableName);
3282      if (compactionsCount == null) {
3283        compactionsCount = new AtomicInteger(0);
3284        mobCompactionStates.put(tableName, compactionsCount);
3285      }
3286      compactionsCount.incrementAndGet();
3287    } finally {
3288      if (lockEntry != null) {
3289        mobCompactionLock.releaseLockEntry(lockEntry);
3290      }
3291    }
3292  }
3293
3294  public void reportMobCompactionEnd(TableName tableName) throws IOException {
3295    IdLock.Entry lockEntry = null;
3296    try {
3297      lockEntry = mobCompactionLock.getLockEntry(tableName.hashCode());
3298      AtomicInteger compactionsCount = mobCompactionStates.get(tableName);
3299      if (compactionsCount != null) {
3300        int count = compactionsCount.decrementAndGet();
3301        // remove the entry if the count is 0.
3302        if (count == 0) {
3303          mobCompactionStates.remove(tableName);
3304        }
3305      }
3306    } finally {
3307      if (lockEntry != null) {
3308        mobCompactionLock.releaseLockEntry(lockEntry);
3309      }
3310    }
3311  }
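
  /**
   * Illustrative sketch of the mob compaction accounting protocol: bracket the compaction work
   * with the start/end reports so getMobCompactionState() reflects an in-flight compaction
   * while the body runs. The Runnable stands in for the actual compaction work.
   */
  private void exampleMobCompactionBracket(TableName tableName, Runnable compactionWork)
      throws IOException {
    reportMobCompactionStart(tableName); // state becomes MAJOR_AND_MINOR
    try {
      compactionWork.run();
    } finally {
      reportMobCompactionEnd(tableName); // state returns to NONE once the count hits 0
    }
  }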
3312
3314  /**
3315   * Queries the state of the {@link LoadBalancerTracker}. If the balancer is not initialized,
3316   * false is returned.
3317   *
3318   * @return The state of the load balancer, or false if the load balancer isn't defined.
3319   */
3320  public boolean isBalancerOn() {
3321    return !isInMaintenanceMode()
3322        && loadBalancerTracker != null
3323        && loadBalancerTracker.isBalancerOn();
3324  }
3325
3326  /**
3327   * Queries the state of the {@link RegionNormalizerTracker}. If it's not initialized,
3328   * false is returned.
3329   */
3330  public boolean isNormalizerOn() {
3331    return !isInMaintenanceMode()
3332      && getRegionNormalizerManager().isNormalizerOn();
3333  }
3334
3335  /**
3336   * Queries the state of the {@link SplitOrMergeTracker}. If it is not initialized,
   * false is returned. If switchType is illegal, false will be returned.
3338   * @param switchType see {@link org.apache.hadoop.hbase.client.MasterSwitchType}
3339   * @return The state of the switch
3340   */
3341  @Override
3342  public boolean isSplitOrMergeEnabled(MasterSwitchType switchType) {
3343    return !isInMaintenanceMode()
3344        && splitOrMergeTracker != null
3345        && splitOrMergeTracker.isSplitOrMergeEnabled(switchType);
3346  }
3347
3348  /**
3349   * Fetch the configured {@link LoadBalancer} class name. If none is set, a default is returned.
3350   * <p/>
   * Notice that the base load balancer will always be {@link RSGroupBasedLoadBalancer} now, so
3352   * this method will return the balancer used inside each rs group.
3353   * @return The name of the {@link LoadBalancer} in use.
3354   */
3355  public String getLoadBalancerClassName() {
3356    return conf.get(HConstants.HBASE_MASTER_LOADBALANCER_CLASS,
3357      LoadBalancerFactory.getDefaultLoadBalancerClass().getName());
3358  }
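
  /**
   * Illustrative sketch: the balancer class is plain configuration, so a test can pin it
   * before the master starts. SimpleLoadBalancer is used purely as an example value here.
   */
  private static void examplePinBalancerClass(Configuration conf) {
    conf.set(HConstants.HBASE_MASTER_LOADBALANCER_CLASS,
      "org.apache.hadoop.hbase.master.balancer.SimpleLoadBalancer");
  }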
3359
3360  public SplitOrMergeTracker getSplitOrMergeTracker() {
3361    return splitOrMergeTracker;
3362  }
3363
3364  @Override
3365  public RSGroupBasedLoadBalancer getLoadBalancer() {
3366    return balancer;
3367  }
3368
3369  @Override
3370  public FavoredNodesManager getFavoredNodesManager() {
3371    return balancer.getFavoredNodesManager();
3372  }
3373
3374  private long executePeerProcedure(AbstractPeerProcedure<?> procedure) throws IOException {
3375    long procId = procedureExecutor.submitProcedure(procedure);
3376    procedure.getLatch().await();
3377    return procId;
3378  }
3379
3380  @Override
3381  public long addReplicationPeer(String peerId, ReplicationPeerConfig peerConfig, boolean enabled)
3382      throws ReplicationException, IOException {
3383    LOG.info(getClientIdAuditPrefix() + " creating replication peer, id=" + peerId + ", config=" +
3384      peerConfig + ", state=" + (enabled ? "ENABLED" : "DISABLED"));
3385    return executePeerProcedure(new AddPeerProcedure(peerId, peerConfig, enabled));
3386  }
3387
3388  @Override
3389  public long removeReplicationPeer(String peerId) throws ReplicationException, IOException {
3390    LOG.info(getClientIdAuditPrefix() + " removing replication peer, id=" + peerId);
3391    return executePeerProcedure(new RemovePeerProcedure(peerId));
3392  }
3393
3394  @Override
3395  public long enableReplicationPeer(String peerId) throws ReplicationException, IOException {
3396    LOG.info(getClientIdAuditPrefix() + " enable replication peer, id=" + peerId);
3397    return executePeerProcedure(new EnablePeerProcedure(peerId));
3398  }
3399
3400  @Override
3401  public long disableReplicationPeer(String peerId) throws ReplicationException, IOException {
3402    LOG.info(getClientIdAuditPrefix() + " disable replication peer, id=" + peerId);
3403    return executePeerProcedure(new DisablePeerProcedure(peerId));
3404  }
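
  /**
   * Illustrative sketch of the procedure-backed peer lifecycle: add a peer in the DISABLED
   * state, enable it once ready, and finally remove it. The peer id and cluster key are
   * hypothetical placeholders.
   */
  private void examplePeerLifecycle() throws ReplicationException, IOException {
    ReplicationPeerConfig config = ReplicationPeerConfig.newBuilder()
      .setClusterKey("zk1,zk2,zk3:2181:/hbase-peer").build();
    addReplicationPeer("example_peer", config, false); // created DISABLED
    enableReplicationPeer("example_peer");             // start shipping edits
    removeReplicationPeer("example_peer");             // tear down
  }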
3405
3406  @Override
3407  public ReplicationPeerConfig getReplicationPeerConfig(String peerId)
3408      throws ReplicationException, IOException {
3409    if (cpHost != null) {
3410      cpHost.preGetReplicationPeerConfig(peerId);
3411    }
3412    LOG.info(getClientIdAuditPrefix() + " get replication peer config, id=" + peerId);
3413    ReplicationPeerConfig peerConfig = this.replicationPeerManager.getPeerConfig(peerId)
3414        .orElseThrow(() -> new ReplicationPeerNotFoundException(peerId));
3415    if (cpHost != null) {
3416      cpHost.postGetReplicationPeerConfig(peerId);
3417    }
3418    return peerConfig;
3419  }
3420
3421  @Override
3422  public long updateReplicationPeerConfig(String peerId, ReplicationPeerConfig peerConfig)
3423      throws ReplicationException, IOException {
3424    LOG.info(getClientIdAuditPrefix() + " update replication peer config, id=" + peerId +
3425      ", config=" + peerConfig);
3426    return executePeerProcedure(new UpdatePeerConfigProcedure(peerId, peerConfig));
3427  }
3428
3429  @Override
3430  public List<ReplicationPeerDescription> listReplicationPeers(String regex)
3431      throws ReplicationException, IOException {
3432    if (cpHost != null) {
3433      cpHost.preListReplicationPeers(regex);
3434    }
3435    LOG.debug("{} list replication peers, regex={}", getClientIdAuditPrefix(), regex);
3436    Pattern pattern = regex == null ? null : Pattern.compile(regex);
3437    List<ReplicationPeerDescription> peers =
3438      this.replicationPeerManager.listPeers(pattern);
3439    if (cpHost != null) {
3440      cpHost.postListReplicationPeers(regex);
3441    }
3442    return peers;
3443  }
3444
3445  @Override
3446  public long transitReplicationPeerSyncReplicationState(String peerId, SyncReplicationState state)
3447    throws ReplicationException, IOException {
3448    LOG.info(
3449      getClientIdAuditPrefix() +
3450        " transit current cluster state to {} in a synchronous replication peer id={}",
3451      state, peerId);
3452    return executePeerProcedure(new TransitPeerSyncReplicationStateProcedure(peerId, state));
3453  }
3454
3455  /**
   * Mark region server(s) as decommissioned (previously called 'draining') to prevent additional
   * regions from getting assigned to them. Also unload the regions on the servers asynchronously.
3458   * @param servers Region servers to decommission.
3459   */
3460  public void decommissionRegionServers(final List<ServerName> servers, final boolean offload)
3461      throws IOException {
3462    List<ServerName> serversAdded = new ArrayList<>(servers.size());
3463    // Place the decommission marker first.
3464    String parentZnode = getZooKeeper().getZNodePaths().drainingZNode;
3465    for (ServerName server : servers) {
3466      try {
3467        String node = ZNodePaths.joinZNode(parentZnode, server.getServerName());
3468        ZKUtil.createAndFailSilent(getZooKeeper(), node);
3469      } catch (KeeperException ke) {
3470        throw new HBaseIOException(
3471          this.zooKeeper.prefix("Unable to decommission '" + server.getServerName() + "'."), ke);
3472      }
3473      if (this.serverManager.addServerToDrainList(server)) {
3474        serversAdded.add(server);
3475      }
3476    }
3477    // Move the regions off the decommissioned servers.
3478    if (offload) {
3479      final List<ServerName> destServers = this.serverManager.createDestinationServersList();
3480      for (ServerName server : serversAdded) {
3481        final List<RegionInfo> regionsOnServer = this.assignmentManager.getRegionsOnServer(server);
3482        for (RegionInfo hri : regionsOnServer) {
3483          ServerName dest = balancer.randomAssignment(hri, destServers);
3484          if (dest == null) {
3485            throw new HBaseIOException("Unable to determine a plan to move " + hri);
3486          }
3487          RegionPlan rp = new RegionPlan(hri, server, dest);
3488          this.assignmentManager.moveAsync(rp);
3489        }
3490      }
3491    }
3492  }
3493
3494  /**
3495   * List region servers marked as decommissioned (previously called 'draining') to not get regions
3496   * assigned to them.
3497   * @return List of decommissioned servers.
3498   */
3499  public List<ServerName> listDecommissionedRegionServers() {
3500    return this.serverManager.getDrainingServersList();
3501  }
3502
3503  /**
   * Remove decommission marker (previously called 'draining') from a region server to allow
   * region assignments. Load regions onto the server asynchronously if a list of regions is
   * given.
   * @param server Region server to remove decommission marker from.
   * @param encodedRegionNames Regions to load onto the server, or null/empty to skip loading.
3507   */
3508  public void recommissionRegionServer(final ServerName server,
3509      final List<byte[]> encodedRegionNames) throws IOException {
3510    // Remove the server from decommissioned (draining) server list.
3511    String parentZnode = getZooKeeper().getZNodePaths().drainingZNode;
3512    String node = ZNodePaths.joinZNode(parentZnode, server.getServerName());
3513    try {
3514      ZKUtil.deleteNodeFailSilent(getZooKeeper(), node);
3515    } catch (KeeperException ke) {
3516      throw new HBaseIOException(
3517        this.zooKeeper.prefix("Unable to recommission '" + server.getServerName() + "'."), ke);
3518    }
3519    this.serverManager.removeServerFromDrainList(server);
3520
3521    // Load the regions onto the server if we are given a list of regions.
3522    if (encodedRegionNames == null || encodedRegionNames.isEmpty()) {
3523      return;
3524    }
3525    if (!this.serverManager.isServerOnline(server)) {
3526      return;
3527    }
3528    for (byte[] encodedRegionName : encodedRegionNames) {
3529      RegionState regionState =
3530        assignmentManager.getRegionStates().getRegionState(Bytes.toString(encodedRegionName));
3531      if (regionState == null) {
3532        LOG.warn("Unknown region " + Bytes.toStringBinary(encodedRegionName));
3533        continue;
3534      }
3535      RegionInfo hri = regionState.getRegion();
3536      if (server.equals(regionState.getServerName())) {
3537        LOG.info("Skipping move of region " + hri.getRegionNameAsString() +
3538          " because region already assigned to the same server " + server + ".");
3539        continue;
3540      }
3541      RegionPlan rp = new RegionPlan(hri, regionState.getServerName(), server);
3542      this.assignmentManager.moveAsync(rp);
3543    }
3544  }
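
  /**
   * Illustrative sketch of a maintenance cycle: decommission a server and offload its regions,
   * then recommission it without proactively moving regions back (an empty region list skips
   * the reload step).
   */
  private void exampleDrainCycle(ServerName server) throws IOException {
    decommissionRegionServers(Collections.singletonList(server), true);
    // ... perform maintenance on the server ...
    recommissionRegionServer(server, Collections.emptyList());
  }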
3545
3546  @Override
3547  public LockManager getLockManager() {
3548    return lockManager;
3549  }
3550
3551  public QuotaObserverChore getQuotaObserverChore() {
3552    return this.quotaObserverChore;
3553  }
3554
3555  public SpaceQuotaSnapshotNotifier getSpaceQuotaSnapshotNotifier() {
3556    return this.spaceQuotaSnapshotNotifier;
3557  }
3558
3559  @SuppressWarnings("unchecked")
3560  private RemoteProcedure<MasterProcedureEnv, ?> getRemoteProcedure(long procId) {
3561    Procedure<?> procedure = procedureExecutor.getProcedure(procId);
3562    if (procedure == null) {
3563      return null;
3564    }
3565    assert procedure instanceof RemoteProcedure;
3566    return (RemoteProcedure<MasterProcedureEnv, ?>) procedure;
3567  }
3568
3569  public void remoteProcedureCompleted(long procId) {
3570    LOG.debug("Remote procedure done, pid={}", procId);
3571    RemoteProcedure<MasterProcedureEnv, ?> procedure = getRemoteProcedure(procId);
3572    if (procedure != null) {
3573      procedure.remoteOperationCompleted(procedureExecutor.getEnvironment());
3574    }
3575  }
3576
3577  public void remoteProcedureFailed(long procId, RemoteProcedureException error) {
3578    LOG.debug("Remote procedure failed, pid={}", procId, error);
3579    RemoteProcedure<MasterProcedureEnv, ?> procedure = getRemoteProcedure(procId);
3580    if (procedure != null) {
3581      procedure.remoteOperationFailed(procedureExecutor.getEnvironment(), error);
3582    }
3583  }
3584
3585  /**
3586   * Reopen regions provided in the argument
3587   *
3588   * @param tableName The current table name
3589   * @param regionNames The region names of the regions to reopen
3590   * @param nonceGroup Identifier for the source of the request, a client or process
3591   * @param nonce A unique identifier for this operation from the client or process identified by
3592   *   <code>nonceGroup</code> (the source must ensure each operation gets a unique id).
3593   * @return procedure Id
   * @throws IOException if reopening a region fails while running the procedure
3595   */
  long reopenRegions(final TableName tableName, final List<byte[]> regionNames,
      final long nonceGroup, final long nonce) throws IOException {
3600    return MasterProcedureUtil
3601      .submitProcedure(new MasterProcedureUtil.NonceProcedureRunnable(this, nonceGroup, nonce) {
3602
3603        @Override
3604        protected void run() throws IOException {
3605          submitProcedure(new ReopenTableRegionsProcedure(tableName, regionNames));
3606        }
3607
3608        @Override
3609        protected String getDescription() {
3610          return "ReopenTableRegionsProcedure";
3611        }
      });
  }
3616
3617  @Override
3618  public ReplicationPeerManager getReplicationPeerManager() {
3619    return replicationPeerManager;
3620  }
3621
3622  public HashMap<String, List<Pair<ServerName, ReplicationLoadSource>>>
3623      getReplicationLoad(ServerName[] serverNames) {
3624    List<ReplicationPeerDescription> peerList = this.getReplicationPeerManager().listPeers(null);
3625    if (peerList == null) {
3626      return null;
3627    }
3628    HashMap<String, List<Pair<ServerName, ReplicationLoadSource>>> replicationLoadSourceMap =
3629        new HashMap<>(peerList.size());
3630    peerList.stream()
3631        .forEach(peer -> replicationLoadSourceMap.put(peer.getPeerId(), new ArrayList<>()));
3632    for (ServerName serverName : serverNames) {
3633      List<ReplicationLoadSource> replicationLoadSources =
3634          getServerManager().getLoad(serverName).getReplicationLoadSourceList();
3635      for (ReplicationLoadSource replicationLoadSource : replicationLoadSources) {
3636        List<Pair<ServerName, ReplicationLoadSource>> replicationLoadSourceList =
3637          replicationLoadSourceMap.get(replicationLoadSource.getPeerID());
3638        if (replicationLoadSourceList == null) {
          LOG.debug("{} does not exist, but it exists in the znode (/hbase/replication/rs)."
            + " When the rs restarts, the peerId is deleted, so we just need to ignore it",
            replicationLoadSource.getPeerID());
3642          continue;
3643        }
3644        replicationLoadSourceList.add(new Pair<>(serverName, replicationLoadSource));
3645      }
3646    }
3647    for (List<Pair<ServerName, ReplicationLoadSource>> loads : replicationLoadSourceMap.values()) {
3648      if (loads.size() > 0) {
3649        loads.sort(Comparator.comparingLong(load -> (-1) * load.getSecond().getReplicationLag()));
3650      }
3651    }
3652    return replicationLoadSourceMap;
3653  }
3654
  /**
   * This method modifies the master's configuration in order to inject replication-related
   * features.
   */
3658  @InterfaceAudience.Private
3659  public static void decorateMasterConfiguration(Configuration conf) {
3660    String plugins = conf.get(HBASE_MASTER_LOGCLEANER_PLUGINS);
3661    String cleanerClass = ReplicationLogCleaner.class.getCanonicalName();
    if (plugins == null || !plugins.contains(cleanerClass)) {
      conf.set(HBASE_MASTER_LOGCLEANER_PLUGINS,
        plugins == null ? cleanerClass : plugins + "," + cleanerClass);
    }
3665    if (ReplicationUtils.isReplicationForBulkLoadDataEnabled(conf)) {
      plugins = conf.get(HFileCleaner.MASTER_HFILE_CLEANER_PLUGINS);
      cleanerClass = ReplicationHFileCleaner.class.getCanonicalName();
      if (plugins == null || !plugins.contains(cleanerClass)) {
        conf.set(HFileCleaner.MASTER_HFILE_CLEANER_PLUGINS,
          plugins == null ? cleanerClass : plugins + "," + cleanerClass);
      }
3671    }
3672  }
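
  /**
   * Illustrative sketch: decorating a Configuration appends the replication log cleaner to the
   * master's log-cleaner plugin list exactly once; calling it again is a no-op.
   */
  private static void exampleDecorate(Configuration conf) {
    decorateMasterConfiguration(conf);
    decorateMasterConfiguration(conf); // idempotent, the cleaner is already present
    assert conf.get(HBASE_MASTER_LOGCLEANER_PLUGINS)
      .contains(ReplicationLogCleaner.class.getCanonicalName());
  }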
3673
3674  public SnapshotQuotaObserverChore getSnapshotQuotaObserverChore() {
3675    return this.snapshotQuotaChore;
3676  }
3677
3678  @Override
3679  public SyncReplicationReplayWALManager getSyncReplicationReplayWALManager() {
3680    return this.syncReplicationReplayWALManager;
3681  }
3682
3683  @Override
3684  public Map<String, ReplicationStatus> getWalGroupsReplicationStatus() {
3685    if (!this.isOnline() || !LoadBalancer.isMasterCanHostUserRegions(conf)) {
3686      return new HashMap<>();
3687    }
3688    return super.getWalGroupsReplicationStatus();
3689  }
3690
3691  public HbckChore getHbckChore() {
3692    return this.hbckChore;
3693  }
3694
3695  @Override
3696  public String getClusterId() {
3697    if (activeMaster) {
3698      return super.getClusterId();
3699    }
3700    return cachedClusterId.getFromCacheOrFetch();
3701  }
3702
3703  public Optional<ServerName> getActiveMaster() {
3704    return activeMasterManager.getActiveMasterServerName();
3705  }
3706
3707  @Override
3708  public void runReplicationBarrierCleaner() {
3709    ReplicationBarrierCleaner rbc = this.replicationBarrierCleaner;
3710    if (rbc != null) {
3711      rbc.chore();
3712    }
3713  }
3714
3715  public MetaRegionLocationCache getMetaRegionLocationCache() {
3716    return this.metaRegionLocationCache;
3717  }
3718
3719  @Override
3720  public RSGroupInfoManager getRSGroupInfoManager() {
3721    return rsGroupInfoManager;
3722  }
3723
3724  /**
3725   * Get the compaction state of the table
3726   *
3727   * @param tableName The table name
3728   * @return CompactionState Compaction state of the table
3729   */
3730  public CompactionState getCompactionState(final TableName tableName) {
3731    CompactionState compactionState = CompactionState.NONE;
3732    try {
3733      List<RegionInfo> regions =
3734        assignmentManager.getRegionStates().getRegionsOfTable(tableName);
3735      for (RegionInfo regionInfo : regions) {
3736        ServerName serverName =
3737          assignmentManager.getRegionStates().getRegionServerOfRegion(regionInfo);
3738        if (serverName == null) {
3739          continue;
3740        }
3741        ServerMetrics sl = serverManager.getLoad(serverName);
3742        if (sl == null) {
3743          continue;
3744        }
        RegionMetrics regionMetrics = sl.getRegionMetrics().get(regionInfo.getRegionName());
        if (regionMetrics == null) {
          continue;
        }
        if (regionMetrics.getCompactionState() == CompactionState.MAJOR) {
3747          if (compactionState == CompactionState.MINOR) {
3748            compactionState = CompactionState.MAJOR_AND_MINOR;
3749          } else {
3750            compactionState = CompactionState.MAJOR;
3751          }
3752        } else if (regionMetrics.getCompactionState() == CompactionState.MINOR) {
3753          if (compactionState == CompactionState.MAJOR) {
3754            compactionState = CompactionState.MAJOR_AND_MINOR;
3755          } else {
3756            compactionState = CompactionState.MINOR;
3757          }
3758        }
3759      }
3760    } catch (Exception e) {
3761      compactionState = null;
      LOG.error("Exception when getting compaction state for " + tableName.getNameAsString(), e);
3763    }
3764    return compactionState;
3765  }
3766
3767  @Override
3768  public MetaLocationSyncer getMetaLocationSyncer() {
3769    return metaLocationSyncer;
3770  }
3771}