001/*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018package org.apache.hadoop.hbase.master;
019
020import static org.apache.hadoop.hbase.HConstants.DEFAULT_HBASE_SPLIT_COORDINATED_BY_ZK;
021import static org.apache.hadoop.hbase.HConstants.HBASE_MASTER_LOGCLEANER_PLUGINS;
022import static org.apache.hadoop.hbase.HConstants.HBASE_SPLIT_WAL_COORDINATED_BY_ZK;
023import static org.apache.hadoop.hbase.master.cleaner.HFileCleaner.CUSTOM_POOL_SIZE;
024import static org.apache.hadoop.hbase.util.DNS.MASTER_HOSTNAME_KEY;
025
026import com.google.errorprone.annotations.RestrictedApi;
027import io.opentelemetry.api.trace.Span;
028import io.opentelemetry.api.trace.StatusCode;
029import io.opentelemetry.context.Scope;
030import java.io.IOException;
031import java.io.InterruptedIOException;
032import java.lang.reflect.Constructor;
033import java.lang.reflect.InvocationTargetException;
034import java.net.InetAddress;
035import java.net.InetSocketAddress;
036import java.net.UnknownHostException;
037import java.util.ArrayList;
038import java.util.Arrays;
039import java.util.Collection;
040import java.util.Collections;
041import java.util.Comparator;
042import java.util.EnumSet;
043import java.util.HashMap;
044import java.util.HashSet;
045import java.util.Iterator;
046import java.util.LinkedList;
047import java.util.List;
048import java.util.Map;
049import java.util.Objects;
050import java.util.Optional;
051import java.util.Set;
052import java.util.concurrent.ExecutionException;
053import java.util.concurrent.Future;
054import java.util.concurrent.TimeUnit;
055import java.util.concurrent.TimeoutException;
056import java.util.concurrent.atomic.AtomicInteger;
057import java.util.regex.Pattern;
058import java.util.stream.Collectors;
059import javax.servlet.http.HttpServlet;
060import org.apache.commons.lang3.StringUtils;
061import org.apache.hadoop.conf.Configuration;
062import org.apache.hadoop.fs.FSDataOutputStream;
063import org.apache.hadoop.fs.Path;
064import org.apache.hadoop.hbase.CatalogFamilyFormat;
065import org.apache.hadoop.hbase.Cell;
066import org.apache.hadoop.hbase.CellBuilderFactory;
067import org.apache.hadoop.hbase.CellBuilderType;
068import org.apache.hadoop.hbase.ClusterId;
069import org.apache.hadoop.hbase.ClusterMetrics;
070import org.apache.hadoop.hbase.ClusterMetrics.Option;
071import org.apache.hadoop.hbase.ClusterMetricsBuilder;
072import org.apache.hadoop.hbase.DoNotRetryIOException;
073import org.apache.hadoop.hbase.HBaseIOException;
074import org.apache.hadoop.hbase.HBaseInterfaceAudience;
075import org.apache.hadoop.hbase.HBaseServerBase;
076import org.apache.hadoop.hbase.HConstants;
077import org.apache.hadoop.hbase.HRegionLocation;
078import org.apache.hadoop.hbase.InvalidFamilyOperationException;
079import org.apache.hadoop.hbase.MasterNotRunningException;
080import org.apache.hadoop.hbase.MetaTableAccessor;
081import org.apache.hadoop.hbase.NamespaceDescriptor;
082import org.apache.hadoop.hbase.PleaseHoldException;
083import org.apache.hadoop.hbase.PleaseRestartMasterException;
084import org.apache.hadoop.hbase.RegionMetrics;
085import org.apache.hadoop.hbase.ReplicationPeerNotFoundException;
086import org.apache.hadoop.hbase.ScheduledChore;
087import org.apache.hadoop.hbase.ServerMetrics;
088import org.apache.hadoop.hbase.ServerName;
089import org.apache.hadoop.hbase.ServerTask;
090import org.apache.hadoop.hbase.ServerTaskBuilder;
091import org.apache.hadoop.hbase.TableName;
092import org.apache.hadoop.hbase.TableNotDisabledException;
093import org.apache.hadoop.hbase.TableNotFoundException;
094import org.apache.hadoop.hbase.UnknownRegionException;
095import org.apache.hadoop.hbase.client.BalanceRequest;
096import org.apache.hadoop.hbase.client.BalanceResponse;
097import org.apache.hadoop.hbase.client.ColumnFamilyDescriptor;
098import org.apache.hadoop.hbase.client.CompactionState;
099import org.apache.hadoop.hbase.client.MasterSwitchType;
100import org.apache.hadoop.hbase.client.NormalizeTableFilterParams;
101import org.apache.hadoop.hbase.client.Put;
102import org.apache.hadoop.hbase.client.RegionInfo;
103import org.apache.hadoop.hbase.client.RegionInfoBuilder;
104import org.apache.hadoop.hbase.client.RegionStatesCount;
105import org.apache.hadoop.hbase.client.ResultScanner;
106import org.apache.hadoop.hbase.client.Scan;
107import org.apache.hadoop.hbase.client.TableDescriptor;
108import org.apache.hadoop.hbase.client.TableDescriptorBuilder;
109import org.apache.hadoop.hbase.client.TableState;
110import org.apache.hadoop.hbase.conf.ConfigurationManager;
111import org.apache.hadoop.hbase.coprocessor.CoprocessorHost;
112import org.apache.hadoop.hbase.exceptions.DeserializationException;
113import org.apache.hadoop.hbase.exceptions.MasterStoppedException;
114import org.apache.hadoop.hbase.executor.ExecutorType;
115import org.apache.hadoop.hbase.favored.FavoredNodesManager;
116import org.apache.hadoop.hbase.http.HttpServer;
117import org.apache.hadoop.hbase.http.InfoServer;
118import org.apache.hadoop.hbase.ipc.CoprocessorRpcUtils;
119import org.apache.hadoop.hbase.ipc.RpcServer;
120import org.apache.hadoop.hbase.ipc.ServerNotRunningYetException;
121import org.apache.hadoop.hbase.log.HBaseMarkers;
122import org.apache.hadoop.hbase.master.MasterRpcServices.BalanceSwitchMode;
123import org.apache.hadoop.hbase.master.assignment.AssignmentManager;
124import org.apache.hadoop.hbase.master.assignment.MergeTableRegionsProcedure;
125import org.apache.hadoop.hbase.master.assignment.RegionStateNode;
126import org.apache.hadoop.hbase.master.assignment.RegionStateStore;
127import org.apache.hadoop.hbase.master.assignment.RegionStates;
128import org.apache.hadoop.hbase.master.assignment.TransitRegionStateProcedure;
129import org.apache.hadoop.hbase.master.balancer.BalancerChore;
130import org.apache.hadoop.hbase.master.balancer.BaseLoadBalancer;
131import org.apache.hadoop.hbase.master.balancer.ClusterStatusChore;
132import org.apache.hadoop.hbase.master.balancer.LoadBalancerFactory;
133import org.apache.hadoop.hbase.master.balancer.LoadBalancerStateStore;
134import org.apache.hadoop.hbase.master.balancer.MaintenanceLoadBalancer;
135import org.apache.hadoop.hbase.master.cleaner.DirScanPool;
136import org.apache.hadoop.hbase.master.cleaner.HFileCleaner;
137import org.apache.hadoop.hbase.master.cleaner.LogCleaner;
138import org.apache.hadoop.hbase.master.cleaner.ReplicationBarrierCleaner;
139import org.apache.hadoop.hbase.master.cleaner.SnapshotCleanerChore;
140import org.apache.hadoop.hbase.master.hbck.HbckChore;
141import org.apache.hadoop.hbase.master.http.MasterDumpServlet;
142import org.apache.hadoop.hbase.master.http.MasterRedirectServlet;
143import org.apache.hadoop.hbase.master.http.MasterStatusServlet;
144import org.apache.hadoop.hbase.master.http.api_v1.ResourceConfigFactory;
145import org.apache.hadoop.hbase.master.janitor.CatalogJanitor;
146import org.apache.hadoop.hbase.master.locking.LockManager;
147import org.apache.hadoop.hbase.master.migrate.RollingUpgradeChore;
148import org.apache.hadoop.hbase.master.normalizer.RegionNormalizerFactory;
149import org.apache.hadoop.hbase.master.normalizer.RegionNormalizerManager;
150import org.apache.hadoop.hbase.master.normalizer.RegionNormalizerStateStore;
151import org.apache.hadoop.hbase.master.procedure.CreateTableProcedure;
152import org.apache.hadoop.hbase.master.procedure.DeleteNamespaceProcedure;
153import org.apache.hadoop.hbase.master.procedure.DeleteTableProcedure;
154import org.apache.hadoop.hbase.master.procedure.DisableTableProcedure;
155import org.apache.hadoop.hbase.master.procedure.EnableTableProcedure;
156import org.apache.hadoop.hbase.master.procedure.InitMetaProcedure;
157import org.apache.hadoop.hbase.master.procedure.MasterProcedureConstants;
158import org.apache.hadoop.hbase.master.procedure.MasterProcedureEnv;
159import org.apache.hadoop.hbase.master.procedure.MasterProcedureScheduler;
160import org.apache.hadoop.hbase.master.procedure.MasterProcedureUtil;
161import org.apache.hadoop.hbase.master.procedure.MasterProcedureUtil.NonceProcedureRunnable;
162import org.apache.hadoop.hbase.master.procedure.ModifyTableProcedure;
163import org.apache.hadoop.hbase.master.procedure.ProcedurePrepareLatch;
164import org.apache.hadoop.hbase.master.procedure.ProcedureSyncWait;
165import org.apache.hadoop.hbase.master.procedure.ReopenTableRegionsProcedure;
166import org.apache.hadoop.hbase.master.procedure.ServerCrashProcedure;
167import org.apache.hadoop.hbase.master.procedure.TruncateTableProcedure;
168import org.apache.hadoop.hbase.master.region.MasterRegion;
169import org.apache.hadoop.hbase.master.region.MasterRegionFactory;
170import org.apache.hadoop.hbase.master.replication.AbstractPeerProcedure;
171import org.apache.hadoop.hbase.master.replication.AddPeerProcedure;
172import org.apache.hadoop.hbase.master.replication.DisablePeerProcedure;
173import org.apache.hadoop.hbase.master.replication.EnablePeerProcedure;
174import org.apache.hadoop.hbase.master.replication.RemovePeerProcedure;
175import org.apache.hadoop.hbase.master.replication.ReplicationPeerManager;
176import org.apache.hadoop.hbase.master.replication.SyncReplicationReplayWALManager;
177import org.apache.hadoop.hbase.master.replication.TransitPeerSyncReplicationStateProcedure;
178import org.apache.hadoop.hbase.master.replication.UpdatePeerConfigProcedure;
179import org.apache.hadoop.hbase.master.slowlog.SlowLogMasterService;
180import org.apache.hadoop.hbase.master.snapshot.SnapshotCleanupStateStore;
181import org.apache.hadoop.hbase.master.snapshot.SnapshotManager;
182import org.apache.hadoop.hbase.master.waleventtracker.WALEventTrackerTableCreator;
183import org.apache.hadoop.hbase.master.zksyncer.MasterAddressSyncer;
184import org.apache.hadoop.hbase.master.zksyncer.MetaLocationSyncer;
185import org.apache.hadoop.hbase.mob.MobFileCleanerChore;
186import org.apache.hadoop.hbase.mob.MobFileCompactionChore;
187import org.apache.hadoop.hbase.monitoring.MemoryBoundedLogMessageBuffer;
188import org.apache.hadoop.hbase.monitoring.MonitoredTask;
189import org.apache.hadoop.hbase.monitoring.TaskMonitor;
190import org.apache.hadoop.hbase.namequeues.NamedQueueRecorder;
191import org.apache.hadoop.hbase.procedure.MasterProcedureManagerHost;
192import org.apache.hadoop.hbase.procedure.flush.MasterFlushTableProcedureManager;
193import org.apache.hadoop.hbase.procedure2.LockedResource;
194import org.apache.hadoop.hbase.procedure2.Procedure;
195import org.apache.hadoop.hbase.procedure2.ProcedureEvent;
196import org.apache.hadoop.hbase.procedure2.ProcedureExecutor;
197import org.apache.hadoop.hbase.procedure2.RemoteProcedureDispatcher.RemoteProcedure;
198import org.apache.hadoop.hbase.procedure2.RemoteProcedureException;
199import org.apache.hadoop.hbase.procedure2.store.ProcedureStore;
200import org.apache.hadoop.hbase.procedure2.store.ProcedureStore.ProcedureStoreListener;
201import org.apache.hadoop.hbase.procedure2.store.region.RegionProcedureStore;
202import org.apache.hadoop.hbase.quotas.MasterQuotaManager;
203import org.apache.hadoop.hbase.quotas.MasterQuotasObserver;
204import org.apache.hadoop.hbase.quotas.QuotaObserverChore;
205import org.apache.hadoop.hbase.quotas.QuotaTableUtil;
206import org.apache.hadoop.hbase.quotas.QuotaUtil;
207import org.apache.hadoop.hbase.quotas.SnapshotQuotaObserverChore;
208import org.apache.hadoop.hbase.quotas.SpaceQuotaSnapshot;
209import org.apache.hadoop.hbase.quotas.SpaceQuotaSnapshot.SpaceQuotaStatus;
210import org.apache.hadoop.hbase.quotas.SpaceQuotaSnapshotNotifier;
211import org.apache.hadoop.hbase.quotas.SpaceQuotaSnapshotNotifierFactory;
212import org.apache.hadoop.hbase.quotas.SpaceViolationPolicy;
213import org.apache.hadoop.hbase.regionserver.HRegionServer;
214import org.apache.hadoop.hbase.regionserver.NoSuchColumnFamilyException;
215import org.apache.hadoop.hbase.regionserver.storefiletracker.ModifyColumnFamilyStoreFileTrackerProcedure;
216import org.apache.hadoop.hbase.regionserver.storefiletracker.ModifyTableStoreFileTrackerProcedure;
217import org.apache.hadoop.hbase.replication.ReplicationException;
218import org.apache.hadoop.hbase.replication.ReplicationLoadSource;
219import org.apache.hadoop.hbase.replication.ReplicationPeerConfig;
220import org.apache.hadoop.hbase.replication.ReplicationPeerDescription;
221import org.apache.hadoop.hbase.replication.ReplicationUtils;
222import org.apache.hadoop.hbase.replication.SyncReplicationState;
223import org.apache.hadoop.hbase.replication.master.ReplicationHFileCleaner;
224import org.apache.hadoop.hbase.replication.master.ReplicationLogCleaner;
225import org.apache.hadoop.hbase.replication.master.ReplicationSinkTrackerTableCreator;
226import org.apache.hadoop.hbase.rsgroup.RSGroupAdminEndpoint;
227import org.apache.hadoop.hbase.rsgroup.RSGroupBasedLoadBalancer;
228import org.apache.hadoop.hbase.rsgroup.RSGroupInfoManager;
229import org.apache.hadoop.hbase.rsgroup.RSGroupUtil;
230import org.apache.hadoop.hbase.security.AccessDeniedException;
231import org.apache.hadoop.hbase.security.SecurityConstants;
232import org.apache.hadoop.hbase.security.Superusers;
233import org.apache.hadoop.hbase.security.UserProvider;
234import org.apache.hadoop.hbase.trace.TraceUtil;
235import org.apache.hadoop.hbase.util.Addressing;
236import org.apache.hadoop.hbase.util.Bytes;
237import org.apache.hadoop.hbase.util.CommonFSUtils;
238import org.apache.hadoop.hbase.util.CoprocessorConfigurationUtil;
239import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
240import org.apache.hadoop.hbase.util.FSTableDescriptors;
241import org.apache.hadoop.hbase.util.FutureUtils;
242import org.apache.hadoop.hbase.util.HBaseFsck;
243import org.apache.hadoop.hbase.util.HFileArchiveUtil;
244import org.apache.hadoop.hbase.util.IdLock;
245import org.apache.hadoop.hbase.util.JVMClusterUtil;
246import org.apache.hadoop.hbase.util.ModifyRegionUtils;
247import org.apache.hadoop.hbase.util.Pair;
248import org.apache.hadoop.hbase.util.RetryCounter;
249import org.apache.hadoop.hbase.util.RetryCounterFactory;
250import org.apache.hadoop.hbase.util.TableDescriptorChecker;
251import org.apache.hadoop.hbase.util.Threads;
252import org.apache.hadoop.hbase.util.VersionInfo;
253import org.apache.hadoop.hbase.zookeeper.MasterAddressTracker;
254import org.apache.hadoop.hbase.zookeeper.MetaTableLocator;
255import org.apache.hadoop.hbase.zookeeper.ZKClusterId;
256import org.apache.hadoop.hbase.zookeeper.ZKUtil;
257import org.apache.hadoop.hbase.zookeeper.ZKWatcher;
258import org.apache.hadoop.hbase.zookeeper.ZNodePaths;
259import org.apache.yetus.audience.InterfaceAudience;
260import org.apache.zookeeper.KeeperException;
261import org.slf4j.Logger;
262import org.slf4j.LoggerFactory;
263
264import org.apache.hbase.thirdparty.com.google.common.collect.Lists;
265import org.apache.hbase.thirdparty.com.google.common.collect.Maps;
266import org.apache.hbase.thirdparty.com.google.common.collect.Sets;
267import org.apache.hbase.thirdparty.com.google.common.io.Closeables;
268import org.apache.hbase.thirdparty.com.google.protobuf.Descriptors;
269import org.apache.hbase.thirdparty.com.google.protobuf.Service;
270import org.apache.hbase.thirdparty.org.eclipse.jetty.server.Server;
271import org.apache.hbase.thirdparty.org.eclipse.jetty.server.ServerConnector;
272import org.apache.hbase.thirdparty.org.eclipse.jetty.servlet.ServletHolder;
273import org.apache.hbase.thirdparty.org.eclipse.jetty.webapp.WebAppContext;
274import org.apache.hbase.thirdparty.org.glassfish.jersey.server.ResourceConfig;
275import org.apache.hbase.thirdparty.org.glassfish.jersey.servlet.ServletContainer;
276
277import org.apache.hadoop.hbase.shaded.protobuf.RequestConverter;
278import org.apache.hadoop.hbase.shaded.protobuf.generated.AdminProtos.GetRegionInfoResponse;
279import org.apache.hadoop.hbase.shaded.protobuf.generated.SnapshotProtos.SnapshotDescription;
280
281/**
282 * HMaster is the "master server" for HBase. An HBase cluster has one active master. If many masters
283 * are started, all compete. Whichever wins goes on to run the cluster. All others park themselves
284 * in their constructor until master or cluster shutdown or until the active master loses its lease
285 * in zookeeper. Thereafter, all running master jostle to take over master role.
286 * <p/>
287 * The Master can be asked shutdown the cluster. See {@link #shutdown()}. In this case it will tell
288 * all regionservers to go down and then wait on them all reporting in that they are down. This
289 * master will then shut itself down.
290 * <p/>
291 * You can also shutdown just this master. Call {@link #stopMaster()}.
292 * @see org.apache.zookeeper.Watcher
293 */
294@InterfaceAudience.LimitedPrivate(HBaseInterfaceAudience.TOOLS)
295public class HMaster extends HBaseServerBase<MasterRpcServices> implements MasterServices {
296
297  private static final Logger LOG = LoggerFactory.getLogger(HMaster.class);
298
299  // MASTER is name of the webapp and the attribute name used stuffing this
300  // instance into a web context !! AND OTHER PLACES !!
301  public static final String MASTER = "master";
302
303  // Manager and zk listener for master election
304  private final ActiveMasterManager activeMasterManager;
305  // Region server tracker
306  private final RegionServerTracker regionServerTracker;
307  // Draining region server tracker
308  private DrainingServerTracker drainingServerTracker;
309  // Tracker for load balancer state
310  LoadBalancerStateStore loadBalancerStateStore;
311  // Tracker for meta location, if any client ZK quorum specified
312  private MetaLocationSyncer metaLocationSyncer;
313  // Tracker for active master location, if any client ZK quorum specified
314  @InterfaceAudience.Private
315  MasterAddressSyncer masterAddressSyncer;
316  // Tracker for auto snapshot cleanup state
317  SnapshotCleanupStateStore snapshotCleanupStateStore;
318
319  // Tracker for split and merge state
320  private SplitOrMergeStateStore splitOrMergeStateStore;
321
322  private ClusterSchemaService clusterSchemaService;
323
324  public static final String HBASE_MASTER_WAIT_ON_SERVICE_IN_SECONDS =
325    "hbase.master.wait.on.service.seconds";
326  public static final int DEFAULT_HBASE_MASTER_WAIT_ON_SERVICE_IN_SECONDS = 5 * 60;
327
328  public static final String HBASE_MASTER_CLEANER_INTERVAL = "hbase.master.cleaner.interval";
329
330  public static final int DEFAULT_HBASE_MASTER_CLEANER_INTERVAL = 600 * 1000;
331
332  private String clusterId;
333
334  // Metrics for the HMaster
335  final MetricsMaster metricsMaster;
336  // file system manager for the master FS operations
337  private MasterFileSystem fileSystemManager;
338  private MasterWalManager walManager;
339
340  // manager to manage procedure-based WAL splitting, can be null if current
341  // is zk-based WAL splitting. SplitWALManager will replace SplitLogManager
342  // and MasterWalManager, which means zk-based WAL splitting code will be
343  // useless after we switch to the procedure-based one. our eventual goal
344  // is to remove all the zk-based WAL splitting code.
345  private SplitWALManager splitWALManager;
346
347  // server manager to deal with region server info
348  private volatile ServerManager serverManager;
349
350  // manager of assignment nodes in zookeeper
351  private AssignmentManager assignmentManager;
352
353  private RSGroupInfoManager rsGroupInfoManager;
354
355  // manager of replication
356  private ReplicationPeerManager replicationPeerManager;
357
358  private SyncReplicationReplayWALManager syncReplicationReplayWALManager;
359
360  // buffer for "fatal error" notices from region servers
361  // in the cluster. This is only used for assisting
362  // operations/debugging.
363  MemoryBoundedLogMessageBuffer rsFatals;
364
365  // flag set after we become the active master (used for testing)
366  private volatile boolean activeMaster = false;
367
368  // flag set after we complete initialization once active
369  private final ProcedureEvent<?> initialized = new ProcedureEvent<>("master initialized");
370
371  // flag set after master services are started,
372  // initialization may have not completed yet.
373  volatile boolean serviceStarted = false;
374
375  // Maximum time we should run balancer for
376  private final int maxBalancingTime;
377  // Maximum percent of regions in transition when balancing
378  private final double maxRitPercent;
379
380  private final LockManager lockManager = new LockManager(this);
381
382  private RSGroupBasedLoadBalancer balancer;
383  private BalancerChore balancerChore;
384  private static boolean disableBalancerChoreForTest = false;
385  private RegionNormalizerManager regionNormalizerManager;
386  private ClusterStatusChore clusterStatusChore;
387  private ClusterStatusPublisher clusterStatusPublisherChore = null;
388  private SnapshotCleanerChore snapshotCleanerChore = null;
389
390  private HbckChore hbckChore;
391  CatalogJanitor catalogJanitorChore;
392  // Threadpool for scanning the Old logs directory, used by the LogCleaner
393  private DirScanPool logCleanerPool;
394  private LogCleaner logCleaner;
395  // HFile cleaners for the custom hfile archive paths and the default archive path
396  // The archive path cleaner is the first element
397  private List<HFileCleaner> hfileCleaners = new ArrayList<>();
398  // The hfile cleaner paths, including custom paths and the default archive path
399  private List<Path> hfileCleanerPaths = new ArrayList<>();
400  // The shared hfile cleaner pool for the custom archive paths
401  private DirScanPool sharedHFileCleanerPool;
402  // The exclusive hfile cleaner pool for scanning the archive directory
403  private DirScanPool exclusiveHFileCleanerPool;
404  private ReplicationBarrierCleaner replicationBarrierCleaner;
405  private MobFileCleanerChore mobFileCleanerChore;
406  private MobFileCompactionChore mobFileCompactionChore;
407  private RollingUpgradeChore rollingUpgradeChore;
408  // used to synchronize the mobCompactionStates
409  private final IdLock mobCompactionLock = new IdLock();
410  // save the information of mob compactions in tables.
411  // the key is table name, the value is the number of compactions in that table.
412  private Map<TableName, AtomicInteger> mobCompactionStates = Maps.newConcurrentMap();
413
414  volatile MasterCoprocessorHost cpHost;
415
416  private final boolean preLoadTableDescriptors;
417
418  // Time stamps for when a hmaster became active
419  private long masterActiveTime;
420
421  // Time stamp for when HMaster finishes becoming Active Master
422  private long masterFinishedInitializationTime;
423
424  Map<String, Service> coprocessorServiceHandlers = Maps.newHashMap();
425
426  // monitor for snapshot of hbase tables
427  SnapshotManager snapshotManager;
428  // monitor for distributed procedures
429  private MasterProcedureManagerHost mpmHost;
430
431  private RegionsRecoveryChore regionsRecoveryChore = null;
432
433  private RegionsRecoveryConfigManager regionsRecoveryConfigManager = null;
434  // it is assigned after 'initialized' guard set to true, so should be volatile
435  private volatile MasterQuotaManager quotaManager;
436  private SpaceQuotaSnapshotNotifier spaceQuotaSnapshotNotifier;
437  private QuotaObserverChore quotaObserverChore;
438  private SnapshotQuotaObserverChore snapshotQuotaChore;
439
440  private ProcedureExecutor<MasterProcedureEnv> procedureExecutor;
441  private ProcedureStore procedureStore;
442
443  // the master local storage to store procedure data, meta region locations, etc.
444  private MasterRegion masterRegion;
445
446  private RegionServerList rsListStorage;
447
448  // handle table states
449  private TableStateManager tableStateManager;
450
451  /** jetty server for master to redirect requests to regionserver infoServer */
452  private Server masterJettyServer;
453
454  // Determine if we should do normal startup or minimal "single-user" mode with no region
455  // servers and no user tables. Useful for repair and recovery of hbase:meta
456  private final boolean maintenanceMode;
457  static final String MAINTENANCE_MODE = "hbase.master.maintenance_mode";
458
459  // the in process region server for carry system regions in maintenanceMode
460  private JVMClusterUtil.RegionServerThread maintenanceRegionServer;
461
462  // Cached clusterId on stand by masters to serve clusterID requests from clients.
463  private final CachedClusterId cachedClusterId;
464
465  public static final String WARMUP_BEFORE_MOVE = "hbase.master.warmup.before.move";
466  private static final boolean DEFAULT_WARMUP_BEFORE_MOVE = true;
467
468  /**
469   * Initializes the HMaster. The steps are as follows:
470   * <p>
471   * <ol>
472   * <li>Initialize the local HRegionServer
473   * <li>Start the ActiveMasterManager.
474   * </ol>
475   * <p>
476   * Remaining steps of initialization occur in
477   * {@link #finishActiveMasterInitialization(MonitoredTask)} after the master becomes the active
478   * one.
479   */
480  public HMaster(final Configuration conf) throws IOException {
481    super(conf, "Master");
482    final Span span = TraceUtil.createSpan("HMaster.cxtor");
483    try (Scope ignored = span.makeCurrent()) {
484      if (conf.getBoolean(MAINTENANCE_MODE, false)) {
485        LOG.info("Detected {}=true via configuration.", MAINTENANCE_MODE);
486        maintenanceMode = true;
487      } else if (Boolean.getBoolean(MAINTENANCE_MODE)) {
488        LOG.info("Detected {}=true via environment variables.", MAINTENANCE_MODE);
489        maintenanceMode = true;
490      } else {
491        maintenanceMode = false;
492      }
493      this.rsFatals = new MemoryBoundedLogMessageBuffer(
494        conf.getLong("hbase.master.buffer.for.rs.fatals", 1 * 1024 * 1024));
495      LOG.info("hbase.rootdir={}, hbase.cluster.distributed={}",
496        CommonFSUtils.getRootDir(this.conf),
497        this.conf.getBoolean(HConstants.CLUSTER_DISTRIBUTED, false));
498
499      // Disable usage of meta replicas in the master
500      this.conf.setBoolean(HConstants.USE_META_REPLICAS, false);
501
502      decorateMasterConfiguration(this.conf);
503
504      // Hack! Maps DFSClient => Master for logs. HDFS made this
505      // config param for task trackers, but we can piggyback off of it.
506      if (this.conf.get("mapreduce.task.attempt.id") == null) {
507        this.conf.set("mapreduce.task.attempt.id", "hb_m_" + this.serverName.toString());
508      }
509
510      this.metricsMaster = new MetricsMaster(new MetricsMasterWrapperImpl(this));
511
512      // preload table descriptor at startup
513      this.preLoadTableDescriptors = conf.getBoolean("hbase.master.preload.tabledescriptors", true);
514
515      this.maxBalancingTime = getMaxBalancingTime();
516      this.maxRitPercent = conf.getDouble(HConstants.HBASE_MASTER_BALANCER_MAX_RIT_PERCENT,
517        HConstants.DEFAULT_HBASE_MASTER_BALANCER_MAX_RIT_PERCENT);
518
519      // Do we publish the status?
520
521      boolean shouldPublish =
522        conf.getBoolean(HConstants.STATUS_PUBLISHED, HConstants.STATUS_PUBLISHED_DEFAULT);
523      Class<? extends ClusterStatusPublisher.Publisher> publisherClass =
524        conf.getClass(ClusterStatusPublisher.STATUS_PUBLISHER_CLASS,
525          ClusterStatusPublisher.DEFAULT_STATUS_PUBLISHER_CLASS,
526          ClusterStatusPublisher.Publisher.class);
527
528      if (shouldPublish) {
529        if (publisherClass == null) {
530          LOG.warn(HConstants.STATUS_PUBLISHED + " is true, but "
531            + ClusterStatusPublisher.DEFAULT_STATUS_PUBLISHER_CLASS
532            + " is not set - not publishing status");
533        } else {
534          clusterStatusPublisherChore = new ClusterStatusPublisher(this, conf, publisherClass);
535          LOG.debug("Created {}", this.clusterStatusPublisherChore);
536          getChoreService().scheduleChore(clusterStatusPublisherChore);
537        }
538      }
539      this.activeMasterManager = createActiveMasterManager(zooKeeper, serverName, this);
540      cachedClusterId = new CachedClusterId(this, conf);
541      this.regionServerTracker = new RegionServerTracker(zooKeeper, this);
542      this.rpcServices.start(zooKeeper);
543      span.setStatus(StatusCode.OK);
544    } catch (Throwable t) {
545      // Make sure we log the exception. HMaster is often started via reflection and the
546      // cause of failed startup is lost.
547      TraceUtil.setError(span, t);
548      LOG.error("Failed construction of Master", t);
549      throw t;
550    } finally {
551      span.end();
552    }
553  }
554
555  /**
556   * Protected to have custom implementations in tests override the default ActiveMaster
557   * implementation.
558   */
559  protected ActiveMasterManager createActiveMasterManager(ZKWatcher zk, ServerName sn,
560    org.apache.hadoop.hbase.Server server) throws InterruptedIOException {
561    return new ActiveMasterManager(zk, sn, server);
562  }
563
564  @Override
565  protected String getUseThisHostnameInstead(Configuration conf) {
566    return conf.get(MASTER_HOSTNAME_KEY);
567  }
568
569  private void registerConfigurationObservers() {
570    configurationManager.registerObserver(this.rpcServices);
571    configurationManager.registerObserver(this);
572  }
573
574  // Main run loop. Calls through to the regionserver run loop AFTER becoming active Master; will
575  // block in here until then.
576  @Override
577  public void run() {
578    try {
579      installShutdownHook();
580      registerConfigurationObservers();
581      Threads.setDaemonThreadRunning(new Thread(TraceUtil.tracedRunnable(() -> {
582        try {
583          int infoPort = putUpJettyServer();
584          startActiveMasterManager(infoPort);
585        } catch (Throwable t) {
586          // Make sure we log the exception.
587          String error = "Failed to become Active Master";
588          LOG.error(error, t);
589          // Abort should have been called already.
590          if (!isAborted()) {
591            abort(error, t);
592          }
593        }
594      }, "HMaster.becomeActiveMaster")), getName() + ":becomeActiveMaster");
595      while (!isStopped() && !isAborted()) {
596        sleeper.sleep();
597      }
598      final Span span = TraceUtil.createSpan("HMaster exiting main loop");
599      try (Scope ignored = span.makeCurrent()) {
600        stopInfoServer();
601        closeClusterConnection();
602        stopServiceThreads();
603        if (this.rpcServices != null) {
604          this.rpcServices.stop();
605        }
606        closeZooKeeper();
607        closeTableDescriptors();
608        span.setStatus(StatusCode.OK);
609      } finally {
610        span.end();
611      }
612    } finally {
613      if (this.clusterSchemaService != null) {
614        // If on way out, then we are no longer active master.
615        this.clusterSchemaService.stopAsync();
616        try {
617          this.clusterSchemaService
618            .awaitTerminated(getConfiguration().getInt(HBASE_MASTER_WAIT_ON_SERVICE_IN_SECONDS,
619              DEFAULT_HBASE_MASTER_WAIT_ON_SERVICE_IN_SECONDS), TimeUnit.SECONDS);
620        } catch (TimeoutException te) {
621          LOG.warn("Failed shutdown of clusterSchemaService", te);
622        }
623      }
624      this.activeMaster = false;
625    }
626  }
627
628  // return the actual infoPort, -1 means disable info server.
629  private int putUpJettyServer() throws IOException {
630    if (!conf.getBoolean("hbase.master.infoserver.redirect", true)) {
631      return -1;
632    }
633    final int infoPort =
634      conf.getInt("hbase.master.info.port.orig", HConstants.DEFAULT_MASTER_INFOPORT);
635    // -1 is for disabling info server, so no redirecting
636    if (infoPort < 0 || infoServer == null) {
637      return -1;
638    }
639    if (infoPort == infoServer.getPort()) {
640      // server is already running
641      return infoPort;
642    }
643    final String addr = conf.get("hbase.master.info.bindAddress", "0.0.0.0");
644    if (!Addressing.isLocalAddress(InetAddress.getByName(addr))) {
645      String msg = "Failed to start redirecting jetty server. Address " + addr
646        + " does not belong to this host. Correct configuration parameter: "
647        + "hbase.master.info.bindAddress";
648      LOG.error(msg);
649      throw new IOException(msg);
650    }
651
652    // TODO I'm pretty sure we could just add another binding to the InfoServer run by
653    // the RegionServer and have it run the RedirectServlet instead of standing up
654    // a second entire stack here.
655    masterJettyServer = new Server();
656    final ServerConnector connector = new ServerConnector(masterJettyServer);
657    connector.setHost(addr);
658    connector.setPort(infoPort);
659    masterJettyServer.addConnector(connector);
660    masterJettyServer.setStopAtShutdown(true);
661    masterJettyServer.setHandler(HttpServer.buildGzipHandler(masterJettyServer.getHandler()));
662
663    final String redirectHostname =
664      StringUtils.isBlank(useThisHostnameInstead) ? null : useThisHostnameInstead;
665
666    final MasterRedirectServlet redirect = new MasterRedirectServlet(infoServer, redirectHostname);
667    final WebAppContext context =
668      new WebAppContext(null, "/", null, null, null, null, WebAppContext.NO_SESSIONS);
669    context.addServlet(new ServletHolder(redirect), "/*");
670    context.setServer(masterJettyServer);
671
672    try {
673      masterJettyServer.start();
674    } catch (Exception e) {
675      throw new IOException("Failed to start redirecting jetty server", e);
676    }
677    return connector.getLocalPort();
678  }
679
680  /**
681   * For compatibility, if failed with regionserver credentials, try the master one
682   */
683  @Override
684  protected void login(UserProvider user, String host) throws IOException {
685    try {
686      user.login(SecurityConstants.REGIONSERVER_KRB_KEYTAB_FILE,
687        SecurityConstants.REGIONSERVER_KRB_PRINCIPAL, host);
688    } catch (IOException ie) {
689      user.login(SecurityConstants.MASTER_KRB_KEYTAB_FILE, SecurityConstants.MASTER_KRB_PRINCIPAL,
690        host);
691    }
692  }
693
694  public MasterRpcServices getMasterRpcServices() {
695    return rpcServices;
696  }
697
698  public boolean balanceSwitch(final boolean b) throws IOException {
699    return getMasterRpcServices().switchBalancer(b, BalanceSwitchMode.ASYNC);
700  }
701
702  @Override
703  protected String getProcessName() {
704    return MASTER;
705  }
706
707  @Override
708  protected boolean canCreateBaseZNode() {
709    return true;
710  }
711
712  @Override
713  protected boolean canUpdateTableDescriptor() {
714    return true;
715  }
716
717  @Override
718  protected boolean cacheTableDescriptor() {
719    return true;
720  }
721
722  protected MasterRpcServices createRpcServices() throws IOException {
723    return new MasterRpcServices(this);
724  }
725
726  @Override
727  protected void configureInfoServer(InfoServer infoServer) {
728    infoServer.addUnprivilegedServlet("master-status", "/master-status", MasterStatusServlet.class);
729    infoServer.addUnprivilegedServlet("api_v1", "/api/v1/*", buildApiV1Servlet());
730
731    infoServer.setAttribute(MASTER, this);
732  }
733
734  private ServletHolder buildApiV1Servlet() {
735    final ResourceConfig config = ResourceConfigFactory.createResourceConfig(conf, this);
736    return new ServletHolder(new ServletContainer(config));
737  }
738
739  @Override
740  protected Class<? extends HttpServlet> getDumpServlet() {
741    return MasterDumpServlet.class;
742  }
743
744  @Override
745  public MetricsMaster getMasterMetrics() {
746    return metricsMaster;
747  }
748
749  /**
750   * Initialize all ZK based system trackers. But do not include {@link RegionServerTracker}, it
751   * should have already been initialized along with {@link ServerManager}.
752   */
753  private void initializeZKBasedSystemTrackers()
754    throws IOException, KeeperException, ReplicationException, DeserializationException {
755    if (maintenanceMode) {
756      // in maintenance mode, always use MaintenanceLoadBalancer.
757      conf.unset(LoadBalancer.HBASE_RSGROUP_LOADBALANCER_CLASS);
758      conf.setClass(HConstants.HBASE_MASTER_LOADBALANCER_CLASS, MaintenanceLoadBalancer.class,
759        LoadBalancer.class);
760    }
761    this.balancer = new RSGroupBasedLoadBalancer();
762    this.loadBalancerStateStore = new LoadBalancerStateStore(masterRegion, zooKeeper);
763
764    this.regionNormalizerManager =
765      RegionNormalizerFactory.createNormalizerManager(conf, masterRegion, zooKeeper, this);
766    this.configurationManager.registerObserver(regionNormalizerManager);
767    this.regionNormalizerManager.start();
768
769    this.splitOrMergeStateStore = new SplitOrMergeStateStore(masterRegion, zooKeeper, conf);
770
771    // This is for backwards compatible. We do not need the CP for rs group now but if user want to
772    // load it, we need to enable rs group.
773    String[] cpClasses = conf.getStrings(MasterCoprocessorHost.MASTER_COPROCESSOR_CONF_KEY);
774    if (cpClasses != null) {
775      for (String cpClass : cpClasses) {
776        if (RSGroupAdminEndpoint.class.getName().equals(cpClass)) {
777          RSGroupUtil.enableRSGroup(conf);
778          break;
779        }
780      }
781    }
782    this.rsGroupInfoManager = RSGroupInfoManager.create(this);
783
784    this.replicationPeerManager = ReplicationPeerManager.create(zooKeeper, conf, clusterId);
785
786    this.drainingServerTracker = new DrainingServerTracker(zooKeeper, this, this.serverManager);
787    this.drainingServerTracker.start();
788
789    this.snapshotCleanupStateStore = new SnapshotCleanupStateStore(masterRegion, zooKeeper);
790
791    String clientQuorumServers = conf.get(HConstants.CLIENT_ZOOKEEPER_QUORUM);
792    boolean clientZkObserverMode = conf.getBoolean(HConstants.CLIENT_ZOOKEEPER_OBSERVER_MODE,
793      HConstants.DEFAULT_CLIENT_ZOOKEEPER_OBSERVER_MODE);
794    if (clientQuorumServers != null && !clientZkObserverMode) {
795      // we need to take care of the ZK information synchronization
796      // if given client ZK are not observer nodes
797      ZKWatcher clientZkWatcher = new ZKWatcher(conf,
798        getProcessName() + ":" + rpcServices.getSocketAddress().getPort() + "-clientZK", this,
799        false, true);
800      this.metaLocationSyncer = new MetaLocationSyncer(zooKeeper, clientZkWatcher, this);
801      this.metaLocationSyncer.start();
802      this.masterAddressSyncer = new MasterAddressSyncer(zooKeeper, clientZkWatcher, this);
803      this.masterAddressSyncer.start();
804      // set cluster id is a one-go effort
805      ZKClusterId.setClusterId(clientZkWatcher, fileSystemManager.getClusterId());
806    }
807
808    // Set the cluster as up. If new RSs, they'll be waiting on this before
809    // going ahead with their startup.
810    boolean wasUp = this.clusterStatusTracker.isClusterUp();
811    if (!wasUp) this.clusterStatusTracker.setClusterUp();
812
813    LOG.info("Active/primary master=" + this.serverName + ", sessionid=0x"
814      + Long.toHexString(this.zooKeeper.getRecoverableZooKeeper().getSessionId())
815      + ", setting cluster-up flag (Was=" + wasUp + ")");
816
817    // create/initialize the snapshot manager and other procedure managers
818    this.snapshotManager = new SnapshotManager();
819    this.mpmHost = new MasterProcedureManagerHost();
820    this.mpmHost.register(this.snapshotManager);
821    this.mpmHost.register(new MasterFlushTableProcedureManager());
822    this.mpmHost.loadProcedures(conf);
823    this.mpmHost.initialize(this, this.metricsMaster);
824  }
825
826  // Will be overriden in test to inject customized AssignmentManager
827  @InterfaceAudience.Private
828  protected AssignmentManager createAssignmentManager(MasterServices master,
829    MasterRegion masterRegion) {
830    return new AssignmentManager(master, masterRegion);
831  }
832
833  private void tryMigrateMetaLocationsFromZooKeeper() throws IOException, KeeperException {
834    // try migrate data from zookeeper
835    try (ResultScanner scanner =
836      masterRegion.getScanner(new Scan().addFamily(HConstants.CATALOG_FAMILY))) {
837      if (scanner.next() != null) {
838        // notice that all replicas for a region are in the same row, so the migration can be
839        // done with in a one row put, which means if we have data in catalog family then we can
840        // make sure that the migration is done.
841        LOG.info("The {} family in master local region already has data in it, skip migrating...",
842          HConstants.CATALOG_FAMILY_STR);
843        return;
844      }
845    }
846    // start migrating
847    byte[] row = CatalogFamilyFormat.getMetaKeyForRegion(RegionInfoBuilder.FIRST_META_REGIONINFO);
848    Put put = new Put(row);
849    List<String> metaReplicaNodes = zooKeeper.getMetaReplicaNodes();
850    StringBuilder info = new StringBuilder("Migrating meta locations:");
851    for (String metaReplicaNode : metaReplicaNodes) {
852      int replicaId = zooKeeper.getZNodePaths().getMetaReplicaIdFromZNode(metaReplicaNode);
853      RegionState state = MetaTableLocator.getMetaRegionState(zooKeeper, replicaId);
854      info.append(" ").append(state);
855      put.setTimestamp(state.getStamp());
856      MetaTableAccessor.addRegionInfo(put, state.getRegion());
857      if (state.getServerName() != null) {
858        MetaTableAccessor.addLocation(put, state.getServerName(), HConstants.NO_SEQNUM, replicaId);
859      }
860      put.add(CellBuilderFactory.create(CellBuilderType.SHALLOW_COPY).setRow(put.getRow())
861        .setFamily(HConstants.CATALOG_FAMILY)
862        .setQualifier(RegionStateStore.getStateColumn(replicaId)).setTimestamp(put.getTimestamp())
863        .setType(Cell.Type.Put).setValue(Bytes.toBytes(state.getState().name())).build());
864    }
865    if (!put.isEmpty()) {
866      LOG.info(info.toString());
867      masterRegion.update(r -> r.put(put));
868    } else {
869      LOG.info("No meta location available on zookeeper, skip migrating...");
870    }
871  }
872
873  /**
874   * Finish initialization of HMaster after becoming the primary master.
875   * <p/>
876   * The startup order is a bit complicated but very important, do not change it unless you know
877   * what you are doing.
878   * <ol>
879   * <li>Initialize file system based components - file system manager, wal manager, table
880   * descriptors, etc</li>
881   * <li>Publish cluster id</li>
882   * <li>Here comes the most complicated part - initialize server manager, assignment manager and
883   * region server tracker
884   * <ol type='i'>
885   * <li>Create server manager</li>
886   * <li>Create master local region</li>
887   * <li>Create procedure executor, load the procedures, but do not start workers. We will start it
888   * later after we finish scheduling SCPs to avoid scheduling duplicated SCPs for the same
889   * server</li>
890   * <li>Create assignment manager and start it, load the meta region state, but do not load data
891   * from meta region</li>
892   * <li>Start region server tracker, construct the online servers set and find out dead servers and
893   * schedule SCP for them. The online servers will be constructed by scanning zk, and we will also
894   * scan the wal directory and load from master local region to find out possible live region
895   * servers, and the differences between these two sets are the dead servers</li>
896   * </ol>
897   * </li>
898   * <li>If this is a new deploy, schedule a InitMetaProcedure to initialize meta</li>
899   * <li>Start necessary service threads - balancer, catalog janitor, executor services, and also
900   * the procedure executor, etc. Notice that the balancer must be created first as assignment
901   * manager may use it when assigning regions.</li>
902   * <li>Wait for meta to be initialized if necessary, start table state manager.</li>
903   * <li>Wait for enough region servers to check-in</li>
904   * <li>Let assignment manager load data from meta and construct region states</li>
905   * <li>Start all other things such as chore services, etc</li>
906   * </ol>
907   * <p/>
908   * Notice that now we will not schedule a special procedure to make meta online(unless the first
909   * time where meta has not been created yet), we will rely on SCP to bring meta online.
910   */
911  private void finishActiveMasterInitialization(MonitoredTask status) throws IOException,
912    InterruptedException, KeeperException, ReplicationException, DeserializationException {
913    /*
914     * We are active master now... go initialize components we need to run.
915     */
916    status.setStatus("Initializing Master file system");
917
918    this.masterActiveTime = EnvironmentEdgeManager.currentTime();
919    // TODO: Do this using Dependency Injection, using PicoContainer, Guice or Spring.
920
921    // always initialize the MemStoreLAB as we use a region to store data in master now, see
922    // localStore.
923    initializeMemStoreChunkCreator(null);
924    this.fileSystemManager = new MasterFileSystem(conf);
925    this.walManager = new MasterWalManager(this);
926
927    // warm-up HTDs cache on master initialization
928    if (preLoadTableDescriptors) {
929      status.setStatus("Pre-loading table descriptors");
930      this.tableDescriptors.getAll();
931    }
932
933    // Publish cluster ID; set it in Master too. The superclass RegionServer does this later but
934    // only after it has checked in with the Master. At least a few tests ask Master for clusterId
935    // before it has called its run method and before RegionServer has done the reportForDuty.
936    ClusterId clusterId = fileSystemManager.getClusterId();
937    status.setStatus("Publishing Cluster ID " + clusterId + " in ZooKeeper");
938    ZKClusterId.setClusterId(this.zooKeeper, fileSystemManager.getClusterId());
939    this.clusterId = clusterId.toString();
940
941    // Precaution. Put in place the old hbck1 lock file to fence out old hbase1s running their
942    // hbck1s against an hbase2 cluster; it could do damage. To skip this behavior, set
943    // hbase.write.hbck1.lock.file to false.
944    if (this.conf.getBoolean("hbase.write.hbck1.lock.file", true)) {
945      Pair<Path, FSDataOutputStream> result = null;
946      try {
947        result = HBaseFsck.checkAndMarkRunningHbck(this.conf,
948          HBaseFsck.createLockRetryCounterFactory(this.conf).create());
949      } finally {
950        if (result != null) {
951          Closeables.close(result.getSecond(), true);
952        }
953      }
954    }
955
956    status.setStatus("Initialize ServerManager and schedule SCP for crash servers");
957    // The below two managers must be created before loading procedures, as they will be used during
958    // loading.
959    // initialize master local region
960    masterRegion = MasterRegionFactory.create(this);
961    rsListStorage = new MasterRegionServerList(masterRegion, this);
962
963    this.serverManager = createServerManager(this, rsListStorage);
964    this.syncReplicationReplayWALManager = new SyncReplicationReplayWALManager(this);
965    if (
966      !conf.getBoolean(HBASE_SPLIT_WAL_COORDINATED_BY_ZK, DEFAULT_HBASE_SPLIT_COORDINATED_BY_ZK)
967    ) {
968      this.splitWALManager = new SplitWALManager(this);
969    }
970
971    tryMigrateMetaLocationsFromZooKeeper();
972
973    createProcedureExecutor();
974    Map<Class<?>, List<Procedure<MasterProcedureEnv>>> procsByType = procedureExecutor
975      .getActiveProceduresNoCopy().stream().collect(Collectors.groupingBy(p -> p.getClass()));
976
977    // Create Assignment Manager
978    this.assignmentManager = createAssignmentManager(this, masterRegion);
979    this.assignmentManager.start();
980    // TODO: TRSP can perform as the sub procedure for other procedures, so even if it is marked as
981    // completed, it could still be in the procedure list. This is a bit strange but is another
982    // story, need to verify the implementation for ProcedureExecutor and ProcedureStore.
983    List<TransitRegionStateProcedure> ritList =
984      procsByType.getOrDefault(TransitRegionStateProcedure.class, Collections.emptyList()).stream()
985        .filter(p -> !p.isFinished()).map(p -> (TransitRegionStateProcedure) p)
986        .collect(Collectors.toList());
987    this.assignmentManager.setupRIT(ritList);
988
989    // Start RegionServerTracker with listing of servers found with exiting SCPs -- these should
990    // be registered in the deadServers set -- and the servernames loaded from the WAL directory
991    // and master local region that COULD BE 'alive'(we'll schedule SCPs for each and let SCP figure
992    // it out).
993    // We also pass dirs that are already 'splitting'... so we can do some checks down in tracker.
994    // TODO: Generate the splitting and live Set in one pass instead of two as we currently do.
995    this.regionServerTracker.upgrade(
996      procsByType.getOrDefault(ServerCrashProcedure.class, Collections.emptyList()).stream()
997        .map(p -> (ServerCrashProcedure) p).map(p -> p.getServerName()).collect(Collectors.toSet()),
998      Sets.union(rsListStorage.getAll(), walManager.getLiveServersFromWALDir()),
999      walManager.getSplittingServersFromWALDir());
1000    // This manager must be accessed AFTER hbase:meta is confirmed on line..
1001    this.tableStateManager = new TableStateManager(this);
1002
1003    status.setStatus("Initializing ZK system trackers");
1004    initializeZKBasedSystemTrackers();
1005    status.setStatus("Loading last flushed sequence id of regions");
1006    try {
1007      this.serverManager.loadLastFlushedSequenceIds();
1008    } catch (IOException e) {
1009      LOG.info("Failed to load last flushed sequence id of regions" + " from file system", e);
1010    }
1011    // Set ourselves as active Master now our claim has succeeded up in zk.
1012    this.activeMaster = true;
1013
1014    // Start the Zombie master detector after setting master as active, see HBASE-21535
1015    Thread zombieDetector = new Thread(new MasterInitializationMonitor(this),
1016      "ActiveMasterInitializationMonitor-" + EnvironmentEdgeManager.currentTime());
1017    zombieDetector.setDaemon(true);
1018    zombieDetector.start();
1019
1020    if (!maintenanceMode) {
1021      status.setStatus("Initializing master coprocessors");
1022      setQuotasObserver(conf);
1023      initializeCoprocessorHost(conf);
1024    } else {
1025      // start an in process region server for carrying system regions
1026      maintenanceRegionServer =
1027        JVMClusterUtil.createRegionServerThread(getConfiguration(), HRegionServer.class, 0);
1028      maintenanceRegionServer.start();
1029    }
1030
1031    // Checking if meta needs initializing.
1032    status.setStatus("Initializing meta table if this is a new deploy");
1033    InitMetaProcedure initMetaProc = null;
1034    // Print out state of hbase:meta on startup; helps debugging.
1035    if (!this.assignmentManager.getRegionStates().hasTableRegionStates(TableName.META_TABLE_NAME)) {
1036      Optional<InitMetaProcedure> optProc = procedureExecutor.getProcedures().stream()
1037        .filter(p -> p instanceof InitMetaProcedure).map(o -> (InitMetaProcedure) o).findAny();
1038      initMetaProc = optProc.orElseGet(() -> {
1039        // schedule an init meta procedure if meta has not been deployed yet
1040        InitMetaProcedure temp = new InitMetaProcedure();
1041        procedureExecutor.submitProcedure(temp);
1042        return temp;
1043      });
1044    }
1045
1046    // initialize load balancer
1047    this.balancer.setMasterServices(this);
1048    this.balancer.initialize();
1049    this.balancer.updateClusterMetrics(getClusterMetricsWithoutCoprocessor());
1050
1051    // start up all service threads.
1052    status.setStatus("Initializing master service threads");
1053    startServiceThreads();
1054    // wait meta to be initialized after we start procedure executor
1055    if (initMetaProc != null) {
1056      initMetaProc.await();
1057    }
1058    // Wake up this server to check in
1059    sleeper.skipSleepCycle();
1060
1061    // Wait for region servers to report in.
1062    // With this as part of master initialization, it precludes our being able to start a single
1063    // server that is both Master and RegionServer. Needs more thought. TODO.
1064    String statusStr = "Wait for region servers to report in";
1065    status.setStatus(statusStr);
1066    LOG.info(Objects.toString(status));
1067    waitForRegionServers(status);
1068
1069    // Check if master is shutting down because issue initializing regionservers or balancer.
1070    if (isStopped()) {
1071      return;
1072    }
1073
1074    status.setStatus("Starting assignment manager");
1075    // FIRST HBASE:META READ!!!!
1076    // The below cannot make progress w/o hbase:meta being online.
1077    // This is the FIRST attempt at going to hbase:meta. Meta on-lining is going on in background
1078    // as procedures run -- in particular SCPs for crashed servers... One should put up hbase:meta
1079    // if it is down. It may take a while to come online. So, wait here until meta if for sure
1080    // available. That's what waitForMetaOnline does.
1081    if (!waitForMetaOnline()) {
1082      return;
1083    }
1084
1085    TableDescriptor metaDescriptor = tableDescriptors.get(TableName.META_TABLE_NAME);
1086    final ColumnFamilyDescriptor tableFamilyDesc =
1087      metaDescriptor.getColumnFamily(HConstants.TABLE_FAMILY);
1088    final ColumnFamilyDescriptor replBarrierFamilyDesc =
1089      metaDescriptor.getColumnFamily(HConstants.REPLICATION_BARRIER_FAMILY);
1090
1091    this.assignmentManager.joinCluster();
1092    // The below depends on hbase:meta being online.
1093    this.assignmentManager.processOfflineRegions();
1094    // this must be called after the above processOfflineRegions to prevent race
1095    this.assignmentManager.wakeMetaLoadedEvent();
1096
1097    // for migrating from a version without HBASE-25099, and also for honoring the configuration
1098    // first.
1099    if (conf.get(HConstants.META_REPLICAS_NUM) != null) {
1100      int replicasNumInConf =
1101        conf.getInt(HConstants.META_REPLICAS_NUM, HConstants.DEFAULT_META_REPLICA_NUM);
1102      TableDescriptor metaDesc = tableDescriptors.get(TableName.META_TABLE_NAME);
1103      if (metaDesc.getRegionReplication() != replicasNumInConf) {
1104        // it is possible that we already have some replicas before upgrading, so we must set the
1105        // region replication number in meta TableDescriptor directly first, without creating a
1106        // ModifyTableProcedure, otherwise it may cause a double assign for the meta replicas.
1107        int existingReplicasCount =
1108          assignmentManager.getRegionStates().getRegionsOfTable(TableName.META_TABLE_NAME).size();
1109        if (existingReplicasCount > metaDesc.getRegionReplication()) {
1110          LOG.info("Update replica count of hbase:meta from {}(in TableDescriptor)"
1111            + " to {}(existing ZNodes)", metaDesc.getRegionReplication(), existingReplicasCount);
1112          metaDesc = TableDescriptorBuilder.newBuilder(metaDesc)
1113            .setRegionReplication(existingReplicasCount).build();
1114          tableDescriptors.update(metaDesc);
1115        }
1116        // check again, and issue a ModifyTableProcedure if needed
1117        if (metaDesc.getRegionReplication() != replicasNumInConf) {
1118          LOG.info(
1119            "The {} config is {} while the replica count in TableDescriptor is {}"
1120              + " for hbase:meta, altering...",
1121            HConstants.META_REPLICAS_NUM, replicasNumInConf, metaDesc.getRegionReplication());
1122          procedureExecutor.submitProcedure(new ModifyTableProcedure(
1123            procedureExecutor.getEnvironment(), TableDescriptorBuilder.newBuilder(metaDesc)
1124              .setRegionReplication(replicasNumInConf).build(),
1125            null, metaDesc, false));
1126        }
1127      }
1128    }
1129    // Initialize after meta is up as below scans meta
1130    FavoredNodesManager fnm = getFavoredNodesManager();
1131    if (fnm != null) {
1132      fnm.initializeFromMeta();
1133    }
1134
1135    // set cluster status again after user regions are assigned
1136    this.balancer.updateClusterMetrics(getClusterMetricsWithoutCoprocessor());
1137
1138    // Start balancer and meta catalog janitor after meta and regions have been assigned.
1139    status.setStatus("Starting balancer and catalog janitor");
1140    this.clusterStatusChore = new ClusterStatusChore(this, balancer);
1141    getChoreService().scheduleChore(clusterStatusChore);
1142    this.balancerChore = new BalancerChore(this);
1143    if (!disableBalancerChoreForTest) {
1144      getChoreService().scheduleChore(balancerChore);
1145    }
1146    if (regionNormalizerManager != null) {
1147      getChoreService().scheduleChore(regionNormalizerManager.getRegionNormalizerChore());
1148    }
1149    this.catalogJanitorChore = new CatalogJanitor(this);
1150    getChoreService().scheduleChore(catalogJanitorChore);
1151    this.hbckChore = new HbckChore(this);
1152    getChoreService().scheduleChore(hbckChore);
1153    this.serverManager.startChore();
1154
1155    // Only for rolling upgrade, where we need to migrate the data in namespace table to meta table.
1156    if (!waitForNamespaceOnline()) {
1157      return;
1158    }
1159    status.setStatus("Starting cluster schema service");
1160    try {
1161      initClusterSchemaService();
1162    } catch (IllegalStateException e) {
1163      if (
1164        e.getCause() != null && e.getCause() instanceof NoSuchColumnFamilyException
1165          && tableFamilyDesc == null && replBarrierFamilyDesc == null
1166      ) {
1167        LOG.info("ClusterSchema service could not be initialized. This is "
1168          + "expected during HBase 1 to 2 upgrade", e);
1169      } else {
1170        throw e;
1171      }
1172    }
1173
1174    if (this.cpHost != null) {
1175      try {
1176        this.cpHost.preMasterInitialization();
1177      } catch (IOException e) {
1178        LOG.error("Coprocessor preMasterInitialization() hook failed", e);
1179      }
1180    }
1181
1182    status.markComplete("Initialization successful");
1183    LOG.info(String.format("Master has completed initialization %.3fsec",
1184      (EnvironmentEdgeManager.currentTime() - masterActiveTime) / 1000.0f));
1185    this.masterFinishedInitializationTime = EnvironmentEdgeManager.currentTime();
1186    configurationManager.registerObserver(this.balancer);
1187    configurationManager.registerObserver(this.logCleanerPool);
1188    configurationManager.registerObserver(this.logCleaner);
1189    configurationManager.registerObserver(this.regionsRecoveryConfigManager);
1190    configurationManager.registerObserver(this.exclusiveHFileCleanerPool);
1191    if (this.sharedHFileCleanerPool != null) {
1192      configurationManager.registerObserver(this.sharedHFileCleanerPool);
1193    }
1194    if (this.hfileCleaners != null) {
1195      for (HFileCleaner cleaner : hfileCleaners) {
1196        configurationManager.registerObserver(cleaner);
1197      }
1198    }
1199    // Set master as 'initialized'.
1200    setInitialized(true);
1201
1202    if (tableFamilyDesc == null && replBarrierFamilyDesc == null) {
1203      // create missing CFs in meta table after master is set to 'initialized'.
1204      createMissingCFsInMetaDuringUpgrade(metaDescriptor);
1205
1206      // Throwing this Exception to abort active master is painful but this
1207      // seems the only way to add missing CFs in meta while upgrading from
1208      // HBase 1 to 2 (where HBase 2 has HBASE-23055 & HBASE-23782 checked-in).
1209      // So, why do we abort active master after adding missing CFs in meta?
1210      // When we reach here, we would have already bypassed NoSuchColumnFamilyException
1211      // in initClusterSchemaService(), meaning ClusterSchemaService is not
1212      // correctly initialized but we bypassed it. Similarly, we bypassed
1213      // tableStateManager.start() as well. Hence, we should better abort
1214      // current active master because our main task - adding missing CFs
1215      // in meta table is done (possible only after master state is set as
1216      // initialized) at the expense of bypassing few important tasks as part
1217      // of active master init routine. So now we abort active master so that
1218      // next active master init will not face any issues and all mandatory
1219      // services will be started during master init phase.
1220      throw new PleaseRestartMasterException("Aborting active master after missing"
1221        + " CFs are successfully added in meta. Subsequent active master "
1222        + "initialization should be uninterrupted");
1223    }
1224
1225    if (maintenanceMode) {
1226      LOG.info("Detected repair mode, skipping final initialization steps.");
1227      return;
1228    }
1229
1230    assignmentManager.checkIfShouldMoveSystemRegionAsync();
1231    status.setStatus("Starting quota manager");
1232    initQuotaManager();
1233    if (QuotaUtil.isQuotaEnabled(conf)) {
1234      // Create the quota snapshot notifier
1235      spaceQuotaSnapshotNotifier = createQuotaSnapshotNotifier();
1236      spaceQuotaSnapshotNotifier.initialize(getConnection());
1237      this.quotaObserverChore = new QuotaObserverChore(this, getMasterMetrics());
1238      // Start the chore to read the region FS space reports and act on them
1239      getChoreService().scheduleChore(quotaObserverChore);
1240
1241      this.snapshotQuotaChore = new SnapshotQuotaObserverChore(this, getMasterMetrics());
1242      // Start the chore to read snapshots and add their usage to table/NS quotas
1243      getChoreService().scheduleChore(snapshotQuotaChore);
1244    }
1245    final SlowLogMasterService slowLogMasterService = new SlowLogMasterService(conf, this);
1246    slowLogMasterService.init();
1247
1248    WALEventTrackerTableCreator.createIfNeededAndNotExists(conf, this);
1249    // Create REPLICATION.SINK_TRACKER table if needed.
1250    ReplicationSinkTrackerTableCreator.createIfNeededAndNotExists(conf, this);
1251
1252    // clear the dead servers with same host name and port of online server because we are not
1253    // removing dead server with same hostname and port of rs which is trying to check in before
1254    // master initialization. See HBASE-5916.
1255    this.serverManager.clearDeadServersWithSameHostNameAndPortOfOnlineServer();
1256
1257    // Check and set the znode ACLs if needed in case we are overtaking a non-secure configuration
1258    status.setStatus("Checking ZNode ACLs");
1259    zooKeeper.checkAndSetZNodeAcls();
1260
1261    status.setStatus("Initializing MOB Cleaner");
1262    initMobCleaner();
1263
1264    status.setStatus("Calling postStartMaster coprocessors");
1265    if (this.cpHost != null) {
1266      // don't let cp initialization errors kill the master
1267      try {
1268        this.cpHost.postStartMaster();
1269      } catch (IOException ioe) {
1270        LOG.error("Coprocessor postStartMaster() hook failed", ioe);
1271      }
1272    }
1273
1274    zombieDetector.interrupt();
1275
1276    /*
1277     * After master has started up, lets do balancer post startup initialization. Since this runs in
1278     * activeMasterManager thread, it should be fine.
1279     */
1280    long start = EnvironmentEdgeManager.currentTime();
1281    this.balancer.postMasterStartupInitialize();
1282    if (LOG.isDebugEnabled()) {
1283      LOG.debug("Balancer post startup initialization complete, took "
1284        + ((EnvironmentEdgeManager.currentTime() - start) / 1000) + " seconds");
1285    }
1286
1287    this.rollingUpgradeChore = new RollingUpgradeChore(this);
1288    getChoreService().scheduleChore(rollingUpgradeChore);
1289  }
1290
1291  private void createMissingCFsInMetaDuringUpgrade(TableDescriptor metaDescriptor)
1292    throws IOException {
1293    TableDescriptor newMetaDesc = TableDescriptorBuilder.newBuilder(metaDescriptor)
1294      .setColumnFamily(FSTableDescriptors.getTableFamilyDescForMeta(conf))
1295      .setColumnFamily(FSTableDescriptors.getReplBarrierFamilyDescForMeta()).build();
1296    long pid = this.modifyTable(TableName.META_TABLE_NAME, () -> newMetaDesc, 0, 0, false);
1297    int tries = 30;
1298    while (
1299      !(getMasterProcedureExecutor().isFinished(pid)) && getMasterProcedureExecutor().isRunning()
1300        && tries > 0
1301    ) {
1302      try {
1303        Thread.sleep(1000);
1304      } catch (InterruptedException e) {
1305        throw new IOException("Wait interrupted", e);
1306      }
1307      tries--;
1308    }
1309    if (tries <= 0) {
1310      throw new HBaseIOException(
1311        "Failed to add table and rep_barrier CFs to meta in a given time.");
1312    } else {
1313      Procedure<?> result = getMasterProcedureExecutor().getResult(pid);
1314      if (result != null && result.isFailed()) {
1315        throw new IOException("Failed to add table and rep_barrier CFs to meta. "
1316          + MasterProcedureUtil.unwrapRemoteIOException(result));
1317      }
1318    }
1319  }
1320
1321  /**
1322   * Check hbase:meta is up and ready for reading. For use during Master startup only.
1323   * @return True if meta is UP and online and startup can progress. Otherwise, meta is not online
1324   *         and we will hold here until operator intervention.
1325   */
1326  @InterfaceAudience.Private
1327  public boolean waitForMetaOnline() {
1328    return isRegionOnline(RegionInfoBuilder.FIRST_META_REGIONINFO);
1329  }
1330
1331  /**
1332   * @return True if region is online and scannable else false if an error or shutdown (Otherwise we
1333   *         just block in here holding up all forward-progess).
1334   */
1335  private boolean isRegionOnline(RegionInfo ri) {
1336    RetryCounter rc = null;
1337    while (!isStopped()) {
1338      RegionState rs = this.assignmentManager.getRegionStates().getRegionState(ri);
1339      if (rs.isOpened()) {
1340        if (this.getServerManager().isServerOnline(rs.getServerName())) {
1341          return true;
1342        }
1343      }
1344      // Region is not OPEN.
1345      Optional<Procedure<MasterProcedureEnv>> optProc = this.procedureExecutor.getProcedures()
1346        .stream().filter(p -> p instanceof ServerCrashProcedure).findAny();
1347      // TODO: Add a page to refguide on how to do repair. Have this log message point to it.
1348      // Page will talk about loss of edits, how to schedule at least the meta WAL recovery, and
1349      // then how to assign including how to break region lock if one held.
1350      LOG.warn(
1351        "{} is NOT online; state={}; ServerCrashProcedures={}. Master startup cannot "
1352          + "progress, in holding-pattern until region onlined.",
1353        ri.getRegionNameAsString(), rs, optProc.isPresent());
1354      // Check once-a-minute.
1355      if (rc == null) {
1356        rc = new RetryCounterFactory(Integer.MAX_VALUE, 1000, 60_000).create();
1357      }
1358      Threads.sleep(rc.getBackoffTimeAndIncrementAttempts());
1359    }
1360    return false;
1361  }
1362
1363  /**
1364   * Check hbase:namespace table is assigned. If not, startup will hang looking for the ns table
1365   * <p/>
1366   * This is for rolling upgrading, later we will migrate the data in ns table to the ns family of
1367   * meta table. And if this is a new cluster, this method will return immediately as there will be
1368   * no namespace table/region.
1369   * @return True if namespace table is up/online.
1370   */
1371  private boolean waitForNamespaceOnline() throws IOException {
1372    TableState nsTableState =
1373      MetaTableAccessor.getTableState(getConnection(), TableName.NAMESPACE_TABLE_NAME);
1374    if (nsTableState == null || nsTableState.isDisabled()) {
1375      // this means we have already migrated the data and disabled or deleted the namespace table,
1376      // or this is a new deploy which does not have a namespace table from the beginning.
1377      return true;
1378    }
1379    List<RegionInfo> ris =
1380      this.assignmentManager.getRegionStates().getRegionsOfTable(TableName.NAMESPACE_TABLE_NAME);
1381    if (ris.isEmpty()) {
1382      // maybe this will not happen any more, but anyway, no harm to add a check here...
1383      return true;
1384    }
1385    // Else there are namespace regions up in meta. Ensure they are assigned before we go on.
1386    for (RegionInfo ri : ris) {
1387      if (!isRegionOnline(ri)) {
1388        return false;
1389      }
1390    }
1391    return true;
1392  }
1393
1394  /**
1395   * Adds the {@code MasterQuotasObserver} to the list of configured Master observers to
1396   * automatically remove quotas for a table when that table is deleted.
1397   */
1398  @InterfaceAudience.Private
1399  public void updateConfigurationForQuotasObserver(Configuration conf) {
1400    // We're configured to not delete quotas on table deletion, so we don't need to add the obs.
1401    if (
1402      !conf.getBoolean(MasterQuotasObserver.REMOVE_QUOTA_ON_TABLE_DELETE,
1403        MasterQuotasObserver.REMOVE_QUOTA_ON_TABLE_DELETE_DEFAULT)
1404    ) {
1405      return;
1406    }
1407    String[] masterCoprocs = conf.getStrings(CoprocessorHost.MASTER_COPROCESSOR_CONF_KEY);
1408    final int length = null == masterCoprocs ? 0 : masterCoprocs.length;
1409    String[] updatedCoprocs = new String[length + 1];
1410    if (length > 0) {
1411      System.arraycopy(masterCoprocs, 0, updatedCoprocs, 0, masterCoprocs.length);
1412    }
1413    updatedCoprocs[length] = MasterQuotasObserver.class.getName();
1414    conf.setStrings(CoprocessorHost.MASTER_COPROCESSOR_CONF_KEY, updatedCoprocs);
1415  }
1416
1417  private void initMobCleaner() {
1418    this.mobFileCleanerChore = new MobFileCleanerChore(this);
1419    getChoreService().scheduleChore(mobFileCleanerChore);
1420    this.mobFileCompactionChore = new MobFileCompactionChore(this);
1421    getChoreService().scheduleChore(mobFileCompactionChore);
1422  }
1423
1424  /**
1425   * <p>
1426   * Create a {@link ServerManager} instance.
1427   * </p>
1428   * <p>
1429   * Will be overridden in tests.
1430   * </p>
1431   */
1432  @InterfaceAudience.Private
1433  protected ServerManager createServerManager(MasterServices master, RegionServerList storage)
1434    throws IOException {
1435    // We put this out here in a method so can do a Mockito.spy and stub it out
1436    // w/ a mocked up ServerManager.
1437    setupClusterConnection();
1438    return new ServerManager(master, storage);
1439  }
1440
1441  private void waitForRegionServers(final MonitoredTask status)
1442    throws IOException, InterruptedException {
1443    this.serverManager.waitForRegionServers(status);
1444  }
1445
1446  // Will be overridden in tests
1447  @InterfaceAudience.Private
1448  protected void initClusterSchemaService() throws IOException, InterruptedException {
1449    this.clusterSchemaService = new ClusterSchemaServiceImpl(this);
1450    this.clusterSchemaService.startAsync();
1451    try {
1452      this.clusterSchemaService
1453        .awaitRunning(getConfiguration().getInt(HBASE_MASTER_WAIT_ON_SERVICE_IN_SECONDS,
1454          DEFAULT_HBASE_MASTER_WAIT_ON_SERVICE_IN_SECONDS), TimeUnit.SECONDS);
1455    } catch (TimeoutException toe) {
1456      throw new IOException("Timedout starting ClusterSchemaService", toe);
1457    }
1458  }
1459
1460  private void initQuotaManager() throws IOException {
1461    MasterQuotaManager quotaManager = new MasterQuotaManager(this);
1462    quotaManager.start();
1463    this.quotaManager = quotaManager;
1464  }
1465
1466  private SpaceQuotaSnapshotNotifier createQuotaSnapshotNotifier() {
1467    SpaceQuotaSnapshotNotifier notifier =
1468      SpaceQuotaSnapshotNotifierFactory.getInstance().create(getConfiguration());
1469    return notifier;
1470  }
1471
1472  public boolean isCatalogJanitorEnabled() {
1473    return catalogJanitorChore != null ? catalogJanitorChore.getEnabled() : false;
1474  }
1475
1476  boolean isCleanerChoreEnabled() {
1477    boolean hfileCleanerFlag = true, logCleanerFlag = true;
1478
1479    if (getHFileCleaner() != null) {
1480      hfileCleanerFlag = getHFileCleaner().getEnabled();
1481    }
1482
1483    if (logCleaner != null) {
1484      logCleanerFlag = logCleaner.getEnabled();
1485    }
1486
1487    return (hfileCleanerFlag && logCleanerFlag);
1488  }
1489
1490  @Override
1491  public ServerManager getServerManager() {
1492    return this.serverManager;
1493  }
1494
1495  @Override
1496  public MasterFileSystem getMasterFileSystem() {
1497    return this.fileSystemManager;
1498  }
1499
1500  @Override
1501  public MasterWalManager getMasterWalManager() {
1502    return this.walManager;
1503  }
1504
1505  @Override
1506  public SplitWALManager getSplitWALManager() {
1507    return splitWALManager;
1508  }
1509
1510  @Override
1511  public TableStateManager getTableStateManager() {
1512    return tableStateManager;
1513  }
1514
1515  /*
1516   * Start up all services. If any of these threads gets an unhandled exception then they just die
1517   * with a logged message. This should be fine because in general, we do not expect the master to
1518   * get such unhandled exceptions as OOMEs; it should be lightly loaded. See what HRegionServer
1519   * does if need to install an unexpected exception handler.
1520   */
1521  private void startServiceThreads() throws IOException {
1522    // Start the executor service pools
1523    final int masterOpenRegionPoolSize = conf.getInt(HConstants.MASTER_OPEN_REGION_THREADS,
1524      HConstants.MASTER_OPEN_REGION_THREADS_DEFAULT);
1525    executorService.startExecutorService(executorService.new ExecutorConfig()
1526      .setExecutorType(ExecutorType.MASTER_OPEN_REGION).setCorePoolSize(masterOpenRegionPoolSize));
1527    final int masterCloseRegionPoolSize = conf.getInt(HConstants.MASTER_CLOSE_REGION_THREADS,
1528      HConstants.MASTER_CLOSE_REGION_THREADS_DEFAULT);
1529    executorService.startExecutorService(
1530      executorService.new ExecutorConfig().setExecutorType(ExecutorType.MASTER_CLOSE_REGION)
1531        .setCorePoolSize(masterCloseRegionPoolSize));
1532    final int masterServerOpThreads = conf.getInt(HConstants.MASTER_SERVER_OPERATIONS_THREADS,
1533      HConstants.MASTER_SERVER_OPERATIONS_THREADS_DEFAULT);
1534    executorService.startExecutorService(
1535      executorService.new ExecutorConfig().setExecutorType(ExecutorType.MASTER_SERVER_OPERATIONS)
1536        .setCorePoolSize(masterServerOpThreads));
1537    final int masterServerMetaOpsThreads =
1538      conf.getInt(HConstants.MASTER_META_SERVER_OPERATIONS_THREADS,
1539        HConstants.MASTER_META_SERVER_OPERATIONS_THREADS_DEFAULT);
1540    executorService.startExecutorService(executorService.new ExecutorConfig()
1541      .setExecutorType(ExecutorType.MASTER_META_SERVER_OPERATIONS)
1542      .setCorePoolSize(masterServerMetaOpsThreads));
1543    final int masterLogReplayThreads = conf.getInt(HConstants.MASTER_LOG_REPLAY_OPS_THREADS,
1544      HConstants.MASTER_LOG_REPLAY_OPS_THREADS_DEFAULT);
1545    executorService.startExecutorService(executorService.new ExecutorConfig()
1546      .setExecutorType(ExecutorType.M_LOG_REPLAY_OPS).setCorePoolSize(masterLogReplayThreads));
1547    final int masterSnapshotThreads = conf.getInt(SnapshotManager.SNAPSHOT_POOL_THREADS_KEY,
1548      SnapshotManager.SNAPSHOT_POOL_THREADS_DEFAULT);
1549    executorService.startExecutorService(
1550      executorService.new ExecutorConfig().setExecutorType(ExecutorType.MASTER_SNAPSHOT_OPERATIONS)
1551        .setCorePoolSize(masterSnapshotThreads).setAllowCoreThreadTimeout(true));
1552    final int masterMergeDispatchThreads = conf.getInt(HConstants.MASTER_MERGE_DISPATCH_THREADS,
1553      HConstants.MASTER_MERGE_DISPATCH_THREADS_DEFAULT);
1554    executorService.startExecutorService(
1555      executorService.new ExecutorConfig().setExecutorType(ExecutorType.MASTER_MERGE_OPERATIONS)
1556        .setCorePoolSize(masterMergeDispatchThreads).setAllowCoreThreadTimeout(true));
1557
1558    // We depend on there being only one instance of this executor running
1559    // at a time. To do concurrency, would need fencing of enable/disable of
1560    // tables.
1561    // Any time changing this maxThreads to > 1, pls see the comment at
1562    // AccessController#postCompletedCreateTableAction
1563    executorService.startExecutorService(executorService.new ExecutorConfig()
1564      .setExecutorType(ExecutorType.MASTER_TABLE_OPERATIONS).setCorePoolSize(1));
1565    startProcedureExecutor();
1566
1567    // Create log cleaner thread pool
1568    logCleanerPool = DirScanPool.getLogCleanerScanPool(conf);
1569    Map<String, Object> params = new HashMap<>();
1570    params.put(MASTER, this);
1571    // Start log cleaner thread
1572    int cleanerInterval =
1573      conf.getInt(HBASE_MASTER_CLEANER_INTERVAL, DEFAULT_HBASE_MASTER_CLEANER_INTERVAL);
1574    this.logCleaner =
1575      new LogCleaner(cleanerInterval, this, conf, getMasterWalManager().getFileSystem(),
1576        getMasterWalManager().getOldLogDir(), logCleanerPool, params);
1577    getChoreService().scheduleChore(logCleaner);
1578
1579    Path archiveDir = HFileArchiveUtil.getArchivePath(conf);
1580
1581    // Create custom archive hfile cleaners
1582    String[] paths = conf.getStrings(HFileCleaner.HFILE_CLEANER_CUSTOM_PATHS);
1583    // todo: handle the overlap issues for the custom paths
1584
1585    if (paths != null && paths.length > 0) {
1586      if (conf.getStrings(HFileCleaner.HFILE_CLEANER_CUSTOM_PATHS_PLUGINS) == null) {
1587        Set<String> cleanerClasses = new HashSet<>();
1588        String[] cleaners = conf.getStrings(HFileCleaner.MASTER_HFILE_CLEANER_PLUGINS);
1589        if (cleaners != null) {
1590          Collections.addAll(cleanerClasses, cleaners);
1591        }
1592        conf.setStrings(HFileCleaner.HFILE_CLEANER_CUSTOM_PATHS_PLUGINS,
1593          cleanerClasses.toArray(new String[cleanerClasses.size()]));
1594        LOG.info("Archive custom cleaner paths: {}, plugins: {}", Arrays.asList(paths),
1595          cleanerClasses);
1596      }
1597      // share the hfile cleaner pool in custom paths
1598      sharedHFileCleanerPool = DirScanPool.getHFileCleanerScanPool(conf.get(CUSTOM_POOL_SIZE, "6"));
1599      for (int i = 0; i < paths.length; i++) {
1600        Path path = new Path(paths[i].trim());
1601        HFileCleaner cleaner =
1602          new HFileCleaner("ArchiveCustomHFileCleaner-" + path.getName(), cleanerInterval, this,
1603            conf, getMasterFileSystem().getFileSystem(), new Path(archiveDir, path),
1604            HFileCleaner.HFILE_CLEANER_CUSTOM_PATHS_PLUGINS, sharedHFileCleanerPool, params, null);
1605        hfileCleaners.add(cleaner);
1606        hfileCleanerPaths.add(path);
1607      }
1608    }
1609
1610    // Create the whole archive dir cleaner thread pool
1611    exclusiveHFileCleanerPool = DirScanPool.getHFileCleanerScanPool(conf);
1612    hfileCleaners.add(0,
1613      new HFileCleaner(cleanerInterval, this, conf, getMasterFileSystem().getFileSystem(),
1614        archiveDir, exclusiveHFileCleanerPool, params, hfileCleanerPaths));
1615    hfileCleanerPaths.add(0, archiveDir);
1616    // Schedule all the hfile cleaners
1617    for (HFileCleaner hFileCleaner : hfileCleaners) {
1618      getChoreService().scheduleChore(hFileCleaner);
1619    }
1620
1621    // Regions Reopen based on very high storeFileRefCount is considered enabled
1622    // only if hbase.regions.recovery.store.file.ref.count has value > 0
1623    final int maxStoreFileRefCount = conf.getInt(HConstants.STORE_FILE_REF_COUNT_THRESHOLD,
1624      HConstants.DEFAULT_STORE_FILE_REF_COUNT_THRESHOLD);
1625    if (maxStoreFileRefCount > 0) {
1626      this.regionsRecoveryChore = new RegionsRecoveryChore(this, conf, this);
1627      getChoreService().scheduleChore(this.regionsRecoveryChore);
1628    } else {
1629      LOG.info(
1630        "Reopening regions with very high storeFileRefCount is disabled. "
1631          + "Provide threshold value > 0 for {} to enable it.",
1632        HConstants.STORE_FILE_REF_COUNT_THRESHOLD);
1633    }
1634
1635    this.regionsRecoveryConfigManager = new RegionsRecoveryConfigManager(this);
1636
1637    replicationBarrierCleaner =
1638      new ReplicationBarrierCleaner(conf, this, getConnection(), replicationPeerManager);
1639    getChoreService().scheduleChore(replicationBarrierCleaner);
1640
1641    final boolean isSnapshotChoreEnabled = this.snapshotCleanupStateStore.get();
1642    this.snapshotCleanerChore = new SnapshotCleanerChore(this, conf, getSnapshotManager());
1643    if (isSnapshotChoreEnabled) {
1644      getChoreService().scheduleChore(this.snapshotCleanerChore);
1645    } else {
1646      if (LOG.isTraceEnabled()) {
1647        LOG.trace("Snapshot Cleaner Chore is disabled. Not starting up the chore..");
1648      }
1649    }
1650    serviceStarted = true;
1651    if (LOG.isTraceEnabled()) {
1652      LOG.trace("Started service threads");
1653    }
1654  }
1655
1656  protected void stopServiceThreads() {
1657    if (masterJettyServer != null) {
1658      LOG.info("Stopping master jetty server");
1659      try {
1660        masterJettyServer.stop();
1661      } catch (Exception e) {
1662        LOG.error("Failed to stop master jetty server", e);
1663      }
1664    }
1665    stopChoreService();
1666    stopExecutorService();
1667    if (exclusiveHFileCleanerPool != null) {
1668      exclusiveHFileCleanerPool.shutdownNow();
1669      exclusiveHFileCleanerPool = null;
1670    }
1671    if (logCleanerPool != null) {
1672      logCleanerPool.shutdownNow();
1673      logCleanerPool = null;
1674    }
1675    if (sharedHFileCleanerPool != null) {
1676      sharedHFileCleanerPool.shutdownNow();
1677      sharedHFileCleanerPool = null;
1678    }
1679    if (maintenanceRegionServer != null) {
1680      maintenanceRegionServer.getRegionServer().stop(HBASE_MASTER_CLEANER_INTERVAL);
1681    }
1682
1683    LOG.debug("Stopping service threads");
1684    // stop procedure executor prior to other services such as server manager and assignment
1685    // manager, as these services are important for some running procedures. See HBASE-24117 for
1686    // example.
1687    stopProcedureExecutor();
1688
1689    if (regionNormalizerManager != null) {
1690      regionNormalizerManager.stop();
1691    }
1692    if (this.quotaManager != null) {
1693      this.quotaManager.stop();
1694    }
1695
1696    if (this.activeMasterManager != null) {
1697      this.activeMasterManager.stop();
1698    }
1699    if (this.serverManager != null) {
1700      this.serverManager.stop();
1701    }
1702    if (this.assignmentManager != null) {
1703      this.assignmentManager.stop();
1704    }
1705
1706    if (masterRegion != null) {
1707      masterRegion.close(isAborted());
1708    }
1709    if (this.walManager != null) {
1710      this.walManager.stop();
1711    }
1712    if (this.fileSystemManager != null) {
1713      this.fileSystemManager.stop();
1714    }
1715    if (this.mpmHost != null) {
1716      this.mpmHost.stop("server shutting down.");
1717    }
1718    if (this.regionServerTracker != null) {
1719      this.regionServerTracker.stop();
1720    }
1721  }
1722
1723  private void createProcedureExecutor() throws IOException {
1724    MasterProcedureEnv procEnv = new MasterProcedureEnv(this);
1725    procedureStore = new RegionProcedureStore(this, masterRegion,
1726      new MasterProcedureEnv.FsUtilsLeaseRecovery(this));
1727    procedureStore.registerListener(new ProcedureStoreListener() {
1728
1729      @Override
1730      public void abortProcess() {
1731        abort("The Procedure Store lost the lease", null);
1732      }
1733    });
1734    MasterProcedureScheduler procedureScheduler = procEnv.getProcedureScheduler();
1735    procedureExecutor = new ProcedureExecutor<>(conf, procEnv, procedureStore, procedureScheduler);
1736    configurationManager.registerObserver(procEnv);
1737
1738    int cpus = Runtime.getRuntime().availableProcessors();
1739    final int numThreads = conf.getInt(MasterProcedureConstants.MASTER_PROCEDURE_THREADS, Math.max(
1740      (cpus > 0 ? cpus / 4 : 0), MasterProcedureConstants.DEFAULT_MIN_MASTER_PROCEDURE_THREADS));
1741    final boolean abortOnCorruption =
1742      conf.getBoolean(MasterProcedureConstants.EXECUTOR_ABORT_ON_CORRUPTION,
1743        MasterProcedureConstants.DEFAULT_EXECUTOR_ABORT_ON_CORRUPTION);
1744    procedureStore.start(numThreads);
1745    // Just initialize it but do not start the workers, we will start the workers later by calling
1746    // startProcedureExecutor. See the javadoc for finishActiveMasterInitialization for more
1747    // details.
1748    procedureExecutor.init(numThreads, abortOnCorruption);
1749    if (!procEnv.getRemoteDispatcher().start()) {
1750      throw new HBaseIOException("Failed start of remote dispatcher");
1751    }
1752  }
1753
1754  // will be override in UT
1755  protected void startProcedureExecutor() throws IOException {
1756    procedureExecutor.startWorkers();
1757  }
1758
1759  /**
1760   * Turn on/off Snapshot Cleanup Chore
1761   * @param on indicates whether Snapshot Cleanup Chore is to be run
1762   */
1763  void switchSnapshotCleanup(final boolean on, final boolean synchronous) throws IOException {
1764    if (synchronous) {
1765      synchronized (this.snapshotCleanerChore) {
1766        switchSnapshotCleanup(on);
1767      }
1768    } else {
1769      switchSnapshotCleanup(on);
1770    }
1771  }
1772
1773  private void switchSnapshotCleanup(final boolean on) throws IOException {
1774    snapshotCleanupStateStore.set(on);
1775    if (on) {
1776      getChoreService().scheduleChore(this.snapshotCleanerChore);
1777    } else {
1778      this.snapshotCleanerChore.cancel();
1779    }
1780  }
1781
1782  private void stopProcedureExecutor() {
1783    if (procedureExecutor != null) {
1784      configurationManager.deregisterObserver(procedureExecutor.getEnvironment());
1785      procedureExecutor.getEnvironment().getRemoteDispatcher().stop();
1786      procedureExecutor.stop();
1787      procedureExecutor.join();
1788      procedureExecutor = null;
1789    }
1790
1791    if (procedureStore != null) {
1792      procedureStore.stop(isAborted());
1793      procedureStore = null;
1794    }
1795  }
1796
1797  protected void stopChores() {
1798    shutdownChore(mobFileCleanerChore);
1799    shutdownChore(mobFileCompactionChore);
1800    shutdownChore(balancerChore);
1801    if (regionNormalizerManager != null) {
1802      shutdownChore(regionNormalizerManager.getRegionNormalizerChore());
1803    }
1804    shutdownChore(clusterStatusChore);
1805    shutdownChore(catalogJanitorChore);
1806    shutdownChore(clusterStatusPublisherChore);
1807    shutdownChore(snapshotQuotaChore);
1808    shutdownChore(logCleaner);
1809    if (hfileCleaners != null) {
1810      for (ScheduledChore chore : hfileCleaners) {
1811        chore.shutdown();
1812      }
1813      hfileCleaners = null;
1814    }
1815    shutdownChore(replicationBarrierCleaner);
1816    shutdownChore(snapshotCleanerChore);
1817    shutdownChore(hbckChore);
1818    shutdownChore(regionsRecoveryChore);
1819    shutdownChore(rollingUpgradeChore);
1820  }
1821
1822  /** Returns Get remote side's InetAddress */
1823  InetAddress getRemoteInetAddress(final int port, final long serverStartCode)
1824    throws UnknownHostException {
1825    // Do it out here in its own little method so can fake an address when
1826    // mocking up in tests.
1827    InetAddress ia = RpcServer.getRemoteIp();
1828
1829    // The call could be from the local regionserver,
1830    // in which case, there is no remote address.
1831    if (ia == null && serverStartCode == startcode) {
1832      InetSocketAddress isa = rpcServices.getSocketAddress();
1833      if (isa != null && isa.getPort() == port) {
1834        ia = isa.getAddress();
1835      }
1836    }
1837    return ia;
1838  }
1839
1840  /** Returns Maximum time we should run balancer for */
1841  private int getMaxBalancingTime() {
1842    // if max balancing time isn't set, defaulting it to period time
1843    int maxBalancingTime =
1844      getConfiguration().getInt(HConstants.HBASE_BALANCER_MAX_BALANCING, getConfiguration()
1845        .getInt(HConstants.HBASE_BALANCER_PERIOD, HConstants.DEFAULT_HBASE_BALANCER_PERIOD));
1846    return maxBalancingTime;
1847  }
1848
1849  /** Returns Maximum number of regions in transition */
1850  private int getMaxRegionsInTransition() {
1851    int numRegions = this.assignmentManager.getRegionStates().getRegionAssignments().size();
1852    return Math.max((int) Math.floor(numRegions * this.maxRitPercent), 1);
1853  }
1854
1855  /**
1856   * It first sleep to the next balance plan start time. Meanwhile, throttling by the max number
1857   * regions in transition to protect availability.
1858   * @param nextBalanceStartTime   The next balance plan start time
1859   * @param maxRegionsInTransition max number of regions in transition
1860   * @param cutoffTime             when to exit balancer
1861   */
1862  private void balanceThrottling(long nextBalanceStartTime, int maxRegionsInTransition,
1863    long cutoffTime) {
1864    boolean interrupted = false;
1865
1866    // Sleep to next balance plan start time
1867    // But if there are zero regions in transition, it can skip sleep to speed up.
1868    while (
1869      !interrupted && EnvironmentEdgeManager.currentTime() < nextBalanceStartTime
1870        && this.assignmentManager.getRegionStates().hasRegionsInTransition()
1871    ) {
1872      try {
1873        Thread.sleep(100);
1874      } catch (InterruptedException ie) {
1875        interrupted = true;
1876      }
1877    }
1878
1879    // Throttling by max number regions in transition
1880    while (
1881      !interrupted && maxRegionsInTransition > 0
1882        && this.assignmentManager.getRegionStates().getRegionsInTransitionCount()
1883            >= maxRegionsInTransition
1884        && EnvironmentEdgeManager.currentTime() <= cutoffTime
1885    ) {
1886      try {
1887        // sleep if the number of regions in transition exceeds the limit
1888        Thread.sleep(100);
1889      } catch (InterruptedException ie) {
1890        interrupted = true;
1891      }
1892    }
1893
1894    if (interrupted) Thread.currentThread().interrupt();
1895  }
1896
1897  public BalanceResponse balance() throws IOException {
1898    return balance(BalanceRequest.defaultInstance());
1899  }
1900
1901  /**
1902   * Trigger a normal balance, see {@link HMaster#balance()} . If the balance is not executed this
1903   * time, the metrics related to the balance will be updated. When balance is running, related
1904   * metrics will be updated at the same time. But if some checking logic failed and cause the
1905   * balancer exit early, we lost the chance to update balancer metrics. This will lead to user
1906   * missing the latest balancer info.
1907   */
1908  public BalanceResponse balanceOrUpdateMetrics() throws IOException {
1909    synchronized (this.balancer) {
1910      BalanceResponse response = balance();
1911      if (!response.isBalancerRan()) {
1912        Map<TableName, Map<ServerName, List<RegionInfo>>> assignments =
1913          this.assignmentManager.getRegionStates().getAssignmentsForBalancer(this.tableStateManager,
1914            this.serverManager.getOnlineServersList());
1915        for (Map<ServerName, List<RegionInfo>> serverMap : assignments.values()) {
1916          serverMap.keySet().removeAll(this.serverManager.getDrainingServersList());
1917        }
1918        this.balancer.updateBalancerLoadInfo(assignments);
1919      }
1920      return response;
1921    }
1922  }
1923
1924  /**
1925   * Checks master state before initiating action over region topology.
1926   * @param action the name of the action under consideration, for logging.
1927   * @return {@code true} when the caller should exit early, {@code false} otherwise.
1928   */
1929  @Override
1930  public boolean skipRegionManagementAction(final String action) {
1931    // Note: this method could be `default` on MasterServices if but for logging.
1932    if (!isInitialized()) {
1933      LOG.debug("Master has not been initialized, don't run {}.", action);
1934      return true;
1935    }
1936    if (this.getServerManager().isClusterShutdown()) {
1937      LOG.info("Cluster is shutting down, don't run {}.", action);
1938      return true;
1939    }
1940    if (isInMaintenanceMode()) {
1941      LOG.info("Master is in maintenance mode, don't run {}.", action);
1942      return true;
1943    }
1944    return false;
1945  }
1946
1947  public BalanceResponse balance(BalanceRequest request) throws IOException {
1948    checkInitialized();
1949
1950    BalanceResponse.Builder responseBuilder = BalanceResponse.newBuilder();
1951
1952    if (loadBalancerStateStore == null || !(loadBalancerStateStore.get() || request.isDryRun())) {
1953      return responseBuilder.build();
1954    }
1955
1956    if (skipRegionManagementAction("balancer")) {
1957      return responseBuilder.build();
1958    }
1959
1960    synchronized (this.balancer) {
1961      // Only allow one balance run at at time.
1962      if (this.assignmentManager.hasRegionsInTransition()) {
1963        List<RegionStateNode> regionsInTransition = assignmentManager.getRegionsInTransition();
1964        // if hbase:meta region is in transition, result of assignment cannot be recorded
1965        // ignore the force flag in that case
1966        boolean metaInTransition = assignmentManager.isMetaRegionInTransition();
1967        List<RegionStateNode> toPrint = regionsInTransition;
1968        int max = 5;
1969        boolean truncated = false;
1970        if (regionsInTransition.size() > max) {
1971          toPrint = regionsInTransition.subList(0, max);
1972          truncated = true;
1973        }
1974
1975        if (!request.isIgnoreRegionsInTransition() || metaInTransition) {
1976          LOG.info("Not running balancer (ignoreRIT=false" + ", metaRIT=" + metaInTransition
1977            + ") because " + regionsInTransition.size() + " region(s) in transition: " + toPrint
1978            + (truncated ? "(truncated list)" : ""));
1979          return responseBuilder.build();
1980        }
1981      }
1982      if (this.serverManager.areDeadServersInProgress()) {
1983        LOG.info("Not running balancer because processing dead regionserver(s): "
1984          + this.serverManager.getDeadServers());
1985        return responseBuilder.build();
1986      }
1987
1988      if (this.cpHost != null) {
1989        try {
1990          if (this.cpHost.preBalance(request)) {
1991            LOG.debug("Coprocessor bypassing balancer request");
1992            return responseBuilder.build();
1993          }
1994        } catch (IOException ioe) {
1995          LOG.error("Error invoking master coprocessor preBalance()", ioe);
1996          return responseBuilder.build();
1997        }
1998      }
1999
2000      Map<TableName, Map<ServerName, List<RegionInfo>>> assignments =
2001        this.assignmentManager.getRegionStates().getAssignmentsForBalancer(tableStateManager,
2002          this.serverManager.getOnlineServersList());
2003      for (Map<ServerName, List<RegionInfo>> serverMap : assignments.values()) {
2004        serverMap.keySet().removeAll(this.serverManager.getDrainingServersList());
2005      }
2006
2007      // Give the balancer the current cluster state.
2008      this.balancer.updateClusterMetrics(getClusterMetricsWithoutCoprocessor());
2009
2010      List<RegionPlan> plans = this.balancer.balanceCluster(assignments);
2011
2012      responseBuilder.setBalancerRan(true).setMovesCalculated(plans == null ? 0 : plans.size());
2013
2014      if (skipRegionManagementAction("balancer")) {
2015        // make one last check that the cluster isn't shutting down before proceeding.
2016        return responseBuilder.build();
2017      }
2018
2019      // For dry run we don't actually want to execute the moves, but we do want
2020      // to execute the coprocessor below
2021      List<RegionPlan> sucRPs =
2022        request.isDryRun() ? Collections.emptyList() : executeRegionPlansWithThrottling(plans);
2023
2024      if (this.cpHost != null) {
2025        try {
2026          this.cpHost.postBalance(request, sucRPs);
2027        } catch (IOException ioe) {
2028          // balancing already succeeded so don't change the result
2029          LOG.error("Error invoking master coprocessor postBalance()", ioe);
2030        }
2031      }
2032
2033      responseBuilder.setMovesExecuted(sucRPs.size());
2034    }
2035
2036    // If LoadBalancer did not generate any plans, it means the cluster is already balanced.
2037    // Return true indicating a success.
2038    return responseBuilder.build();
2039  }
2040
2041  /**
2042   * Execute region plans with throttling
2043   * @param plans to execute
2044   * @return succeeded plans
2045   */
2046  public List<RegionPlan> executeRegionPlansWithThrottling(List<RegionPlan> plans) {
2047    List<RegionPlan> successRegionPlans = new ArrayList<>();
2048    int maxRegionsInTransition = getMaxRegionsInTransition();
2049    long balanceStartTime = EnvironmentEdgeManager.currentTime();
2050    long cutoffTime = balanceStartTime + this.maxBalancingTime;
2051    int rpCount = 0; // number of RegionPlans balanced so far
2052    if (plans != null && !plans.isEmpty()) {
2053      int balanceInterval = this.maxBalancingTime / plans.size();
2054      LOG.info(
2055        "Balancer plans size is " + plans.size() + ", the balance interval is " + balanceInterval
2056          + " ms, and the max number regions in transition is " + maxRegionsInTransition);
2057
2058      for (RegionPlan plan : plans) {
2059        LOG.info("balance " + plan);
2060        // TODO: bulk assign
2061        try {
2062          this.assignmentManager.balance(plan);
2063        } catch (HBaseIOException hioe) {
2064          // should ignore failed plans here, avoiding the whole balance plans be aborted
2065          // later calls of balance() can fetch up the failed and skipped plans
2066          LOG.warn("Failed balance plan {}, skipping...", plan, hioe);
2067        }
2068        // rpCount records balance plans processed, does not care if a plan succeeds
2069        rpCount++;
2070        successRegionPlans.add(plan);
2071
2072        if (this.maxBalancingTime > 0) {
2073          balanceThrottling(balanceStartTime + rpCount * balanceInterval, maxRegionsInTransition,
2074            cutoffTime);
2075        }
2076
2077        // if performing next balance exceeds cutoff time, exit the loop
2078        if (
2079          this.maxBalancingTime > 0 && rpCount < plans.size()
2080            && EnvironmentEdgeManager.currentTime() > cutoffTime
2081        ) {
2082          // TODO: After balance, there should not be a cutoff time (keeping it as
2083          // a security net for now)
2084          LOG.debug(
2085            "No more balancing till next balance run; maxBalanceTime=" + this.maxBalancingTime);
2086          break;
2087        }
2088      }
2089    }
2090    LOG.debug("Balancer is going into sleep until next period in {}ms", getConfiguration()
2091      .getInt(HConstants.HBASE_BALANCER_PERIOD, HConstants.DEFAULT_HBASE_BALANCER_PERIOD));
2092    return successRegionPlans;
2093  }
2094
2095  @Override
2096  public RegionNormalizerManager getRegionNormalizerManager() {
2097    return regionNormalizerManager;
2098  }
2099
2100  @Override
2101  public boolean normalizeRegions(final NormalizeTableFilterParams ntfp,
2102    final boolean isHighPriority) throws IOException {
2103    if (regionNormalizerManager == null || !regionNormalizerManager.isNormalizerOn()) {
2104      LOG.debug("Region normalization is disabled, don't run region normalizer.");
2105      return false;
2106    }
2107    if (skipRegionManagementAction("region normalizer")) {
2108      return false;
2109    }
2110    if (assignmentManager.hasRegionsInTransition()) {
2111      return false;
2112    }
2113
2114    final Set<TableName> matchingTables = getTableDescriptors(new LinkedList<>(),
2115      ntfp.getNamespace(), ntfp.getRegex(), ntfp.getTableNames(), false).stream()
2116        .map(TableDescriptor::getTableName).collect(Collectors.toSet());
2117    final Set<TableName> allEnabledTables =
2118      tableStateManager.getTablesInStates(TableState.State.ENABLED);
2119    final List<TableName> targetTables =
2120      new ArrayList<>(Sets.intersection(matchingTables, allEnabledTables));
2121    Collections.shuffle(targetTables);
2122    return regionNormalizerManager.normalizeRegions(targetTables, isHighPriority);
2123  }
2124
2125  /** Returns Client info for use as prefix on an audit log string; who did an action */
2126  @Override
2127  public String getClientIdAuditPrefix() {
2128    return "Client=" + RpcServer.getRequestUserName().orElse(null) + "/"
2129      + RpcServer.getRemoteAddress().orElse(null);
2130  }
2131
2132  /**
2133   * Switch for the background CatalogJanitor thread. Used for testing. The thread will continue to
2134   * run. It will just be a noop if disabled.
2135   * @param b If false, the catalog janitor won't do anything.
2136   */
2137  public void setCatalogJanitorEnabled(final boolean b) {
2138    this.catalogJanitorChore.setEnabled(b);
2139  }
2140
2141  @Override
2142  public long mergeRegions(final RegionInfo[] regionsToMerge, final boolean forcible, final long ng,
2143    final long nonce) throws IOException {
2144    checkInitialized();
2145
2146    if (!isSplitOrMergeEnabled(MasterSwitchType.MERGE)) {
2147      String regionsStr = Arrays.deepToString(regionsToMerge);
2148      LOG.warn("Merge switch is off! skip merge of " + regionsStr);
2149      throw new DoNotRetryIOException(
2150        "Merge of " + regionsStr + " failed because merge switch is off");
2151    }
2152
2153    final String mergeRegionsStr = Arrays.stream(regionsToMerge).map(RegionInfo::getEncodedName)
2154      .collect(Collectors.joining(", "));
2155    return MasterProcedureUtil.submitProcedure(new NonceProcedureRunnable(this, ng, nonce) {
2156      @Override
2157      protected void run() throws IOException {
2158        getMaster().getMasterCoprocessorHost().preMergeRegions(regionsToMerge);
2159        String aid = getClientIdAuditPrefix();
2160        LOG.info("{} merge regions {}", aid, mergeRegionsStr);
2161        submitProcedure(new MergeTableRegionsProcedure(procedureExecutor.getEnvironment(),
2162          regionsToMerge, forcible));
2163        getMaster().getMasterCoprocessorHost().postMergeRegions(regionsToMerge);
2164      }
2165
2166      @Override
2167      protected String getDescription() {
2168        return "MergeTableProcedure";
2169      }
2170    });
2171  }
2172
2173  @Override
2174  public long splitRegion(final RegionInfo regionInfo, final byte[] splitRow, final long nonceGroup,
2175    final long nonce) throws IOException {
2176    checkInitialized();
2177
2178    if (!isSplitOrMergeEnabled(MasterSwitchType.SPLIT)) {
2179      LOG.warn("Split switch is off! skip split of " + regionInfo);
2180      throw new DoNotRetryIOException(
2181        "Split region " + regionInfo.getRegionNameAsString() + " failed due to split switch off");
2182    }
2183
2184    return MasterProcedureUtil
2185      .submitProcedure(new MasterProcedureUtil.NonceProcedureRunnable(this, nonceGroup, nonce) {
2186        @Override
2187        protected void run() throws IOException {
2188          getMaster().getMasterCoprocessorHost().preSplitRegion(regionInfo.getTable(), splitRow);
2189          LOG.info(getClientIdAuditPrefix() + " split " + regionInfo.getRegionNameAsString());
2190
2191          // Execute the operation asynchronously
2192          submitProcedure(getAssignmentManager().createSplitProcedure(regionInfo, splitRow));
2193        }
2194
2195        @Override
2196        protected String getDescription() {
2197          return "SplitTableProcedure";
2198        }
2199      });
2200  }
2201
2202  private void warmUpRegion(ServerName server, RegionInfo region) {
2203    FutureUtils.addListener(asyncClusterConnection.getRegionServerAdmin(server)
2204      .warmupRegion(RequestConverter.buildWarmupRegionRequest(region)), (r, e) -> {
2205        if (e != null) {
2206          LOG.warn("Failed to warm up region {} on server {}", region, server, e);
2207        }
2208      });
2209  }
2210
2211  // Public so can be accessed by tests. Blocks until move is done.
2212  // Replace with an async implementation from which you can get
2213  // a success/failure result.
2214  @InterfaceAudience.Private
2215  public void move(final byte[] encodedRegionName, byte[] destServerName) throws IOException {
2216    RegionState regionState =
2217      assignmentManager.getRegionStates().getRegionState(Bytes.toString(encodedRegionName));
2218
2219    RegionInfo hri;
2220    if (regionState != null) {
2221      hri = regionState.getRegion();
2222    } else {
2223      throw new UnknownRegionException(Bytes.toStringBinary(encodedRegionName));
2224    }
2225
2226    ServerName dest;
2227    List<ServerName> exclude = hri.getTable().isSystemTable()
2228      ? assignmentManager.getExcludedServersForSystemTable()
2229      : new ArrayList<>(1);
2230    if (
2231      destServerName != null && exclude.contains(ServerName.valueOf(Bytes.toString(destServerName)))
2232    ) {
2233      LOG.info(Bytes.toString(encodedRegionName) + " can not move to "
2234        + Bytes.toString(destServerName) + " because the server is in exclude list");
2235      destServerName = null;
2236    }
2237    if (destServerName == null || destServerName.length == 0) {
2238      LOG.info("Passed destination servername is null/empty so " + "choosing a server at random");
2239      exclude.add(regionState.getServerName());
2240      final List<ServerName> destServers = this.serverManager.createDestinationServersList(exclude);
2241      dest = balancer.randomAssignment(hri, destServers);
2242      if (dest == null) {
2243        LOG.debug("Unable to determine a plan to assign " + hri);
2244        return;
2245      }
2246    } else {
2247      ServerName candidate = ServerName.valueOf(Bytes.toString(destServerName));
2248      dest = balancer.randomAssignment(hri, Lists.newArrayList(candidate));
2249      if (dest == null) {
2250        LOG.debug("Unable to determine a plan to assign " + hri);
2251        return;
2252      }
2253      // TODO: deal with table on master for rs group.
2254      if (dest.equals(serverName)) {
2255        // To avoid unnecessary region moving later by balancer. Don't put user
2256        // regions on master.
2257        LOG.debug("Skipping move of region " + hri.getRegionNameAsString()
2258          + " to avoid unnecessary region moving later by load balancer,"
2259          + " because it should not be on master");
2260        return;
2261      }
2262    }
2263
2264    if (dest.equals(regionState.getServerName())) {
2265      LOG.debug("Skipping move of region " + hri.getRegionNameAsString()
2266        + " because region already assigned to the same server " + dest + ".");
2267      return;
2268    }
2269
2270    // Now we can do the move
2271    RegionPlan rp = new RegionPlan(hri, regionState.getServerName(), dest);
2272    assert rp.getDestination() != null : rp.toString() + " " + dest;
2273
2274    try {
2275      checkInitialized();
2276      if (this.cpHost != null) {
2277        this.cpHost.preMove(hri, rp.getSource(), rp.getDestination());
2278      }
2279
2280      TransitRegionStateProcedure proc =
2281        this.assignmentManager.createMoveRegionProcedure(rp.getRegionInfo(), rp.getDestination());
2282      if (conf.getBoolean(WARMUP_BEFORE_MOVE, DEFAULT_WARMUP_BEFORE_MOVE)) {
2283        // Warmup the region on the destination before initiating the move.
2284        // A region server could reject the close request because it either does not
2285        // have the specified region or the region is being split.
2286        LOG.info(getClientIdAuditPrefix() + " move " + rp + ", warming up region on "
2287          + rp.getDestination());
2288        warmUpRegion(rp.getDestination(), hri);
2289      }
2290      LOG.info(getClientIdAuditPrefix() + " move " + rp + ", running balancer");
2291      Future<byte[]> future = ProcedureSyncWait.submitProcedure(this.procedureExecutor, proc);
2292      try {
2293        // Is this going to work? Will we throw exception on error?
2294        // TODO: CompletableFuture rather than this stunted Future.
2295        future.get();
2296      } catch (InterruptedException | ExecutionException e) {
2297        throw new HBaseIOException(e);
2298      }
2299      if (this.cpHost != null) {
2300        this.cpHost.postMove(hri, rp.getSource(), rp.getDestination());
2301      }
2302    } catch (IOException ioe) {
2303      if (ioe instanceof HBaseIOException) {
2304        throw (HBaseIOException) ioe;
2305      }
2306      throw new HBaseIOException(ioe);
2307    }
2308  }
2309
2310  @Override
2311  public long createTable(final TableDescriptor tableDescriptor, final byte[][] splitKeys,
2312    final long nonceGroup, final long nonce) throws IOException {
2313    checkInitialized();
2314    TableDescriptor desc = getMasterCoprocessorHost().preCreateTableRegionsInfos(tableDescriptor);
2315    if (desc == null) {
2316      throw new IOException("Creation for " + tableDescriptor + " is canceled by CP");
2317    }
2318    String namespace = desc.getTableName().getNamespaceAsString();
2319    this.clusterSchemaService.getNamespace(namespace);
2320
2321    RegionInfo[] newRegions = ModifyRegionUtils.createRegionInfos(desc, splitKeys);
2322    TableDescriptorChecker.sanityCheck(conf, desc);
2323
2324    return MasterProcedureUtil
2325      .submitProcedure(new MasterProcedureUtil.NonceProcedureRunnable(this, nonceGroup, nonce) {
2326        @Override
2327        protected void run() throws IOException {
2328          getMaster().getMasterCoprocessorHost().preCreateTable(desc, newRegions);
2329
2330          LOG.info(getClientIdAuditPrefix() + " create " + desc);
2331
2332          // TODO: We can handle/merge duplicate requests, and differentiate the case of
2333          // TableExistsException by saying if the schema is the same or not.
2334          //
2335          // We need to wait for the procedure to potentially fail due to "prepare" sanity
2336          // checks. This will block only the beginning of the procedure. See HBASE-19953.
2337          ProcedurePrepareLatch latch = ProcedurePrepareLatch.createBlockingLatch();
2338          submitProcedure(
2339            new CreateTableProcedure(procedureExecutor.getEnvironment(), desc, newRegions, latch));
2340          latch.await();
2341
2342          getMaster().getMasterCoprocessorHost().postCreateTable(desc, newRegions);
2343        }
2344
2345        @Override
2346        protected String getDescription() {
2347          return "CreateTableProcedure";
2348        }
2349      });
2350  }
2351
2352  @Override
2353  public long createSystemTable(final TableDescriptor tableDescriptor) throws IOException {
2354    if (isStopped()) {
2355      throw new MasterNotRunningException();
2356    }
2357
2358    TableName tableName = tableDescriptor.getTableName();
2359    if (!(tableName.isSystemTable())) {
2360      throw new IllegalArgumentException(
2361        "Only system table creation can use this createSystemTable API");
2362    }
2363
2364    RegionInfo[] newRegions = ModifyRegionUtils.createRegionInfos(tableDescriptor, null);
2365
2366    LOG.info(getClientIdAuditPrefix() + " create " + tableDescriptor);
2367
2368    // This special create table is called locally to master. Therefore, no RPC means no need
2369    // to use nonce to detect duplicated RPC call.
2370    long procId = this.procedureExecutor.submitProcedure(
2371      new CreateTableProcedure(procedureExecutor.getEnvironment(), tableDescriptor, newRegions));
2372
2373    return procId;
2374  }
2375
2376  private void startActiveMasterManager(int infoPort) throws KeeperException {
2377    String backupZNode = ZNodePaths.joinZNode(zooKeeper.getZNodePaths().backupMasterAddressesZNode,
2378      serverName.toString());
2379    /*
2380     * Add a ZNode for ourselves in the backup master directory since we may not become the active
2381     * master. If so, we want the actual active master to know we are backup masters, so that it
2382     * won't assign regions to us if so configured. If we become the active master later,
2383     * ActiveMasterManager will delete this node explicitly. If we crash before then, ZooKeeper will
2384     * delete this node for us since it is ephemeral.
2385     */
2386    LOG.info("Adding backup master ZNode " + backupZNode);
2387    if (!MasterAddressTracker.setMasterAddress(zooKeeper, backupZNode, serverName, infoPort)) {
2388      LOG.warn("Failed create of " + backupZNode + " by " + serverName);
2389    }
2390    this.activeMasterManager.setInfoPort(infoPort);
2391    int timeout = conf.getInt(HConstants.ZK_SESSION_TIMEOUT, HConstants.DEFAULT_ZK_SESSION_TIMEOUT);
2392    // If we're a backup master, stall until a primary to write this address
2393    if (conf.getBoolean(HConstants.MASTER_TYPE_BACKUP, HConstants.DEFAULT_MASTER_TYPE_BACKUP)) {
2394      LOG.debug("HMaster started in backup mode. Stalling until master znode is written.");
2395      // This will only be a minute or so while the cluster starts up,
2396      // so don't worry about setting watches on the parent znode
2397      while (!activeMasterManager.hasActiveMaster()) {
2398        LOG.debug("Waiting for master address and cluster state znode to be written.");
2399        Threads.sleep(timeout);
2400      }
2401    }
2402    MonitoredTask status = TaskMonitor.get().createStatus("Master startup");
2403    status.setDescription("Master startup");
2404    try {
2405      if (activeMasterManager.blockUntilBecomingActiveMaster(timeout, status)) {
2406        finishActiveMasterInitialization(status);
2407      }
2408    } catch (Throwable t) {
2409      status.setStatus("Failed to become active: " + t.getMessage());
2410      LOG.error(HBaseMarkers.FATAL, "Failed to become active master", t);
2411      // HBASE-5680: Likely hadoop23 vs hadoop 20.x/1.x incompatibility
2412      if (
2413        t instanceof NoClassDefFoundError
2414          && t.getMessage().contains("org/apache/hadoop/hdfs/protocol/HdfsConstants$SafeModeAction")
2415      ) {
2416        // improved error message for this special case
2417        abort("HBase is having a problem with its Hadoop jars.  You may need to recompile "
2418          + "HBase against Hadoop version " + org.apache.hadoop.util.VersionInfo.getVersion()
2419          + " or change your hadoop jars to start properly", t);
2420      } else {
2421        abort("Unhandled exception. Starting shutdown.", t);
2422      }
2423    } finally {
2424      status.cleanup();
2425    }
2426  }
2427
2428  private static boolean isCatalogTable(final TableName tableName) {
2429    return tableName.equals(TableName.META_TABLE_NAME);
2430  }
2431
2432  @Override
2433  public long deleteTable(final TableName tableName, final long nonceGroup, final long nonce)
2434    throws IOException {
2435    checkInitialized();
2436
2437    return MasterProcedureUtil
2438      .submitProcedure(new MasterProcedureUtil.NonceProcedureRunnable(this, nonceGroup, nonce) {
2439        @Override
2440        protected void run() throws IOException {
2441          getMaster().getMasterCoprocessorHost().preDeleteTable(tableName);
2442
2443          LOG.info(getClientIdAuditPrefix() + " delete " + tableName);
2444
2445          // TODO: We can handle/merge duplicate request
2446          //
2447          // We need to wait for the procedure to potentially fail due to "prepare" sanity
2448          // checks. This will block only the beginning of the procedure. See HBASE-19953.
2449          ProcedurePrepareLatch latch = ProcedurePrepareLatch.createBlockingLatch();
2450          submitProcedure(
2451            new DeleteTableProcedure(procedureExecutor.getEnvironment(), tableName, latch));
2452          latch.await();
2453
2454          getMaster().getMasterCoprocessorHost().postDeleteTable(tableName);
2455        }
2456
2457        @Override
2458        protected String getDescription() {
2459          return "DeleteTableProcedure";
2460        }
2461      });
2462  }
2463
2464  @Override
2465  public long truncateTable(final TableName tableName, final boolean preserveSplits,
2466    final long nonceGroup, final long nonce) throws IOException {
2467    checkInitialized();
2468
2469    return MasterProcedureUtil
2470      .submitProcedure(new MasterProcedureUtil.NonceProcedureRunnable(this, nonceGroup, nonce) {
2471        @Override
2472        protected void run() throws IOException {
2473          getMaster().getMasterCoprocessorHost().preTruncateTable(tableName);
2474
2475          LOG.info(getClientIdAuditPrefix() + " truncate " + tableName);
2476          ProcedurePrepareLatch latch = ProcedurePrepareLatch.createLatch(2, 0);
2477          submitProcedure(new TruncateTableProcedure(procedureExecutor.getEnvironment(), tableName,
2478            preserveSplits, latch));
2479          latch.await();
2480
2481          getMaster().getMasterCoprocessorHost().postTruncateTable(tableName);
2482        }
2483
2484        @Override
2485        protected String getDescription() {
2486          return "TruncateTableProcedure";
2487        }
2488      });
2489  }
2490
2491  @Override
2492  public long addColumn(final TableName tableName, final ColumnFamilyDescriptor column,
2493    final long nonceGroup, final long nonce) throws IOException {
2494    checkInitialized();
2495    checkTableExists(tableName);
2496
2497    return modifyTable(tableName, new TableDescriptorGetter() {
2498
2499      @Override
2500      public TableDescriptor get() throws IOException {
2501        TableDescriptor old = getTableDescriptors().get(tableName);
2502        if (old.hasColumnFamily(column.getName())) {
2503          throw new InvalidFamilyOperationException("Column family '" + column.getNameAsString()
2504            + "' in table '" + tableName + "' already exists so cannot be added");
2505        }
2506
2507        return TableDescriptorBuilder.newBuilder(old).setColumnFamily(column).build();
2508      }
2509    }, nonceGroup, nonce, true);
2510  }
2511
2512  /**
2513   * Implement to return TableDescriptor after pre-checks
2514   */
2515  protected interface TableDescriptorGetter {
2516    TableDescriptor get() throws IOException;
2517  }
2518
2519  @Override
2520  public long modifyColumn(final TableName tableName, final ColumnFamilyDescriptor descriptor,
2521    final long nonceGroup, final long nonce) throws IOException {
2522    checkInitialized();
2523    checkTableExists(tableName);
2524    return modifyTable(tableName, new TableDescriptorGetter() {
2525
2526      @Override
2527      public TableDescriptor get() throws IOException {
2528        TableDescriptor old = getTableDescriptors().get(tableName);
2529        if (!old.hasColumnFamily(descriptor.getName())) {
2530          throw new InvalidFamilyOperationException("Family '" + descriptor.getNameAsString()
2531            + "' does not exist, so it cannot be modified");
2532        }
2533
2534        return TableDescriptorBuilder.newBuilder(old).modifyColumnFamily(descriptor).build();
2535      }
2536    }, nonceGroup, nonce, true);
2537  }
2538
2539  @Override
2540  public long modifyColumnStoreFileTracker(TableName tableName, byte[] family, String dstSFT,
2541    long nonceGroup, long nonce) throws IOException {
2542    checkInitialized();
2543    return MasterProcedureUtil
2544      .submitProcedure(new MasterProcedureUtil.NonceProcedureRunnable(this, nonceGroup, nonce) {
2545
2546        @Override
2547        protected void run() throws IOException {
2548          String sft = getMaster().getMasterCoprocessorHost()
2549            .preModifyColumnFamilyStoreFileTracker(tableName, family, dstSFT);
2550          LOG.info("{} modify column {} store file tracker of table {} to {}",
2551            getClientIdAuditPrefix(), Bytes.toStringBinary(family), tableName, sft);
2552          submitProcedure(new ModifyColumnFamilyStoreFileTrackerProcedure(
2553            procedureExecutor.getEnvironment(), tableName, family, sft));
2554          getMaster().getMasterCoprocessorHost().postModifyColumnFamilyStoreFileTracker(tableName,
2555            family, dstSFT);
2556        }
2557
2558        @Override
2559        protected String getDescription() {
2560          return "ModifyColumnFamilyStoreFileTrackerProcedure";
2561        }
2562      });
2563  }
2564
2565  @Override
2566  public long deleteColumn(final TableName tableName, final byte[] columnName,
2567    final long nonceGroup, final long nonce) throws IOException {
2568    checkInitialized();
2569    checkTableExists(tableName);
2570
2571    return modifyTable(tableName, new TableDescriptorGetter() {
2572
2573      @Override
2574      public TableDescriptor get() throws IOException {
2575        TableDescriptor old = getTableDescriptors().get(tableName);
2576
2577        if (!old.hasColumnFamily(columnName)) {
2578          throw new InvalidFamilyOperationException(
2579            "Family '" + Bytes.toString(columnName) + "' does not exist, so it cannot be deleted");
2580        }
2581        if (old.getColumnFamilyCount() == 1) {
2582          throw new InvalidFamilyOperationException("Family '" + Bytes.toString(columnName)
2583            + "' is the only column family in the table, so it cannot be deleted");
2584        }
2585        return TableDescriptorBuilder.newBuilder(old).removeColumnFamily(columnName).build();
2586      }
2587    }, nonceGroup, nonce, true);
2588  }
2589
2590  @Override
2591  public long enableTable(final TableName tableName, final long nonceGroup, final long nonce)
2592    throws IOException {
2593    checkInitialized();
2594
2595    return MasterProcedureUtil
2596      .submitProcedure(new MasterProcedureUtil.NonceProcedureRunnable(this, nonceGroup, nonce) {
2597        @Override
2598        protected void run() throws IOException {
2599          getMaster().getMasterCoprocessorHost().preEnableTable(tableName);
2600
2601          // Normally, it would make sense for this authorization check to exist inside
2602          // AccessController, but because the authorization check is done based on internal state
2603          // (rather than explicit permissions) we'll do the check here instead of in the
2604          // coprocessor.
2605          MasterQuotaManager quotaManager = getMasterQuotaManager();
2606          if (quotaManager != null) {
2607            if (quotaManager.isQuotaInitialized()) {
2608              SpaceQuotaSnapshot currSnapshotOfTable =
2609                QuotaTableUtil.getCurrentSnapshotFromQuotaTable(getConnection(), tableName);
2610              if (currSnapshotOfTable != null) {
2611                SpaceQuotaStatus quotaStatus = currSnapshotOfTable.getQuotaStatus();
2612                if (
2613                  quotaStatus.isInViolation()
2614                    && SpaceViolationPolicy.DISABLE == quotaStatus.getPolicy().orElse(null)
2615                ) {
2616                  throw new AccessDeniedException("Enabling the table '" + tableName
2617                    + "' is disallowed due to a violated space quota.");
2618                }
2619              }
2620            } else if (LOG.isTraceEnabled()) {
2621              LOG
2622                .trace("Unable to check for space quotas as the MasterQuotaManager is not enabled");
2623            }
2624          }
2625
2626          LOG.info(getClientIdAuditPrefix() + " enable " + tableName);
2627
2628          // Execute the operation asynchronously - client will check the progress of the operation
2629          // In case the request is from a <1.1 client before returning,
2630          // we want to make sure that the table is prepared to be
2631          // enabled (the table is locked and the table state is set).
2632          // Note: if the procedure throws exception, we will catch it and rethrow.
2633          final ProcedurePrepareLatch prepareLatch = ProcedurePrepareLatch.createLatch();
2634          submitProcedure(
2635            new EnableTableProcedure(procedureExecutor.getEnvironment(), tableName, prepareLatch));
2636          prepareLatch.await();
2637
2638          getMaster().getMasterCoprocessorHost().postEnableTable(tableName);
2639        }
2640
2641        @Override
2642        protected String getDescription() {
2643          return "EnableTableProcedure";
2644        }
2645      });
2646  }
2647
2648  @Override
2649  public long disableTable(final TableName tableName, final long nonceGroup, final long nonce)
2650    throws IOException {
2651    checkInitialized();
2652
2653    return MasterProcedureUtil
2654      .submitProcedure(new MasterProcedureUtil.NonceProcedureRunnable(this, nonceGroup, nonce) {
2655        @Override
2656        protected void run() throws IOException {
2657          getMaster().getMasterCoprocessorHost().preDisableTable(tableName);
2658
2659          LOG.info(getClientIdAuditPrefix() + " disable " + tableName);
2660
2661          // Execute the operation asynchronously - client will check the progress of the operation
2662          // In case the request is from a <1.1 client before returning,
2663          // we want to make sure that the table is prepared to be
2664          // enabled (the table is locked and the table state is set).
2665          // Note: if the procedure throws exception, we will catch it and rethrow.
2666          //
2667          // We need to wait for the procedure to potentially fail due to "prepare" sanity
2668          // checks. This will block only the beginning of the procedure. See HBASE-19953.
2669          final ProcedurePrepareLatch prepareLatch = ProcedurePrepareLatch.createBlockingLatch();
2670          submitProcedure(new DisableTableProcedure(procedureExecutor.getEnvironment(), tableName,
2671            false, prepareLatch));
2672          prepareLatch.await();
2673
2674          getMaster().getMasterCoprocessorHost().postDisableTable(tableName);
2675        }
2676
2677        @Override
2678        protected String getDescription() {
2679          return "DisableTableProcedure";
2680        }
2681      });
2682  }
2683
2684  private long modifyTable(final TableName tableName,
2685    final TableDescriptorGetter newDescriptorGetter, final long nonceGroup, final long nonce,
2686    final boolean shouldCheckDescriptor) throws IOException {
2687    return MasterProcedureUtil
2688      .submitProcedure(new MasterProcedureUtil.NonceProcedureRunnable(this, nonceGroup, nonce) {
2689        @Override
2690        protected void run() throws IOException {
2691          TableDescriptor oldDescriptor = getMaster().getTableDescriptors().get(tableName);
2692          TableDescriptor newDescriptor = getMaster().getMasterCoprocessorHost()
2693            .preModifyTable(tableName, oldDescriptor, newDescriptorGetter.get());
2694          TableDescriptorChecker.sanityCheck(conf, newDescriptor);
2695          LOG.info("{} modify table {} from {} to {}", getClientIdAuditPrefix(), tableName,
2696            oldDescriptor, newDescriptor);
2697
2698          // Execute the operation synchronously - wait for the operation completes before
2699          // continuing.
2700          //
2701          // We need to wait for the procedure to potentially fail due to "prepare" sanity
2702          // checks. This will block only the beginning of the procedure. See HBASE-19953.
2703          ProcedurePrepareLatch latch = ProcedurePrepareLatch.createBlockingLatch();
2704          submitProcedure(new ModifyTableProcedure(procedureExecutor.getEnvironment(),
2705            newDescriptor, latch, oldDescriptor, shouldCheckDescriptor));
2706          latch.await();
2707
2708          getMaster().getMasterCoprocessorHost().postModifyTable(tableName, oldDescriptor,
2709            newDescriptor);
2710        }
2711
2712        @Override
2713        protected String getDescription() {
2714          return "ModifyTableProcedure";
2715        }
2716      });
2717
2718  }
2719
2720  @Override
2721  public long modifyTable(final TableName tableName, final TableDescriptor newDescriptor,
2722    final long nonceGroup, final long nonce) throws IOException {
2723    checkInitialized();
2724    return modifyTable(tableName, new TableDescriptorGetter() {
2725      @Override
2726      public TableDescriptor get() throws IOException {
2727        return newDescriptor;
2728      }
2729    }, nonceGroup, nonce, false);
2730
2731  }
2732
2733  @Override
2734  public long modifyTableStoreFileTracker(TableName tableName, String dstSFT, long nonceGroup,
2735    long nonce) throws IOException {
2736    checkInitialized();
2737    return MasterProcedureUtil
2738      .submitProcedure(new MasterProcedureUtil.NonceProcedureRunnable(this, nonceGroup, nonce) {
2739
2740        @Override
2741        protected void run() throws IOException {
2742          String sft = getMaster().getMasterCoprocessorHost()
2743            .preModifyTableStoreFileTracker(tableName, dstSFT);
2744          LOG.info("{} modify table store file tracker of table {} to {}", getClientIdAuditPrefix(),
2745            tableName, sft);
2746          submitProcedure(new ModifyTableStoreFileTrackerProcedure(
2747            procedureExecutor.getEnvironment(), tableName, sft));
2748          getMaster().getMasterCoprocessorHost().postModifyTableStoreFileTracker(tableName, sft);
2749        }
2750
2751        @Override
2752        protected String getDescription() {
2753          return "ModifyTableStoreFileTrackerProcedure";
2754        }
2755      });
2756  }
2757
2758  public long restoreSnapshot(final SnapshotDescription snapshotDesc, final long nonceGroup,
2759    final long nonce, final boolean restoreAcl, final String customSFT) throws IOException {
2760    checkInitialized();
2761    getSnapshotManager().checkSnapshotSupport();
2762
2763    // Ensure namespace exists. Will throw exception if non-known NS.
2764    final TableName dstTable = TableName.valueOf(snapshotDesc.getTable());
2765    getClusterSchema().getNamespace(dstTable.getNamespaceAsString());
2766
2767    return MasterProcedureUtil
2768      .submitProcedure(new MasterProcedureUtil.NonceProcedureRunnable(this, nonceGroup, nonce) {
2769        @Override
2770        protected void run() throws IOException {
2771          setProcId(getSnapshotManager().restoreOrCloneSnapshot(snapshotDesc, getNonceKey(),
2772            restoreAcl, customSFT));
2773        }
2774
2775        @Override
2776        protected String getDescription() {
2777          return "RestoreSnapshotProcedure";
2778        }
2779      });
2780  }
2781
2782  private void checkTableExists(final TableName tableName)
2783    throws IOException, TableNotFoundException {
2784    if (!tableDescriptors.exists(tableName)) {
2785      throw new TableNotFoundException(tableName);
2786    }
2787  }
2788
2789  @Override
2790  public void checkTableModifiable(final TableName tableName)
2791    throws IOException, TableNotFoundException, TableNotDisabledException {
2792    if (isCatalogTable(tableName)) {
2793      throw new IOException("Can't modify catalog tables");
2794    }
2795    checkTableExists(tableName);
2796    TableState ts = getTableStateManager().getTableState(tableName);
2797    if (!ts.isDisabled()) {
2798      throw new TableNotDisabledException("Not DISABLED; " + ts);
2799    }
2800  }
2801
2802  public ClusterMetrics getClusterMetricsWithoutCoprocessor() throws InterruptedIOException {
2803    return getClusterMetricsWithoutCoprocessor(EnumSet.allOf(Option.class));
2804  }
2805
2806  public ClusterMetrics getClusterMetricsWithoutCoprocessor(EnumSet<Option> options)
2807    throws InterruptedIOException {
2808    ClusterMetricsBuilder builder = ClusterMetricsBuilder.newBuilder();
2809    // given that hbase1 can't submit the request with Option,
2810    // we return all information to client if the list of Option is empty.
2811    if (options.isEmpty()) {
2812      options = EnumSet.allOf(Option.class);
2813    }
2814
2815    // TASKS and/or LIVE_SERVERS will populate this map, which will be given to the builder if
2816    // not null after option processing completes.
2817    Map<ServerName, ServerMetrics> serverMetricsMap = null;
2818
2819    for (Option opt : options) {
2820      switch (opt) {
2821        case HBASE_VERSION:
2822          builder.setHBaseVersion(VersionInfo.getVersion());
2823          break;
2824        case CLUSTER_ID:
2825          builder.setClusterId(getClusterId());
2826          break;
2827        case MASTER:
2828          builder.setMasterName(getServerName());
2829          break;
2830        case BACKUP_MASTERS:
2831          builder.setBackerMasterNames(getBackupMasters());
2832          break;
2833        case TASKS: {
2834          // Master tasks
2835          builder.setMasterTasks(TaskMonitor.get().getTasks().stream()
2836            .map(task -> ServerTaskBuilder.newBuilder().setDescription(task.getDescription())
2837              .setStatus(task.getStatus())
2838              .setState(ServerTask.State.valueOf(task.getState().name()))
2839              .setStartTime(task.getStartTime()).setCompletionTime(task.getCompletionTimestamp())
2840              .build())
2841            .collect(Collectors.toList()));
2842          // TASKS is also synonymous with LIVE_SERVERS for now because task information for
2843          // regionservers is carried in ServerLoad.
2844          // Add entries to serverMetricsMap for all live servers, if we haven't already done so
2845          if (serverMetricsMap == null) {
2846            serverMetricsMap = getOnlineServers();
2847          }
2848          break;
2849        }
2850        case LIVE_SERVERS: {
2851          // Add entries to serverMetricsMap for all live servers, if we haven't already done so
2852          if (serverMetricsMap == null) {
2853            serverMetricsMap = getOnlineServers();
2854          }
2855          break;
2856        }
2857        case DEAD_SERVERS: {
2858          if (serverManager != null) {
2859            builder.setDeadServerNames(
2860              new ArrayList<>(serverManager.getDeadServers().copyServerNames()));
2861          }
2862          break;
2863        }
2864        case UNKNOWN_SERVERS: {
2865          if (serverManager != null) {
2866            builder.setUnknownServerNames(getUnknownServers());
2867          }
2868          break;
2869        }
2870        case MASTER_COPROCESSORS: {
2871          if (cpHost != null) {
2872            builder.setMasterCoprocessorNames(Arrays.asList(getMasterCoprocessors()));
2873          }
2874          break;
2875        }
2876        case REGIONS_IN_TRANSITION: {
2877          if (assignmentManager != null) {
2878            builder.setRegionsInTransition(
2879              assignmentManager.getRegionStates().getRegionsStateInTransition());
2880          }
2881          break;
2882        }
2883        case BALANCER_ON: {
2884          if (loadBalancerStateStore != null) {
2885            builder.setBalancerOn(loadBalancerStateStore.get());
2886          }
2887          break;
2888        }
2889        case MASTER_INFO_PORT: {
2890          if (infoServer != null) {
2891            builder.setMasterInfoPort(infoServer.getPort());
2892          }
2893          break;
2894        }
2895        case SERVERS_NAME: {
2896          if (serverManager != null) {
2897            builder.setServerNames(serverManager.getOnlineServersList());
2898          }
2899          break;
2900        }
2901        case TABLE_TO_REGIONS_COUNT: {
2902          if (isActiveMaster() && isInitialized() && assignmentManager != null) {
2903            try {
2904              Map<TableName, RegionStatesCount> tableRegionStatesCountMap = new HashMap<>();
2905              Map<String, TableDescriptor> tableDescriptorMap = getTableDescriptors().getAll();
2906              for (TableDescriptor tableDescriptor : tableDescriptorMap.values()) {
2907                TableName tableName = tableDescriptor.getTableName();
2908                RegionStatesCount regionStatesCount =
2909                  assignmentManager.getRegionStatesCount(tableName);
2910                tableRegionStatesCountMap.put(tableName, regionStatesCount);
2911              }
2912              builder.setTableRegionStatesCount(tableRegionStatesCountMap);
2913            } catch (IOException e) {
2914              LOG.error("Error while populating TABLE_TO_REGIONS_COUNT for Cluster Metrics..", e);
2915            }
2916          }
2917          break;
2918        }
2919        case DECOMMISSIONED_SERVERS: {
2920          if (serverManager != null) {
2921            builder.setDecommissionedServerNames(serverManager.getDrainingServersList());
2922          }
2923          break;
2924        }
2925      }
2926    }
2927
2928    if (serverMetricsMap != null) {
2929      builder.setLiveServerMetrics(serverMetricsMap);
2930    }
2931
2932    return builder.build();
2933  }
2934
2935  private List<ServerName> getUnknownServers() {
2936    if (serverManager != null) {
2937      final Set<ServerName> serverNames = getAssignmentManager().getRegionStates().getRegionStates()
2938        .stream().map(RegionState::getServerName).collect(Collectors.toSet());
2939      final List<ServerName> unknownServerNames = serverNames.stream()
2940        .filter(sn -> sn != null && serverManager.isServerUnknown(sn)).collect(Collectors.toList());
2941      return unknownServerNames;
2942    }
2943    return null;
2944  }
2945
2946  private Map<ServerName, ServerMetrics> getOnlineServers() {
2947    if (serverManager != null) {
2948      final Map<ServerName, ServerMetrics> map = new HashMap<>();
2949      serverManager.getOnlineServers().entrySet().forEach(e -> map.put(e.getKey(), e.getValue()));
2950      return map;
2951    }
2952    return null;
2953  }
2954
2955  /** Returns cluster status */
2956  public ClusterMetrics getClusterMetrics() throws IOException {
2957    return getClusterMetrics(EnumSet.allOf(Option.class));
2958  }
2959
2960  public ClusterMetrics getClusterMetrics(EnumSet<Option> options) throws IOException {
2961    if (cpHost != null) {
2962      cpHost.preGetClusterMetrics();
2963    }
2964    ClusterMetrics status = getClusterMetricsWithoutCoprocessor(options);
2965    if (cpHost != null) {
2966      cpHost.postGetClusterMetrics(status);
2967    }
2968    return status;
2969  }
2970
2971  /** Returns info port of active master or 0 if any exception occurs. */
2972  public int getActiveMasterInfoPort() {
2973    return activeMasterManager.getActiveMasterInfoPort();
2974  }
2975
2976  /**
2977   * @param sn is ServerName of the backup master
2978   * @return info port of backup master or 0 if any exception occurs.
2979   */
2980  public int getBackupMasterInfoPort(final ServerName sn) {
2981    return activeMasterManager.getBackupMasterInfoPort(sn);
2982  }
2983
2984  /**
2985   * The set of loaded coprocessors is stored in a static set. Since it's statically allocated, it
2986   * does not require that HMaster's cpHost be initialized prior to accessing it.
2987   * @return a String representation of the set of names of the loaded coprocessors.
2988   */
2989  public static String getLoadedCoprocessors() {
2990    return CoprocessorHost.getLoadedCoprocessors().toString();
2991  }
2992
2993  /** Returns timestamp in millis when HMaster was started. */
2994  public long getMasterStartTime() {
2995    return startcode;
2996  }
2997
2998  /** Returns timestamp in millis when HMaster became the active master. */
2999  public long getMasterActiveTime() {
3000    return masterActiveTime;
3001  }
3002
3003  /** Returns timestamp in millis when HMaster finished becoming the active master */
3004  public long getMasterFinishedInitializationTime() {
3005    return masterFinishedInitializationTime;
3006  }
3007
3008  public int getNumWALFiles() {
3009    return 0;
3010  }
3011
3012  public ProcedureStore getProcedureStore() {
3013    return procedureStore;
3014  }
3015
3016  public int getRegionServerInfoPort(final ServerName sn) {
3017    int port = this.serverManager.getInfoPort(sn);
3018    return port == 0
3019      ? conf.getInt(HConstants.REGIONSERVER_INFO_PORT, HConstants.DEFAULT_REGIONSERVER_INFOPORT)
3020      : port;
3021  }
3022
3023  @Override
3024  public String getRegionServerVersion(ServerName sn) {
3025    // Will return "0.0.0" if the server is not online to prevent move system region to unknown
3026    // version RS.
3027    return this.serverManager.getVersion(sn);
3028  }
3029
3030  @Override
3031  public void checkIfShouldMoveSystemRegionAsync() {
3032    assignmentManager.checkIfShouldMoveSystemRegionAsync();
3033  }
3034
3035  /** Returns array of coprocessor SimpleNames. */
3036  public String[] getMasterCoprocessors() {
3037    Set<String> masterCoprocessors = getMasterCoprocessorHost().getCoprocessors();
3038    return masterCoprocessors.toArray(new String[masterCoprocessors.size()]);
3039  }
3040
3041  @Override
3042  public void abort(String reason, Throwable cause) {
3043    if (!setAbortRequested() || isStopped()) {
3044      LOG.debug("Abort called but aborted={}, stopped={}", isAborted(), isStopped());
3045      return;
3046    }
3047    if (cpHost != null) {
3048      // HBASE-4014: dump a list of loaded coprocessors.
3049      LOG.error(HBaseMarkers.FATAL,
3050        "Master server abort: loaded coprocessors are: " + getLoadedCoprocessors());
3051    }
3052    String msg = "***** ABORTING master " + this + ": " + reason + " *****";
3053    if (cause != null) {
3054      LOG.error(HBaseMarkers.FATAL, msg, cause);
3055    } else {
3056      LOG.error(HBaseMarkers.FATAL, msg);
3057    }
3058
3059    try {
3060      stopMaster();
3061    } catch (IOException e) {
3062      LOG.error("Exception occurred while stopping master", e);
3063    }
3064  }
3065
3066  @Override
3067  public MasterCoprocessorHost getMasterCoprocessorHost() {
3068    return cpHost;
3069  }
3070
3071  @Override
3072  public MasterQuotaManager getMasterQuotaManager() {
3073    return quotaManager;
3074  }
3075
3076  @Override
3077  public ProcedureExecutor<MasterProcedureEnv> getMasterProcedureExecutor() {
3078    return procedureExecutor;
3079  }
3080
3081  @Override
3082  public ServerName getServerName() {
3083    return this.serverName;
3084  }
3085
3086  @Override
3087  public AssignmentManager getAssignmentManager() {
3088    return this.assignmentManager;
3089  }
3090
3091  @Override
3092  public CatalogJanitor getCatalogJanitor() {
3093    return this.catalogJanitorChore;
3094  }
3095
3096  public MemoryBoundedLogMessageBuffer getRegionServerFatalLogBuffer() {
3097    return rsFatals;
3098  }
3099
3100  /**
3101   * Shutdown the cluster. Master runs a coordinated stop of all RegionServers and then itself.
3102   */
3103  public void shutdown() throws IOException {
3104    TraceUtil.trace(() -> {
3105      if (cpHost != null) {
3106        cpHost.preShutdown();
3107      }
3108
3109      // Tell the servermanager cluster shutdown has been called. This makes it so when Master is
3110      // last running server, it'll stop itself. Next, we broadcast the cluster shutdown by setting
3111      // the cluster status as down. RegionServers will notice this change in state and will start
3112      // shutting themselves down. When last has exited, Master can go down.
3113      if (this.serverManager != null) {
3114        this.serverManager.shutdownCluster();
3115      }
3116      if (this.clusterStatusTracker != null) {
3117        try {
3118          this.clusterStatusTracker.setClusterDown();
3119        } catch (KeeperException e) {
3120          LOG.error("ZooKeeper exception trying to set cluster as down in ZK", e);
3121        }
3122      }
3123      // Stop the procedure executor. Will stop any ongoing assign, unassign, server crash etc.,
3124      // processing so we can go down.
3125      if (this.procedureExecutor != null) {
3126        this.procedureExecutor.stop();
3127      }
3128      // Shutdown our cluster connection. This will kill any hosted RPCs that might be going on;
3129      // this is what we want especially if the Master is in startup phase doing call outs to
3130      // hbase:meta, etc. when cluster is down. Without ths connection close, we'd have to wait on
3131      // the rpc to timeout.
3132      if (this.asyncClusterConnection != null) {
3133        this.asyncClusterConnection.close();
3134      }
3135    }, "HMaster.shutdown");
3136  }
3137
3138  public void stopMaster() throws IOException {
3139    if (cpHost != null) {
3140      cpHost.preStopMaster();
3141    }
3142    stop("Stopped by " + Thread.currentThread().getName());
3143  }
3144
3145  @Override
3146  public void stop(String msg) {
3147    if (!this.stopped) {
3148      LOG.info("***** STOPPING master '" + this + "' *****");
3149      this.stopped = true;
3150      LOG.info("STOPPED: " + msg);
3151      // Wakes run() if it is sleeping
3152      sleeper.skipSleepCycle();
3153      if (this.activeMasterManager != null) {
3154        this.activeMasterManager.stop();
3155      }
3156    }
3157  }
3158
3159  protected void checkServiceStarted() throws ServerNotRunningYetException {
3160    if (!serviceStarted) {
3161      throw new ServerNotRunningYetException("Server is not running yet");
3162    }
3163  }
3164
3165  void checkInitialized() throws PleaseHoldException, ServerNotRunningYetException,
3166    MasterNotRunningException, MasterStoppedException {
3167    checkServiceStarted();
3168    if (!isInitialized()) {
3169      throw new PleaseHoldException("Master is initializing");
3170    }
3171    if (isStopped()) {
3172      throw new MasterStoppedException();
3173    }
3174  }
3175
3176  /**
3177   * Report whether this master is currently the active master or not. If not active master, we are
3178   * parked on ZK waiting to become active. This method is used for testing.
3179   * @return true if active master, false if not.
3180   */
3181  @Override
3182  public boolean isActiveMaster() {
3183    return activeMaster;
3184  }
3185
3186  /**
3187   * Report whether this master has completed with its initialization and is ready. If ready, the
3188   * master is also the active master. A standby master is never ready. This method is used for
3189   * testing.
3190   * @return true if master is ready to go, false if not.
3191   */
3192  @Override
3193  public boolean isInitialized() {
3194    return initialized.isReady();
3195  }
3196
3197  /**
3198   * Report whether this master is started This method is used for testing.
3199   * @return true if master is ready to go, false if not.
3200   */
3201  public boolean isOnline() {
3202    return serviceStarted;
3203  }
3204
3205  /**
3206   * Report whether this master is in maintenance mode.
3207   * @return true if master is in maintenanceMode
3208   */
3209  @Override
3210  public boolean isInMaintenanceMode() {
3211    return maintenanceMode;
3212  }
3213
3214  public void setInitialized(boolean isInitialized) {
3215    procedureExecutor.getEnvironment().setEventReady(initialized, isInitialized);
3216  }
3217
3218  @Override
3219  public ProcedureEvent<?> getInitializedEvent() {
3220    return initialized;
3221  }
3222
3223  /**
3224   * Compute the average load across all region servers. Currently, this uses a very naive
3225   * computation - just uses the number of regions being served, ignoring stats about number of
3226   * requests.
3227   * @return the average load
3228   */
3229  public double getAverageLoad() {
3230    if (this.assignmentManager == null) {
3231      return 0;
3232    }
3233
3234    RegionStates regionStates = this.assignmentManager.getRegionStates();
3235    if (regionStates == null) {
3236      return 0;
3237    }
3238    return regionStates.getAverageLoad();
3239  }
3240
3241  @Override
3242  public boolean registerService(Service instance) {
3243    /*
3244     * No stacking of instances is allowed for a single service name
3245     */
3246    Descriptors.ServiceDescriptor serviceDesc = instance.getDescriptorForType();
3247    String serviceName = CoprocessorRpcUtils.getServiceName(serviceDesc);
3248    if (coprocessorServiceHandlers.containsKey(serviceName)) {
3249      LOG.error("Coprocessor service " + serviceName
3250        + " already registered, rejecting request from " + instance);
3251      return false;
3252    }
3253
3254    coprocessorServiceHandlers.put(serviceName, instance);
3255    if (LOG.isDebugEnabled()) {
3256      LOG.debug("Registered master coprocessor service: service=" + serviceName);
3257    }
3258    return true;
3259  }
3260
3261  /**
3262   * Utility for constructing an instance of the passed HMaster class.
3263   * @return HMaster instance.
3264   */
3265  public static HMaster constructMaster(Class<? extends HMaster> masterClass,
3266    final Configuration conf) {
3267    try {
3268      Constructor<? extends HMaster> c = masterClass.getConstructor(Configuration.class);
3269      return c.newInstance(conf);
3270    } catch (Exception e) {
3271      Throwable error = e;
3272      if (
3273        e instanceof InvocationTargetException
3274          && ((InvocationTargetException) e).getTargetException() != null
3275      ) {
3276        error = ((InvocationTargetException) e).getTargetException();
3277      }
3278      throw new RuntimeException("Failed construction of Master: " + masterClass.toString() + ". ",
3279        error);
3280    }
3281  }
3282
3283  /**
3284   * @see org.apache.hadoop.hbase.master.HMasterCommandLine
3285   */
3286  public static void main(String[] args) {
3287    LOG.info("STARTING service " + HMaster.class.getSimpleName());
3288    VersionInfo.logVersion();
3289    new HMasterCommandLine(HMaster.class).doMain(args);
3290  }
3291
3292  public HFileCleaner getHFileCleaner() {
3293    return this.hfileCleaners.get(0);
3294  }
3295
3296  public List<HFileCleaner> getHFileCleaners() {
3297    return this.hfileCleaners;
3298  }
3299
3300  public LogCleaner getLogCleaner() {
3301    return this.logCleaner;
3302  }
3303
3304  /** Returns the underlying snapshot manager */
3305  @Override
3306  public SnapshotManager getSnapshotManager() {
3307    return this.snapshotManager;
3308  }
3309
3310  /** Returns the underlying MasterProcedureManagerHost */
3311  @Override
3312  public MasterProcedureManagerHost getMasterProcedureManagerHost() {
3313    return mpmHost;
3314  }
3315
3316  @Override
3317  public ClusterSchema getClusterSchema() {
3318    return this.clusterSchemaService;
3319  }
3320
3321  /**
3322   * Create a new Namespace.
3323   * @param namespaceDescriptor descriptor for new Namespace
3324   * @param nonceGroup          Identifier for the source of the request, a client or process.
3325   * @param nonce               A unique identifier for this operation from the client or process
3326   *                            identified by <code>nonceGroup</code> (the source must ensure each
3327   *                            operation gets a unique id).
3328   * @return procedure id
3329   */
3330  long createNamespace(final NamespaceDescriptor namespaceDescriptor, final long nonceGroup,
3331    final long nonce) throws IOException {
3332    checkInitialized();
3333
3334    TableName.isLegalNamespaceName(Bytes.toBytes(namespaceDescriptor.getName()));
3335
3336    return MasterProcedureUtil
3337      .submitProcedure(new MasterProcedureUtil.NonceProcedureRunnable(this, nonceGroup, nonce) {
3338        @Override
3339        protected void run() throws IOException {
3340          getMaster().getMasterCoprocessorHost().preCreateNamespace(namespaceDescriptor);
3341          // We need to wait for the procedure to potentially fail due to "prepare" sanity
3342          // checks. This will block only the beginning of the procedure. See HBASE-19953.
3343          ProcedurePrepareLatch latch = ProcedurePrepareLatch.createBlockingLatch();
3344          LOG.info(getClientIdAuditPrefix() + " creating " + namespaceDescriptor);
3345          // Execute the operation synchronously - wait for the operation to complete before
3346          // continuing.
3347          setProcId(getClusterSchema().createNamespace(namespaceDescriptor, getNonceKey(), latch));
3348          latch.await();
3349          getMaster().getMasterCoprocessorHost().postCreateNamespace(namespaceDescriptor);
3350        }
3351
3352        @Override
3353        protected String getDescription() {
3354          return "CreateNamespaceProcedure";
3355        }
3356      });
3357  }
3358
3359  /**
3360   * Modify an existing Namespace.
3361   * @param nonceGroup Identifier for the source of the request, a client or process.
3362   * @param nonce      A unique identifier for this operation from the client or process identified
3363   *                   by <code>nonceGroup</code> (the source must ensure each operation gets a
3364   *                   unique id).
3365   * @return procedure id
3366   */
3367  long modifyNamespace(final NamespaceDescriptor newNsDescriptor, final long nonceGroup,
3368    final long nonce) throws IOException {
3369    checkInitialized();
3370
3371    TableName.isLegalNamespaceName(Bytes.toBytes(newNsDescriptor.getName()));
3372
3373    return MasterProcedureUtil
3374      .submitProcedure(new MasterProcedureUtil.NonceProcedureRunnable(this, nonceGroup, nonce) {
3375        @Override
3376        protected void run() throws IOException {
3377          NamespaceDescriptor oldNsDescriptor = getNamespace(newNsDescriptor.getName());
3378          getMaster().getMasterCoprocessorHost().preModifyNamespace(oldNsDescriptor,
3379            newNsDescriptor);
3380          // We need to wait for the procedure to potentially fail due to "prepare" sanity
3381          // checks. This will block only the beginning of the procedure. See HBASE-19953.
3382          ProcedurePrepareLatch latch = ProcedurePrepareLatch.createBlockingLatch();
3383          LOG.info(getClientIdAuditPrefix() + " modify " + newNsDescriptor);
3384          // Execute the operation synchronously - wait for the operation to complete before
3385          // continuing.
3386          setProcId(getClusterSchema().modifyNamespace(newNsDescriptor, getNonceKey(), latch));
3387          latch.await();
3388          getMaster().getMasterCoprocessorHost().postModifyNamespace(oldNsDescriptor,
3389            newNsDescriptor);
3390        }
3391
3392        @Override
3393        protected String getDescription() {
3394          return "ModifyNamespaceProcedure";
3395        }
3396      });
3397  }
3398
3399  /**
3400   * Delete an existing Namespace. Only empty Namespaces (no tables) can be removed.
3401   * @param nonceGroup Identifier for the source of the request, a client or process.
3402   * @param nonce      A unique identifier for this operation from the client or process identified
3403   *                   by <code>nonceGroup</code> (the source must ensure each operation gets a
3404   *                   unique id).
3405   * @return procedure id
3406   */
3407  long deleteNamespace(final String name, final long nonceGroup, final long nonce)
3408    throws IOException {
3409    checkInitialized();
3410
3411    return MasterProcedureUtil
3412      .submitProcedure(new MasterProcedureUtil.NonceProcedureRunnable(this, nonceGroup, nonce) {
3413        @Override
3414        protected void run() throws IOException {
3415          getMaster().getMasterCoprocessorHost().preDeleteNamespace(name);
3416          LOG.info(getClientIdAuditPrefix() + " delete " + name);
3417          // Execute the operation synchronously - wait for the operation to complete before
3418          // continuing.
3419          //
3420          // We need to wait for the procedure to potentially fail due to "prepare" sanity
3421          // checks. This will block only the beginning of the procedure. See HBASE-19953.
3422          ProcedurePrepareLatch latch = ProcedurePrepareLatch.createBlockingLatch();
3423          setProcId(submitProcedure(
3424            new DeleteNamespaceProcedure(procedureExecutor.getEnvironment(), name, latch)));
3425          latch.await();
3426          // Will not be invoked in the face of Exception thrown by the Procedure's execution
3427          getMaster().getMasterCoprocessorHost().postDeleteNamespace(name);
3428        }
3429
3430        @Override
3431        protected String getDescription() {
3432          return "DeleteNamespaceProcedure";
3433        }
3434      });
3435  }
3436
3437  /**
3438   * Get a Namespace
3439   * @param name Name of the Namespace
3440   * @return Namespace descriptor for <code>name</code>
3441   */
3442  NamespaceDescriptor getNamespace(String name) throws IOException {
3443    checkInitialized();
3444    if (this.cpHost != null) this.cpHost.preGetNamespaceDescriptor(name);
3445    NamespaceDescriptor nsd = this.clusterSchemaService.getNamespace(name);
3446    if (this.cpHost != null) this.cpHost.postGetNamespaceDescriptor(nsd);
3447    return nsd;
3448  }
3449
3450  /**
3451   * Get all Namespaces
3452   * @return All Namespace descriptors
3453   */
3454  List<NamespaceDescriptor> getNamespaces() throws IOException {
3455    checkInitialized();
3456    final List<NamespaceDescriptor> nsds = new ArrayList<>();
3457    if (cpHost != null) {
3458      cpHost.preListNamespaceDescriptors(nsds);
3459    }
3460    nsds.addAll(this.clusterSchemaService.getNamespaces());
3461    if (this.cpHost != null) {
3462      this.cpHost.postListNamespaceDescriptors(nsds);
3463    }
3464    return nsds;
3465  }
3466
3467  /**
3468   * List namespace names
3469   * @return All namespace names
3470   */
3471  public List<String> listNamespaces() throws IOException {
3472    checkInitialized();
3473    List<String> namespaces = new ArrayList<>();
3474    if (cpHost != null) {
3475      cpHost.preListNamespaces(namespaces);
3476    }
3477    for (NamespaceDescriptor namespace : clusterSchemaService.getNamespaces()) {
3478      namespaces.add(namespace.getName());
3479    }
3480    if (cpHost != null) {
3481      cpHost.postListNamespaces(namespaces);
3482    }
3483    return namespaces;
3484  }
3485
3486  @Override
3487  public List<TableName> listTableNamesByNamespace(String name) throws IOException {
3488    checkInitialized();
3489    return listTableNames(name, null, true);
3490  }
3491
3492  @Override
3493  public List<TableDescriptor> listTableDescriptorsByNamespace(String name) throws IOException {
3494    checkInitialized();
3495    return listTableDescriptors(name, null, null, true);
3496  }
3497
3498  @Override
3499  public boolean abortProcedure(final long procId, final boolean mayInterruptIfRunning)
3500    throws IOException {
3501    if (cpHost != null) {
3502      cpHost.preAbortProcedure(this.procedureExecutor, procId);
3503    }
3504
3505    final boolean result = this.procedureExecutor.abort(procId, mayInterruptIfRunning);
3506
3507    if (cpHost != null) {
3508      cpHost.postAbortProcedure();
3509    }
3510
3511    return result;
3512  }
3513
3514  @Override
3515  public List<Procedure<?>> getProcedures() throws IOException {
3516    if (cpHost != null) {
3517      cpHost.preGetProcedures();
3518    }
3519
3520    @SuppressWarnings({ "unchecked", "rawtypes" })
3521    List<Procedure<?>> procList = (List) this.procedureExecutor.getProcedures();
3522
3523    if (cpHost != null) {
3524      cpHost.postGetProcedures(procList);
3525    }
3526
3527    return procList;
3528  }
3529
3530  @Override
3531  public List<LockedResource> getLocks() throws IOException {
3532    if (cpHost != null) {
3533      cpHost.preGetLocks();
3534    }
3535
3536    MasterProcedureScheduler procedureScheduler =
3537      procedureExecutor.getEnvironment().getProcedureScheduler();
3538
3539    final List<LockedResource> lockedResources = procedureScheduler.getLocks();
3540
3541    if (cpHost != null) {
3542      cpHost.postGetLocks(lockedResources);
3543    }
3544
3545    return lockedResources;
3546  }
3547
3548  /**
3549   * Returns the list of table descriptors that match the specified request
3550   * @param namespace        the namespace to query, or null if querying for all
3551   * @param regex            The regular expression to match against, or null if querying for all
3552   * @param tableNameList    the list of table names, or null if querying for all
3553   * @param includeSysTables False to match only against userspace tables
3554   * @return the list of table descriptors
3555   */
3556  public List<TableDescriptor> listTableDescriptors(final String namespace, final String regex,
3557    final List<TableName> tableNameList, final boolean includeSysTables) throws IOException {
3558    List<TableDescriptor> htds = new ArrayList<>();
3559    if (cpHost != null) {
3560      cpHost.preGetTableDescriptors(tableNameList, htds, regex);
3561    }
3562    htds = getTableDescriptors(htds, namespace, regex, tableNameList, includeSysTables);
3563    if (cpHost != null) {
3564      cpHost.postGetTableDescriptors(tableNameList, htds, regex);
3565    }
3566    return htds;
3567  }
3568
3569  /**
3570   * Returns the list of table names that match the specified request
3571   * @param regex            The regular expression to match against, or null if querying for all
3572   * @param namespace        the namespace to query, or null if querying for all
3573   * @param includeSysTables False to match only against userspace tables
3574   * @return the list of table names
3575   */
3576  public List<TableName> listTableNames(final String namespace, final String regex,
3577    final boolean includeSysTables) throws IOException {
3578    List<TableDescriptor> htds = new ArrayList<>();
3579    if (cpHost != null) {
3580      cpHost.preGetTableNames(htds, regex);
3581    }
3582    htds = getTableDescriptors(htds, namespace, regex, null, includeSysTables);
3583    if (cpHost != null) {
3584      cpHost.postGetTableNames(htds, regex);
3585    }
3586    List<TableName> result = new ArrayList<>(htds.size());
3587    for (TableDescriptor htd : htds)
3588      result.add(htd.getTableName());
3589    return result;
3590  }
3591
3592  /**
3593   * Return a list of table table descriptors after applying any provided filter parameters. Note
3594   * that the user-facing description of this filter logic is presented on the class-level javadoc
3595   * of {@link NormalizeTableFilterParams}.
3596   */
3597  private List<TableDescriptor> getTableDescriptors(final List<TableDescriptor> htds,
3598    final String namespace, final String regex, final List<TableName> tableNameList,
3599    final boolean includeSysTables) throws IOException {
3600    if (tableNameList == null || tableNameList.isEmpty()) {
3601      // request for all TableDescriptors
3602      Collection<TableDescriptor> allHtds;
3603      if (namespace != null && namespace.length() > 0) {
3604        // Do a check on the namespace existence. Will fail if does not exist.
3605        this.clusterSchemaService.getNamespace(namespace);
3606        allHtds = tableDescriptors.getByNamespace(namespace).values();
3607      } else {
3608        allHtds = tableDescriptors.getAll().values();
3609      }
3610      for (TableDescriptor desc : allHtds) {
3611        if (
3612          tableStateManager.isTablePresent(desc.getTableName())
3613            && (includeSysTables || !desc.getTableName().isSystemTable())
3614        ) {
3615          htds.add(desc);
3616        }
3617      }
3618    } else {
3619      for (TableName s : tableNameList) {
3620        if (tableStateManager.isTablePresent(s)) {
3621          TableDescriptor desc = tableDescriptors.get(s);
3622          if (desc != null) {
3623            htds.add(desc);
3624          }
3625        }
3626      }
3627    }
3628
3629    // Retains only those matched by regular expression.
3630    if (regex != null) filterTablesByRegex(htds, Pattern.compile(regex));
3631    return htds;
3632  }
3633
3634  /**
3635   * Removes the table descriptors that don't match the pattern.
3636   * @param descriptors list of table descriptors to filter
3637   * @param pattern     the regex to use
3638   */
3639  private static void filterTablesByRegex(final Collection<TableDescriptor> descriptors,
3640    final Pattern pattern) {
3641    final String defaultNS = NamespaceDescriptor.DEFAULT_NAMESPACE_NAME_STR;
3642    Iterator<TableDescriptor> itr = descriptors.iterator();
3643    while (itr.hasNext()) {
3644      TableDescriptor htd = itr.next();
3645      String tableName = htd.getTableName().getNameAsString();
3646      boolean matched = pattern.matcher(tableName).matches();
3647      if (!matched && htd.getTableName().getNamespaceAsString().equals(defaultNS)) {
3648        matched = pattern.matcher(defaultNS + TableName.NAMESPACE_DELIM + tableName).matches();
3649      }
3650      if (!matched) {
3651        itr.remove();
3652      }
3653    }
3654  }
3655
3656  @Override
3657  public long getLastMajorCompactionTimestamp(TableName table) throws IOException {
3658    return getClusterMetrics(EnumSet.of(Option.LIVE_SERVERS))
3659      .getLastMajorCompactionTimestamp(table);
3660  }
3661
3662  @Override
3663  public long getLastMajorCompactionTimestampForRegion(byte[] regionName) throws IOException {
3664    return getClusterMetrics(EnumSet.of(Option.LIVE_SERVERS))
3665      .getLastMajorCompactionTimestamp(regionName);
3666  }
3667
3668  /**
3669   * Gets the mob file compaction state for a specific table. Whether all the mob files are selected
3670   * is known during the compaction execution, but the statistic is done just before compaction
3671   * starts, it is hard to know the compaction type at that time, so the rough statistics are chosen
3672   * for the mob file compaction. Only two compaction states are available,
3673   * CompactionState.MAJOR_AND_MINOR and CompactionState.NONE.
3674   * @param tableName The current table name.
3675   * @return If a given table is in mob file compaction now.
3676   */
3677  public GetRegionInfoResponse.CompactionState getMobCompactionState(TableName tableName) {
3678    AtomicInteger compactionsCount = mobCompactionStates.get(tableName);
3679    if (compactionsCount != null && compactionsCount.get() != 0) {
3680      return GetRegionInfoResponse.CompactionState.MAJOR_AND_MINOR;
3681    }
3682    return GetRegionInfoResponse.CompactionState.NONE;
3683  }
3684
3685  public void reportMobCompactionStart(TableName tableName) throws IOException {
3686    IdLock.Entry lockEntry = null;
3687    try {
3688      lockEntry = mobCompactionLock.getLockEntry(tableName.hashCode());
3689      AtomicInteger compactionsCount = mobCompactionStates.get(tableName);
3690      if (compactionsCount == null) {
3691        compactionsCount = new AtomicInteger(0);
3692        mobCompactionStates.put(tableName, compactionsCount);
3693      }
3694      compactionsCount.incrementAndGet();
3695    } finally {
3696      if (lockEntry != null) {
3697        mobCompactionLock.releaseLockEntry(lockEntry);
3698      }
3699    }
3700  }
3701
3702  public void reportMobCompactionEnd(TableName tableName) throws IOException {
3703    IdLock.Entry lockEntry = null;
3704    try {
3705      lockEntry = mobCompactionLock.getLockEntry(tableName.hashCode());
3706      AtomicInteger compactionsCount = mobCompactionStates.get(tableName);
3707      if (compactionsCount != null) {
3708        int count = compactionsCount.decrementAndGet();
3709        // remove the entry if the count is 0.
3710        if (count == 0) {
3711          mobCompactionStates.remove(tableName);
3712        }
3713      }
3714    } finally {
3715      if (lockEntry != null) {
3716        mobCompactionLock.releaseLockEntry(lockEntry);
3717      }
3718    }
3719  }
3720
3721  /**
3722   * Queries the state of the {@link LoadBalancerStateStore}. If the balancer is not initialized,
3723   * false is returned.
3724   * @return The state of the load balancer, or false if the load balancer isn't defined.
3725   */
3726  public boolean isBalancerOn() {
3727    return !isInMaintenanceMode() && loadBalancerStateStore != null && loadBalancerStateStore.get();
3728  }
3729
3730  /**
3731   * Queries the state of the {@link RegionNormalizerStateStore}. If it's not initialized, false is
3732   * returned.
3733   */
3734  public boolean isNormalizerOn() {
3735    return !isInMaintenanceMode() && getRegionNormalizerManager().isNormalizerOn();
3736  }
3737
3738  /**
3739   * Queries the state of the {@link SplitOrMergeStateStore}. If it is not initialized, false is
3740   * returned. If switchType is illegal, false will return.
3741   * @param switchType see {@link org.apache.hadoop.hbase.client.MasterSwitchType}
3742   * @return The state of the switch
3743   */
3744  @Override
3745  public boolean isSplitOrMergeEnabled(MasterSwitchType switchType) {
3746    return !isInMaintenanceMode() && splitOrMergeStateStore != null
3747      && splitOrMergeStateStore.isSplitOrMergeEnabled(switchType);
3748  }
3749
3750  /**
3751   * Fetch the configured {@link LoadBalancer} class name. If none is set, a default is returned.
3752   * <p/>
3753   * Notice that, the base load balancer will always be {@link RSGroupBasedLoadBalancer} now, so
3754   * this method will return the balancer used inside each rs group.
3755   * @return The name of the {@link LoadBalancer} in use.
3756   */
3757  public String getLoadBalancerClassName() {
3758    return conf.get(HConstants.HBASE_MASTER_LOADBALANCER_CLASS,
3759      LoadBalancerFactory.getDefaultLoadBalancerClass().getName());
3760  }
3761
3762  public SplitOrMergeStateStore getSplitOrMergeStateStore() {
3763    return splitOrMergeStateStore;
3764  }
3765
3766  @Override
3767  public RSGroupBasedLoadBalancer getLoadBalancer() {
3768    return balancer;
3769  }
3770
3771  @Override
3772  public FavoredNodesManager getFavoredNodesManager() {
3773    return balancer.getFavoredNodesManager();
3774  }
3775
3776  private long executePeerProcedure(AbstractPeerProcedure<?> procedure) throws IOException {
3777    long procId = procedureExecutor.submitProcedure(procedure);
3778    procedure.getLatch().await();
3779    return procId;
3780  }
3781
3782  @Override
3783  public long addReplicationPeer(String peerId, ReplicationPeerConfig peerConfig, boolean enabled)
3784    throws ReplicationException, IOException {
3785    LOG.info(getClientIdAuditPrefix() + " creating replication peer, id=" + peerId + ", config="
3786      + peerConfig + ", state=" + (enabled ? "ENABLED" : "DISABLED"));
3787    return executePeerProcedure(new AddPeerProcedure(peerId, peerConfig, enabled));
3788  }
3789
3790  @Override
3791  public long removeReplicationPeer(String peerId) throws ReplicationException, IOException {
3792    LOG.info(getClientIdAuditPrefix() + " removing replication peer, id=" + peerId);
3793    return executePeerProcedure(new RemovePeerProcedure(peerId));
3794  }
3795
3796  @Override
3797  public long enableReplicationPeer(String peerId) throws ReplicationException, IOException {
3798    LOG.info(getClientIdAuditPrefix() + " enable replication peer, id=" + peerId);
3799    return executePeerProcedure(new EnablePeerProcedure(peerId));
3800  }
3801
3802  @Override
3803  public long disableReplicationPeer(String peerId) throws ReplicationException, IOException {
3804    LOG.info(getClientIdAuditPrefix() + " disable replication peer, id=" + peerId);
3805    return executePeerProcedure(new DisablePeerProcedure(peerId));
3806  }
3807
3808  @Override
3809  public ReplicationPeerConfig getReplicationPeerConfig(String peerId)
3810    throws ReplicationException, IOException {
3811    if (cpHost != null) {
3812      cpHost.preGetReplicationPeerConfig(peerId);
3813    }
3814    LOG.info(getClientIdAuditPrefix() + " get replication peer config, id=" + peerId);
3815    ReplicationPeerConfig peerConfig = this.replicationPeerManager.getPeerConfig(peerId)
3816      .orElseThrow(() -> new ReplicationPeerNotFoundException(peerId));
3817    if (cpHost != null) {
3818      cpHost.postGetReplicationPeerConfig(peerId);
3819    }
3820    return peerConfig;
3821  }
3822
3823  @Override
3824  public long updateReplicationPeerConfig(String peerId, ReplicationPeerConfig peerConfig)
3825    throws ReplicationException, IOException {
3826    LOG.info(getClientIdAuditPrefix() + " update replication peer config, id=" + peerId
3827      + ", config=" + peerConfig);
3828    return executePeerProcedure(new UpdatePeerConfigProcedure(peerId, peerConfig));
3829  }
3830
3831  @Override
3832  public List<ReplicationPeerDescription> listReplicationPeers(String regex)
3833    throws ReplicationException, IOException {
3834    if (cpHost != null) {
3835      cpHost.preListReplicationPeers(regex);
3836    }
3837    LOG.debug("{} list replication peers, regex={}", getClientIdAuditPrefix(), regex);
3838    Pattern pattern = regex == null ? null : Pattern.compile(regex);
3839    List<ReplicationPeerDescription> peers = this.replicationPeerManager.listPeers(pattern);
3840    if (cpHost != null) {
3841      cpHost.postListReplicationPeers(regex);
3842    }
3843    return peers;
3844  }
3845
3846  @Override
3847  public long transitReplicationPeerSyncReplicationState(String peerId, SyncReplicationState state)
3848    throws ReplicationException, IOException {
3849    LOG.info(
3850      getClientIdAuditPrefix()
3851        + " transit current cluster state to {} in a synchronous replication peer id={}",
3852      state, peerId);
3853    return executePeerProcedure(new TransitPeerSyncReplicationStateProcedure(peerId, state));
3854  }
3855
3856  /**
3857   * Mark region server(s) as decommissioned (previously called 'draining') to prevent additional
3858   * regions from getting assigned to them. Also unload the regions on the servers asynchronously.0
3859   * @param servers Region servers to decommission.
3860   */
3861  public void decommissionRegionServers(final List<ServerName> servers, final boolean offload)
3862    throws IOException {
3863    List<ServerName> serversAdded = new ArrayList<>(servers.size());
3864    // Place the decommission marker first.
3865    String parentZnode = getZooKeeper().getZNodePaths().drainingZNode;
3866    for (ServerName server : servers) {
3867      try {
3868        String node = ZNodePaths.joinZNode(parentZnode, server.getServerName());
3869        ZKUtil.createAndFailSilent(getZooKeeper(), node);
3870      } catch (KeeperException ke) {
3871        throw new HBaseIOException(
3872          this.zooKeeper.prefix("Unable to decommission '" + server.getServerName() + "'."), ke);
3873      }
3874      if (this.serverManager.addServerToDrainList(server)) {
3875        serversAdded.add(server);
3876      }
3877    }
3878    // Move the regions off the decommissioned servers.
3879    if (offload) {
3880      final List<ServerName> destServers = this.serverManager.createDestinationServersList();
3881      for (ServerName server : serversAdded) {
3882        final List<RegionInfo> regionsOnServer = this.assignmentManager.getRegionsOnServer(server);
3883        for (RegionInfo hri : regionsOnServer) {
3884          ServerName dest = balancer.randomAssignment(hri, destServers);
3885          if (dest == null) {
3886            throw new HBaseIOException("Unable to determine a plan to move " + hri);
3887          }
3888          RegionPlan rp = new RegionPlan(hri, server, dest);
3889          this.assignmentManager.moveAsync(rp);
3890        }
3891      }
3892    }
3893  }
3894
3895  /**
3896   * List region servers marked as decommissioned (previously called 'draining') to not get regions
3897   * assigned to them.
3898   * @return List of decommissioned servers.
3899   */
3900  public List<ServerName> listDecommissionedRegionServers() {
3901    return this.serverManager.getDrainingServersList();
3902  }
3903
3904  /**
3905   * Remove decommission marker (previously called 'draining') from a region server to allow regions
3906   * assignments. Load regions onto the server asynchronously if a list of regions is given
3907   * @param server Region server to remove decommission marker from.
3908   */
3909  public void recommissionRegionServer(final ServerName server,
3910    final List<byte[]> encodedRegionNames) throws IOException {
3911    // Remove the server from decommissioned (draining) server list.
3912    String parentZnode = getZooKeeper().getZNodePaths().drainingZNode;
3913    String node = ZNodePaths.joinZNode(parentZnode, server.getServerName());
3914    try {
3915      ZKUtil.deleteNodeFailSilent(getZooKeeper(), node);
3916    } catch (KeeperException ke) {
3917      throw new HBaseIOException(
3918        this.zooKeeper.prefix("Unable to recommission '" + server.getServerName() + "'."), ke);
3919    }
3920    this.serverManager.removeServerFromDrainList(server);
3921
3922    // Load the regions onto the server if we are given a list of regions.
3923    if (encodedRegionNames == null || encodedRegionNames.isEmpty()) {
3924      return;
3925    }
3926    if (!this.serverManager.isServerOnline(server)) {
3927      return;
3928    }
3929    for (byte[] encodedRegionName : encodedRegionNames) {
3930      RegionState regionState =
3931        assignmentManager.getRegionStates().getRegionState(Bytes.toString(encodedRegionName));
3932      if (regionState == null) {
3933        LOG.warn("Unknown region " + Bytes.toStringBinary(encodedRegionName));
3934        continue;
3935      }
3936      RegionInfo hri = regionState.getRegion();
3937      if (server.equals(regionState.getServerName())) {
3938        LOG.info("Skipping move of region " + hri.getRegionNameAsString()
3939          + " because region already assigned to the same server " + server + ".");
3940        continue;
3941      }
3942      RegionPlan rp = new RegionPlan(hri, regionState.getServerName(), server);
3943      this.assignmentManager.moveAsync(rp);
3944    }
3945  }
3946
3947  @Override
3948  public LockManager getLockManager() {
3949    return lockManager;
3950  }
3951
3952  public QuotaObserverChore getQuotaObserverChore() {
3953    return this.quotaObserverChore;
3954  }
3955
3956  public SpaceQuotaSnapshotNotifier getSpaceQuotaSnapshotNotifier() {
3957    return this.spaceQuotaSnapshotNotifier;
3958  }
3959
3960  @SuppressWarnings("unchecked")
3961  private RemoteProcedure<MasterProcedureEnv, ?> getRemoteProcedure(long procId) {
3962    Procedure<?> procedure = procedureExecutor.getProcedure(procId);
3963    if (procedure == null) {
3964      return null;
3965    }
3966    assert procedure instanceof RemoteProcedure;
3967    return (RemoteProcedure<MasterProcedureEnv, ?>) procedure;
3968  }
3969
3970  public void remoteProcedureCompleted(long procId) {
3971    LOG.debug("Remote procedure done, pid={}", procId);
3972    RemoteProcedure<MasterProcedureEnv, ?> procedure = getRemoteProcedure(procId);
3973    if (procedure != null) {
3974      procedure.remoteOperationCompleted(procedureExecutor.getEnvironment());
3975    }
3976  }
3977
3978  public void remoteProcedureFailed(long procId, RemoteProcedureException error) {
3979    LOG.debug("Remote procedure failed, pid={}", procId, error);
3980    RemoteProcedure<MasterProcedureEnv, ?> procedure = getRemoteProcedure(procId);
3981    if (procedure != null) {
3982      procedure.remoteOperationFailed(procedureExecutor.getEnvironment(), error);
3983    }
3984  }
3985
3986  /**
3987   * Reopen regions provided in the argument
3988   * @param tableName   The current table name
3989   * @param regionNames The region names of the regions to reopen
3990   * @param nonceGroup  Identifier for the source of the request, a client or process
3991   * @param nonce       A unique identifier for this operation from the client or process identified
3992   *                    by <code>nonceGroup</code> (the source must ensure each operation gets a
3993   *                    unique id).
3994   * @return procedure Id
3995   * @throws IOException if reopening region fails while running procedure
3996   */
3997  long reopenRegions(final TableName tableName, final List<byte[]> regionNames,
3998    final long nonceGroup, final long nonce) throws IOException {
3999
4000    return MasterProcedureUtil
4001      .submitProcedure(new MasterProcedureUtil.NonceProcedureRunnable(this, nonceGroup, nonce) {
4002
4003        @Override
4004        protected void run() throws IOException {
4005          submitProcedure(new ReopenTableRegionsProcedure(tableName, regionNames));
4006        }
4007
4008        @Override
4009        protected String getDescription() {
4010          return "ReopenTableRegionsProcedure";
4011        }
4012
4013      });
4014
4015  }
4016
4017  @Override
4018  public ReplicationPeerManager getReplicationPeerManager() {
4019    return replicationPeerManager;
4020  }
4021
4022  public HashMap<String, List<Pair<ServerName, ReplicationLoadSource>>>
4023    getReplicationLoad(ServerName[] serverNames) {
4024    List<ReplicationPeerDescription> peerList = this.getReplicationPeerManager().listPeers(null);
4025    if (peerList == null) {
4026      return null;
4027    }
4028    HashMap<String, List<Pair<ServerName, ReplicationLoadSource>>> replicationLoadSourceMap =
4029      new HashMap<>(peerList.size());
4030    peerList.stream()
4031      .forEach(peer -> replicationLoadSourceMap.put(peer.getPeerId(), new ArrayList<>()));
4032    for (ServerName serverName : serverNames) {
4033      List<ReplicationLoadSource> replicationLoadSources =
4034        getServerManager().getLoad(serverName).getReplicationLoadSourceList();
4035      for (ReplicationLoadSource replicationLoadSource : replicationLoadSources) {
4036        List<Pair<ServerName, ReplicationLoadSource>> replicationLoadSourceList =
4037          replicationLoadSourceMap.get(replicationLoadSource.getPeerID());
4038        if (replicationLoadSourceList == null) {
4039          LOG.debug("{} does not exist, but it exists "
4040            + "in znode(/hbase/replication/rs). when the rs restarts, peerId is deleted, so "
4041            + "we just need to ignore it", replicationLoadSource.getPeerID());
4042          continue;
4043        }
4044        replicationLoadSourceList.add(new Pair<>(serverName, replicationLoadSource));
4045      }
4046    }
4047    for (List<Pair<ServerName, ReplicationLoadSource>> loads : replicationLoadSourceMap.values()) {
4048      if (loads.size() > 0) {
4049        loads.sort(Comparator.comparingLong(load -> (-1) * load.getSecond().getReplicationLag()));
4050      }
4051    }
4052    return replicationLoadSourceMap;
4053  }
4054
4055  /**
4056   * This method modifies the master's configuration in order to inject replication-related features
4057   */
4058  @InterfaceAudience.Private
4059  public static void decorateMasterConfiguration(Configuration conf) {
4060    String plugins = conf.get(HBASE_MASTER_LOGCLEANER_PLUGINS);
4061    String cleanerClass = ReplicationLogCleaner.class.getCanonicalName();
4062    if (plugins == null || !plugins.contains(cleanerClass)) {
4063      conf.set(HBASE_MASTER_LOGCLEANER_PLUGINS, plugins + "," + cleanerClass);
4064    }
4065    if (ReplicationUtils.isReplicationForBulkLoadDataEnabled(conf)) {
4066      plugins = conf.get(HFileCleaner.MASTER_HFILE_CLEANER_PLUGINS);
4067      cleanerClass = ReplicationHFileCleaner.class.getCanonicalName();
4068      if (!plugins.contains(cleanerClass)) {
4069        conf.set(HFileCleaner.MASTER_HFILE_CLEANER_PLUGINS, plugins + "," + cleanerClass);
4070      }
4071    }
4072  }
4073
4074  public SnapshotQuotaObserverChore getSnapshotQuotaObserverChore() {
4075    return this.snapshotQuotaChore;
4076  }
4077
4078  public ActiveMasterManager getActiveMasterManager() {
4079    return activeMasterManager;
4080  }
4081
4082  @Override
4083  public SyncReplicationReplayWALManager getSyncReplicationReplayWALManager() {
4084    return this.syncReplicationReplayWALManager;
4085  }
4086
4087  public HbckChore getHbckChore() {
4088    return this.hbckChore;
4089  }
4090
4091  @Override
4092  public void runReplicationBarrierCleaner() {
4093    ReplicationBarrierCleaner rbc = this.replicationBarrierCleaner;
4094    if (rbc != null) {
4095      rbc.chore();
4096    }
4097  }
4098
4099  @Override
4100  public RSGroupInfoManager getRSGroupInfoManager() {
4101    return rsGroupInfoManager;
4102  }
4103
4104  /**
4105   * Get the compaction state of the table
4106   * @param tableName The table name
4107   * @return CompactionState Compaction state of the table
4108   */
4109  public CompactionState getCompactionState(final TableName tableName) {
4110    CompactionState compactionState = CompactionState.NONE;
4111    try {
4112      List<RegionInfo> regions = assignmentManager.getRegionStates().getRegionsOfTable(tableName);
4113      for (RegionInfo regionInfo : regions) {
4114        ServerName serverName =
4115          assignmentManager.getRegionStates().getRegionServerOfRegion(regionInfo);
4116        if (serverName == null) {
4117          continue;
4118        }
4119        ServerMetrics sl = serverManager.getLoad(serverName);
4120        if (sl == null) {
4121          continue;
4122        }
4123        RegionMetrics regionMetrics = sl.getRegionMetrics().get(regionInfo.getRegionName());
4124        if (regionMetrics.getCompactionState() == CompactionState.MAJOR) {
4125          if (compactionState == CompactionState.MINOR) {
4126            compactionState = CompactionState.MAJOR_AND_MINOR;
4127          } else {
4128            compactionState = CompactionState.MAJOR;
4129          }
4130        } else if (regionMetrics.getCompactionState() == CompactionState.MINOR) {
4131          if (compactionState == CompactionState.MAJOR) {
4132            compactionState = CompactionState.MAJOR_AND_MINOR;
4133          } else {
4134            compactionState = CompactionState.MINOR;
4135          }
4136        }
4137      }
4138    } catch (Exception e) {
4139      compactionState = null;
4140      LOG.error("Exception when get compaction state for " + tableName.getNameAsString(), e);
4141    }
4142    return compactionState;
4143  }
4144
4145  @Override
4146  public MetaLocationSyncer getMetaLocationSyncer() {
4147    return metaLocationSyncer;
4148  }
4149
4150  @RestrictedApi(explanation = "Should only be called in tests", link = "",
4151      allowedOnPath = ".*/src/test/.*")
4152  public MasterRegion getMasterRegion() {
4153    return masterRegion;
4154  }
4155
4156  @Override
4157  public void onConfigurationChange(Configuration newConf) {
4158    try {
4159      Superusers.initialize(newConf);
4160    } catch (IOException e) {
4161      LOG.warn("Failed to initialize SuperUsers on reloading of the configuration");
4162    }
4163    // append the quotas observer back to the master coprocessor key
4164    setQuotasObserver(newConf);
4165    // update region server coprocessor if the configuration has changed.
4166    if (
4167      CoprocessorConfigurationUtil.checkConfigurationChange(getConfiguration(), newConf,
4168        CoprocessorHost.MASTER_COPROCESSOR_CONF_KEY) && !maintenanceMode
4169    ) {
4170      LOG.info("Update the master coprocessor(s) because the configuration has changed");
4171      initializeCoprocessorHost(newConf);
4172    }
4173  }
4174
4175  @Override
4176  protected NamedQueueRecorder createNamedQueueRecord() {
4177    final boolean isBalancerDecisionRecording =
4178      conf.getBoolean(BaseLoadBalancer.BALANCER_DECISION_BUFFER_ENABLED,
4179        BaseLoadBalancer.DEFAULT_BALANCER_DECISION_BUFFER_ENABLED);
4180    final boolean isBalancerRejectionRecording =
4181      conf.getBoolean(BaseLoadBalancer.BALANCER_REJECTION_BUFFER_ENABLED,
4182        BaseLoadBalancer.DEFAULT_BALANCER_REJECTION_BUFFER_ENABLED);
4183    if (isBalancerDecisionRecording || isBalancerRejectionRecording) {
4184      return NamedQueueRecorder.getInstance(conf);
4185    } else {
4186      return null;
4187    }
4188  }
4189
4190  @Override
4191  protected boolean clusterMode() {
4192    return true;
4193  }
4194
4195  public String getClusterId() {
4196    if (activeMaster) {
4197      return clusterId;
4198    }
4199    return cachedClusterId.getFromCacheOrFetch();
4200  }
4201
4202  public Optional<ServerName> getActiveMaster() {
4203    return activeMasterManager.getActiveMasterServerName();
4204  }
4205
4206  public List<ServerName> getBackupMasters() {
4207    return activeMasterManager.getBackupMasters();
4208  }
4209
4210  @Override
4211  public Iterator<ServerName> getBootstrapNodes() {
4212    return regionServerTracker.getRegionServers().iterator();
4213  }
4214
4215  @Override
4216  public List<HRegionLocation> getMetaLocations() {
4217    return metaRegionLocationCache.getMetaRegionLocations();
4218  }
4219
4220  @Override
4221  public void flushMasterStore() throws IOException {
4222    LOG.info("Force flush master local region.");
4223    if (this.cpHost != null) {
4224      try {
4225        cpHost.preMasterStoreFlush();
4226      } catch (IOException ioe) {
4227        LOG.error("Error invoking master coprocessor preMasterStoreFlush()", ioe);
4228      }
4229    }
4230    masterRegion.flush(true);
4231    if (this.cpHost != null) {
4232      try {
4233        cpHost.postMasterStoreFlush();
4234      } catch (IOException ioe) {
4235        LOG.error("Error invoking master coprocessor postMasterStoreFlush()", ioe);
4236      }
4237    }
4238  }
4239
4240  public Collection<ServerName> getLiveRegionServers() {
4241    return regionServerTracker.getRegionServers();
4242  }
4243
4244  @RestrictedApi(explanation = "Should only be called in tests", link = "",
4245      allowedOnPath = ".*/src/test/.*")
4246  void setLoadBalancer(RSGroupBasedLoadBalancer loadBalancer) {
4247    this.balancer = loadBalancer;
4248  }
4249
4250  @RestrictedApi(explanation = "Should only be called in tests", link = "",
4251      allowedOnPath = ".*/src/test/.*")
4252  void setAssignmentManager(AssignmentManager assignmentManager) {
4253    this.assignmentManager = assignmentManager;
4254  }
4255
4256  @RestrictedApi(explanation = "Should only be called in tests", link = "",
4257      allowedOnPath = ".*/src/test/.*")
4258  static void setDisableBalancerChoreForTest(boolean disable) {
4259    disableBalancerChoreForTest = disable;
4260  }
4261
4262  @RestrictedApi(explanation = "Should only be called in tests", link = "",
4263      allowedOnPath = ".*/src/test/.*")
4264  public ConfigurationManager getConfigurationManager() {
4265    return configurationManager;
4266  }
4267
4268  private void setQuotasObserver(Configuration conf) {
4269    // Add the Observer to delete quotas on table deletion before starting all CPs by
4270    // default with quota support, avoiding if user specifically asks to not load this Observer.
4271    if (QuotaUtil.isQuotaEnabled(conf)) {
4272      updateConfigurationForQuotasObserver(conf);
4273    }
4274  }
4275
4276  private void initializeCoprocessorHost(Configuration conf) {
4277    // initialize master side coprocessors before we start handling requests
4278    this.cpHost = new MasterCoprocessorHost(this, conf);
4279  }
4280
4281}