@InterfaceAudience.Private public class ServerManager extends Object implements ConfigurationObserver
Maintains lists of online and dead servers. Processes the startups, shutdowns, and deaths of region servers.
Servers are distinguished in two different ways. A given server has a location, specified by hostname and port, and of which there can only be one online at any given time. A server instance is specified by the location (hostname and port) as well as the startcode (timestamp from when the server was started). This is used to differentiate a restarted instance of a given server from the original instance.
If a server is known not to be running any more, it is called dead. The dead server needs to be handled by a ServerShutdownHandler. If the handler is not enabled yet, the server can't be handled right away so it is queued up. After the handler is enabled, the server will be submitted to a handler for processing. However, the handler may be only partially enabled. If so, the server cannot be fully processed and is queued up for further processing. A server is fully processed only after the handler is fully enabled and has completed the handling.
Modifier and Type | Class and Description |
---|---|
private class |
ServerManager.FlushedSequenceIdFlusher |
static class |
ServerManager.ServerLiveState |
Modifier and Type | Field and Description |
---|---|
private AtomicBoolean |
clusterShutdown |
private DeadServer |
deadservers |
private ArrayList<ServerName> |
drainingServers
List of region servers that should not get any more new regions.
|
private ServerManager.FlushedSequenceIdFlusher |
flushedSeqIdFlusher |
static String |
FLUSHEDSEQUENCEID_FLUSHER_INTERVAL |
static int |
FLUSHEDSEQUENCEID_FLUSHER_INTERVAL_DEFAULT |
private ConcurrentNavigableMap<byte[],Long> |
flushedSequenceIdByRegion
The last flushed sequence id for a region.
|
private boolean |
isFlushSeqIdPersistInProgress |
private static String |
LAST_FLUSHED_SEQ_ID_FILE
File on hdfs to store last flushed sequence id of regions
|
private List<ServerListener> |
listeners
Listeners that are called on server events.
|
private static org.slf4j.Logger |
LOG |
private MasterServices |
master |
static String |
MAX_CLOCK_SKEW_MS |
private long |
maxSkew |
private ConcurrentNavigableMap<ServerName,ServerMetrics> |
onlineServers
Map of registered servers to their current load
|
static String |
PERSIST_FLUSHEDSEQUENCEID
See HBASE-20727. If set to true, flushedSequenceIdByRegion and storeFlushedSequenceIdsByRegion
will be persisted to HDFS and loaded when the master restarts, to speed up log splitting.
|
static boolean |
PERSIST_FLUSHEDSEQUENCEID_DEFAULT |
private boolean |
persistFlushedSequenceId |
private boolean |
rejectDecommissionedHostsConfig
Configured value of HConstants.REJECT_DECOMMISSIONED_HOSTS_KEY
|
private RegionServerList |
storage |
private ConcurrentNavigableMap<byte[],ConcurrentNavigableMap<byte[],Long>> |
storeFlushedSequenceIdsByRegion
The last flushed sequence id for a store in a region.
|
static String |
WAIT_ON_REGIONSERVERS_INTERVAL |
static String |
WAIT_ON_REGIONSERVERS_MAXTOSTART |
static String |
WAIT_ON_REGIONSERVERS_MINTOSTART |
static String |
WAIT_ON_REGIONSERVERS_TIMEOUT |
private long |
warningSkew |
Constructor and Description |
---|
ServerManager(MasterServices master,
RegionServerList storage)
Constructor.
|
Modifier and Type | Method and Description |
---|---|
boolean |
addServerToDrainList(ServerName sn)
Add the server to the drain list.
|
boolean |
areDeadServersInProgress()
Checks if any dead servers are currently in progress.
|
(package private) boolean |
checkAndRecordNewServer(ServerName serverName,
ServerMetrics sl)
Checks whether a server with the same host and port already exists; if not, or if the existing
one has a smaller start code, records it.
|
private void |
checkClockSkew(ServerName serverName,
long serverCurrentTime)
Checks the clock skew between the server and the master.
|
private void |
checkIsDead(ServerName serverName,
String what)
Called when a RegionServer first reports in for duty and thereafter each time it heartbeats, to
make sure it has not been marked as dead.
|
private void |
checkRejectableDecommissionedStatus(ServerName sn)
Checks if the Master is configured to reject decommissioned hosts or not.
|
(package private) void |
clearDeadServersWithSameHostNameAndPortOfOnlineServer()
Clears any dead server with the same host name and port as an online server.
|
static void |
closeRegionSilentlyAndWait(AsyncClusterConnection connection,
ServerName server,
RegionInfo region,
long timeout)
Contacts a region server and waits up to timeout ms to close the region.
|
int |
countOfRegionServers()
Returns the count of active regionservers
|
List<ServerName> |
createDestinationServersList()
Calls
createDestinationServersList(java.util.List<org.apache.hadoop.hbase.ServerName>) without server to exclude. |
List<ServerName> |
createDestinationServersList(List<ServerName> serversToExclude)
Creates a list of possible destinations for a region.
|
long |
expireServer(ServerName serverName)
Expire the passed server.
|
(package private) long |
expireServer(ServerName serverName,
boolean force) |
(package private) void |
findDeadServersAndProcess(Set<ServerName> deadServersFromPE,
Set<ServerName> liveServersFromWALDir)
Find out the region servers crashed between the crash of the previous master instance and the
current master instance and schedule SCP for them.
|
ServerName |
findServerWithSameHostnamePortWithLock(ServerName serverName)
Assumes onlineServers is locked.
|
double |
getAverageLoad()
Compute the average load across all region servers.
|
DeadServer |
getDeadServers() |
List<ServerName> |
getDrainingServersList()
Returns A copy of the internal list of draining servers.
|
ConcurrentNavigableMap<byte[],Long> |
getFlushedSequenceIdByRegion() |
int |
getInfoPort(ServerName serverName) |
org.apache.hadoop.hbase.shaded.protobuf.generated.ClusterStatusProtos.RegionStoreSequenceIds |
getLastFlushedSequenceId(byte[] encodedRegionName) |
ServerMetrics |
getLoad(ServerName serverName)
Returns ServerMetrics if serverName is known else null
|
private int |
getMinToStart()
Calculate min necessary to start.
|
Map<ServerName,ServerMetrics> |
getOnlineServers()
Returns Read-only map of servers to serverinfo
|
List<ServerName> |
getOnlineServersList()
Returns A copy of the internal list of online servers.
|
List<ServerName> |
getOnlineServersListWithPredicator(List<ServerName> keys,
Predicate<ServerMetrics> idleServerPredicator) |
private List<String> |
getRegionServersInZK(ZKWatcher zkw) |
boolean |
getRejectDecommissionedHostsConfig(org.apache.hadoop.conf.Configuration conf)
Reads the value of HConstants.REJECT_DECOMMISSIONED_HOSTS_KEY from the config and returns it
|
private String |
getStrForMax(int max) |
String |
getVersion(ServerName serverName)
May return "0.0.0" when server is not online
|
int |
getVersionNumber(ServerName serverName)
May return 0 when server is not online.
|
boolean |
isClusterShutdown() |
boolean |
isRegionInServerManagerStates(RegionInfo hri) |
boolean |
isServerDead(ServerName serverName)
Check if a server is known to be dead.
|
ServerManager.ServerLiveState |
isServerKnownAndOnline(ServerName serverName)
Returns whether the server is online, dead, or unknown.
|
boolean |
isServerOnline(ServerName serverName) |
boolean |
isServerUnknown(ServerName serverName)
Check if a server is unknown.
|
(package private) void |
letRegionServersShutdown() |
void |
loadLastFlushedSequenceIds()
Load last flushed sequence id of each region from HDFS, if persisted
|
void |
moveFromOnlineToDeadServers(ServerName sn)
Called when server has expired.
|
void |
onConfigurationChange(org.apache.hadoop.conf.Configuration conf)
Implementation of the ConfigurationObserver interface.
|
private void |
persistRegionLastFlushedSequenceIds()
Persist last flushed sequence id of each region to HDFS
|
(package private) void |
recordNewServerWithLock(ServerName serverName,
ServerMetrics sl)
Adds the server to the onlineServers list.
|
void |
regionServerReport(ServerName sn,
ServerMetrics sl) |
(package private) ServerName |
regionServerStartup(org.apache.hadoop.hbase.shaded.protobuf.generated.RegionServerStatusProtos.RegionServerStartupRequest request,
int versionNumber,
String version,
InetAddress ia)
Let the server manager know a new regionserver has come online
|
void |
registerListener(ServerListener listener)
Add the listener to the notification list.
|
void |
removeDeletedRegionFromLoadedFlushedSequenceIds()
Regions may have been removed between latest persist of FlushedSequenceIds and master abort.
|
void |
removeRegion(RegionInfo regionInfo)
Called by delete table and similar to notify the ServerManager that a region was removed.
|
void |
removeRegions(List<RegionInfo> regions)
Called by delete table and similar to notify the ServerManager that a region was removed.
|
boolean |
removeServerFromDrainList(ServerName sn) |
void |
shutdownCluster() |
void |
startChore()
start chore in ServerManager
|
void |
stop()
Stop the ServerManager.
|
boolean |
unregisterListener(ServerListener listener)
Remove the listener from the notification list.
|
private void |
updateLastFlushedSequenceIds(ServerName sn,
ServerMetrics hsl)
Updates last flushed sequence Ids for the regions on server sn
|
void |
waitForRegionServers(MonitoredTask status)
Wait for the region servers to report in.
|
public static final String WAIT_ON_REGIONSERVERS_MAXTOSTART
public static final String WAIT_ON_REGIONSERVERS_MINTOSTART
public static final String WAIT_ON_REGIONSERVERS_TIMEOUT
public static final String WAIT_ON_REGIONSERVERS_INTERVAL
public static final String PERSIST_FLUSHEDSEQUENCEID
public static final boolean PERSIST_FLUSHEDSEQUENCEID_DEFAULT
public static final String FLUSHEDSEQUENCEID_FLUSHER_INTERVAL
public static final int FLUSHEDSEQUENCEID_FLUSHER_INTERVAL_DEFAULT
public static final String MAX_CLOCK_SKEW_MS
private static final org.slf4j.Logger LOG
private AtomicBoolean clusterShutdown
private final ConcurrentNavigableMap<byte[],Long> flushedSequenceIdByRegion
private boolean persistFlushedSequenceId
private volatile boolean isFlushSeqIdPersistInProgress
private static final String LAST_FLUSHED_SEQ_ID_FILE
private ServerManager.FlushedSequenceIdFlusher flushedSeqIdFlusher
private final ConcurrentNavigableMap<byte[],ConcurrentNavigableMap<byte[],Long>> storeFlushedSequenceIdsByRegion
private final ConcurrentNavigableMap<ServerName,ServerMetrics> onlineServers
private final ArrayList<ServerName> drainingServers
private final MasterServices master
private final RegionServerList storage
private final DeadServer deadservers
private final long maxSkew
private final long warningSkew
private List<ServerListener> listeners
private volatile boolean rejectDecommissionedHostsConfig
public ServerManager(MasterServices master, RegionServerList storage)
public void onConfigurationChange(org.apache.hadoop.conf.Configuration conf)
onConfigurationChange
in interface ConfigurationObserver
conf
- Server configuration instance
public boolean getRejectDecommissionedHostsConfig(org.apache.hadoop.conf.Configuration conf)
conf
- Configuration instance of the Master
public void registerListener(ServerListener listener)
listener
- The ServerListener to register
public boolean unregisterListener(ServerListener listener)
listener
- The ServerListener to unregister
ServerName regionServerStartup(org.apache.hadoop.hbase.shaded.protobuf.generated.RegionServerStatusProtos.RegionServerStartupRequest request, int versionNumber, String version, InetAddress ia) throws IOException
request
- the startup request
versionNumber
- the version number of the new regionserver
version
- the version of the new regionserver, could contain strings like "SNAPSHOT"
ia
- the InetAddress from which the request is received
IOException
private void updateLastFlushedSequenceIds(ServerName sn, ServerMetrics hsl)
public void regionServerReport(ServerName sn, ServerMetrics sl) throws YouAreDeadException
YouAreDeadException
private void checkRejectableDecommissionedStatus(ServerName sn) throws DecommissionedHostRejectedException
sn
- The ServerName to check for
DecommissionedHostRejectedException
- if the Master is configured to reject
decommissioned hosts and this host exists in the
list of the decommissioned servers
boolean checkAndRecordNewServer(ServerName serverName, ServerMetrics sl)
serverName
- the server to check and record
sl
- the server load on the server
void findDeadServersAndProcess(Set<ServerName> deadServersFromPE, Set<ServerName> liveServersFromWALDir)
RegionServerTracker
has already helped us to construct the online servers set
by scanning zookeeper, now we can compare the online servers with liveServersFromWALDir
to find out whether there are servers which are already dead.
Must be called inside the initialization method of RegionServerTracker
to avoid
concurrency issue.
deadServersFromPE
- the region servers which already have a SCP associated.
liveServersFromWALDir
- the live region servers from wal directory.
private void checkClockSkew(ServerName serverName, long serverCurrentTime) throws ClockOutOfSyncException
serverName
- Incoming server's name
ClockOutOfSyncException
- if the skew exceeds the configured max value
private void checkIsDead(ServerName serverName, String what) throws YouAreDeadException
what
- START or REPORT
YouAreDeadException
public ServerName findServerWithSameHostnamePortWithLock(ServerName serverName)
void recordNewServerWithLock(ServerName serverName, ServerMetrics sl)
serverName
- The remote server's name.
public ConcurrentNavigableMap<byte[],Long> getFlushedSequenceIdByRegion()
public org.apache.hadoop.hbase.shaded.protobuf.generated.ClusterStatusProtos.RegionStoreSequenceIds getLastFlushedSequenceId(byte[] encodedRegionName)
public ServerMetrics getLoad(ServerName serverName)
public double getAverageLoad()
public int countOfRegionServers()
public Map<ServerName,ServerMetrics> getOnlineServers()
public DeadServer getDeadServers()
public boolean areDeadServersInProgress() throws IOException
IOException
void letRegionServersShutdown()
private List<String> getRegionServersInZK(ZKWatcher zkw) throws org.apache.zookeeper.KeeperException
org.apache.zookeeper.KeeperException
public long expireServer(ServerName serverName)
Procedure.NO_PROC_ID
if we did not
(could happen for many reasons including the fact that it is this server that is going
down or we already have queued an SCP for this server or SCP processing is currently
disabled because we are in startup phase).
long expireServer(ServerName serverName, boolean force)
public void moveFromOnlineToDeadServers(ServerName sn)
public boolean removeServerFromDrainList(ServerName sn)
public boolean addServerToDrainList(ServerName sn)
public static void closeRegionSilentlyAndWait(AsyncClusterConnection connection, ServerName server, RegionInfo region, long timeout) throws IOException, InterruptedException
IOException
InterruptedException
private int getMinToStart()
public void waitForRegionServers(MonitoredTask status) throws InterruptedException
InterruptedException
private String getStrForMax(int max)
public List<ServerName> getOnlineServersList()
public List<ServerName> getOnlineServersListWithPredicator(List<ServerName> keys, Predicate<ServerMetrics> idleServerPredicator)
keys
- The target server name
idleServerPredicator
- Evaluates the server on the given load
public List<ServerName> getDrainingServersList()
public boolean isServerOnline(ServerName serverName)
public ServerManager.ServerLiveState isServerKnownAndOnline(ServerName serverName)
public boolean isServerDead(ServerName serverName)
public boolean isServerUnknown(ServerName serverName)
public void shutdownCluster()
public boolean isClusterShutdown()
public void startChore()
public void stop()
public List<ServerName> createDestinationServersList(List<ServerName> serversToExclude)
serversToExclude
- can be null if there is no server to exclude
public List<ServerName> createDestinationServersList()
createDestinationServersList(java.util.List<org.apache.hadoop.hbase.ServerName>)
without server to exclude.
void clearDeadServersWithSameHostNameAndPortOfOnlineServer()
public void removeRegion(RegionInfo regionInfo)
public boolean isRegionInServerManagerStates(RegionInfo hri)
public void removeRegions(List<RegionInfo> regions)
public int getVersionNumber(ServerName serverName)
public String getVersion(ServerName serverName)
public int getInfoPort(ServerName serverName)
private void persistRegionLastFlushedSequenceIds() throws IOException
IOException
- if persisting to HDFS fails
public void loadLastFlushedSequenceIds() throws IOException
IOException
public void removeDeletedRegionFromLoadedFlushedSequenceIds()
Copyright © 2007–2020 The Apache Software Foundation. All rights reserved.