@InterfaceAudience.Private public class ServerManager extends Object
Maintains lists of online and dead servers. Processes the startups, shutdowns, and deaths of region servers.
Servers are distinguished in two different ways. A given server has a location, specified by hostname and port, and of which there can only be one online at any given time. A server instance is specified by the location (hostname and port) as well as the startcode (timestamp from when the server was started). This is used to differentiate a restarted instance of a given server from the original instance.
If a server is known not to be running any more, it is called dead. The dead server needs to be handled by a ServerShutdownHandler. If the handler is not enabled yet, the server can't be handled right away so it is queued up. After the handler is enabled, the server will be submitted to a handler to handle. However, the handler may be just partially enabled. If so, the server cannot be fully processed, and is queued up for further processing. A server is fully processed only after the handler is fully enabled and has completed the handling.
Modifier and Type | Field and Description |
---|---|
private boolean |
clusterShutdown |
private ClusterConnection |
connection |
private DeadServer |
deadservers |
private ArrayList<ServerName> |
drainingServers
List of region servers
|
private ConcurrentNavigableMap<byte[],Long> |
flushedSequenceIdByRegion
The last flushed sequence id for a region.
|
private List<ServerListener> |
listeners
Listeners that are called on server events.
|
private static org.apache.commons.logging.Log |
LOG |
private Server |
master |
private long |
maxSkew |
private ConcurrentHashMap<ServerName,ServerLoad> |
onlineServers
Map of registered servers to their current load
|
private RetryCounterFactory |
pingRetryCounterFactory |
private Set<ServerName> |
queuedDeadServers
Set of region servers which are dead but not processed immediately.
|
private Map<ServerName,Boolean> |
requeuedDeadServers
Set of region servers which are dead and submitted to ServerShutdownHandler to process but not
fully processed immediately.
|
private RpcControllerFactory |
rpcControllerFactory |
private Map<ServerName,org.apache.hadoop.hbase.protobuf.generated.AdminProtos.AdminService.BlockingInterface> |
rsAdmins
Map of admin interfaces per registered regionserver; these interfaces we use to control
regionservers out on the cluster
|
private MasterServices |
services |
private ConcurrentNavigableMap<byte[],ConcurrentNavigableMap<byte[],Long>> |
storeFlushedSequenceIdsByRegion
The last flushed sequence id for a store in a region.
|
static String |
WAIT_ON_REGIONSERVERS_INTERVAL |
static String |
WAIT_ON_REGIONSERVERS_MAXTOSTART |
static String |
WAIT_ON_REGIONSERVERS_MINTOSTART |
static String |
WAIT_ON_REGIONSERVERS_TIMEOUT |
private long |
warningSkew |
Constructor and Description |
---|
ServerManager(Server master,
MasterServices services)
Constructor.
|
ServerManager(Server master,
MasterServices services,
boolean connect) |
Modifier and Type | Method and Description |
---|---|
boolean |
addServerToDrainList(ServerName sn) |
boolean |
areDeadServersInProgress()
Checks if any dead servers are currently in progress.
|
(package private) boolean |
checkAndRecordNewServer(ServerName serverName,
ServerLoad sl)
Checks whether a server with the same host and port already exists;
if not, or if the existing one has a smaller start code, records it.
|
private void |
checkClockSkew(ServerName serverName,
long serverCurrentTime)
Checks the clock skew between the server and the master.
|
private void |
checkIsDead(ServerName serverName,
String what)
If this server is on the dead list, reject it with a YouAreDeadException.
|
(package private) void |
clearDeadServersWithSameHostNameAndPortOfOnlineServer()
Clears any dead server with the same host name and port as any online server.
|
static void |
closeRegionSilentlyAndWait(ClusterConnection connection,
ServerName server,
HRegionInfo region,
long timeout)
Contacts a region server and waits up to timeout ms
to close the region.
|
int |
countOfRegionServers() |
List<ServerName> |
createDestinationServersList()
Calls
createDestinationServersList(org.apache.hadoop.hbase.ServerName) without server to exclude. |
List<ServerName> |
createDestinationServersList(ServerName serverToExclude)
Creates a list of possible destinations for a region.
|
void |
expireServer(ServerName serverName) |
private ServerName |
findServerWithSameHostnamePortWithLock(ServerName serverName)
Assumes onlineServers is locked.
|
double |
getAverageLoad()
Compute the average load across all region servers.
|
(package private) Set<ServerName> |
getDeadNotExpiredServers() |
DeadServer |
getDeadServers() |
List<ServerName> |
getDrainingServersList() |
org.apache.hadoop.hbase.protobuf.generated.ClusterStatusProtos.RegionStoreSequenceIds |
getLastFlushedSequenceId(byte[] encodedRegionName) |
ServerLoad |
getLoad(ServerName serverName) |
Map<ServerName,ServerLoad> |
getOnlineServers() |
List<ServerName> |
getOnlineServersList() |
private ConcurrentNavigableMap<byte[],Long> |
getOrCreateStoreFlushedSequenceId(byte[] regionName) |
(package private) Map<ServerName,Boolean> |
getRequeuedDeadServers() |
private org.apache.hadoop.hbase.protobuf.generated.AdminProtos.AdminService.BlockingInterface |
getRsAdmin(ServerName sn) |
boolean |
isClusterShutdown() |
boolean |
isRegionInServerManagerStates(HRegionInfo hri) |
boolean |
isServerDead(ServerName serverName)
Check if a server is known to be dead.
|
boolean |
isServerOnline(ServerName serverName) |
boolean |
isServerReachable(ServerName server)
Check if a region server is reachable and has the expected start code
|
boolean |
isServerWithSameHostnamePortOnline(ServerName serverName)
Check whether a server is online based on hostname and port
|
(package private) void |
letRegionServersShutdown() |
void |
moveFromOnelineToDeadServers(ServerName sn) |
private PayloadCarryingRpcController |
newRpcController() |
void |
processDeadServer(ServerName serverName,
boolean shouldSplitWal) |
(package private) void |
processQueuedDeadServers()
Process the servers which died during master's initialization.
|
(package private) void |
recordNewServerWithLock(ServerName serverName,
ServerLoad sl)
Adds the server to the onlineServers list.
|
(package private) void |
regionServerReport(ServerName sn,
ServerLoad sl) |
(package private) ServerName |
regionServerStartup(org.apache.hadoop.hbase.protobuf.generated.RegionServerStatusProtos.RegionServerStartupRequest request,
InetAddress ia)
Let the server manager know a new regionserver has come online
|
void |
registerListener(ServerListener listener)
Add the listener to the notification list.
|
(package private) void |
removeDeadNotExpiredServers(List<ServerName> servers)
Loop through the deadNotExpired server list and remove them from the
servers.
|
void |
removeRegion(HRegionInfo regionInfo)
Called by delete table and similar to notify the ServerManager that a region was removed.
|
void |
removeRegions(List<HRegionInfo> regions)
Called by delete table and similar to notify the ServerManager that a region was removed.
|
(package private) void |
removeRequeuedDeadServers()
During startup, if we figure it is not a failover, i.e.
|
boolean |
removeServerFromDrainList(ServerName sn) |
boolean |
sendRegionClose(ServerName server,
HRegionInfo region,
int versionOfClosingNode) |
boolean |
sendRegionClose(ServerName server,
HRegionInfo region,
int versionOfClosingNode,
ServerName dest,
boolean transitionInZK)
Sends a CLOSE RPC to the specified server to close the specified region.
|
RegionOpeningState |
sendRegionOpen(ServerName server,
HRegionInfo region,
int versionOfOfflineNode,
List<ServerName> favoredNodes)
Sends an OPEN RPC to the specified server to open the specified region.
|
List<RegionOpeningState> |
sendRegionOpen(ServerName server,
List<Triple<HRegionInfo,Integer,List<ServerName>>> regionOpenInfos)
Sends an OPEN RPC to the specified server to open the specified regions.
|
void |
sendRegionsMerge(ServerName server,
HRegionInfo region_a,
HRegionInfo region_b,
boolean forcible)
Sends a MERGE REGIONS RPC to the specified server to merge the specified
regions.
|
void |
sendRegionWarmup(ServerName server,
HRegionInfo region)
Sends a WARMUP RPC to the specified server to warmup the specified region.
|
void |
shutdownCluster() |
void |
stop()
Stop the ServerManager.
|
boolean |
unregisterListener(ServerListener listener)
Remove the listener from the notification list.
|
private void |
updateLastFlushedSequenceIds(ServerName sn,
ServerLoad hsl)
Updates last flushed sequence Ids for the regions on server sn
|
void |
waitForRegionServers(MonitoredTask status)
Wait for the region servers to report in.
|
public static final String WAIT_ON_REGIONSERVERS_MAXTOSTART
public static final String WAIT_ON_REGIONSERVERS_MINTOSTART
public static final String WAIT_ON_REGIONSERVERS_TIMEOUT
public static final String WAIT_ON_REGIONSERVERS_INTERVAL
private static final org.apache.commons.logging.Log LOG
private volatile boolean clusterShutdown
private final ConcurrentNavigableMap<byte[],Long> flushedSequenceIdByRegion
private final ConcurrentNavigableMap<byte[],ConcurrentNavigableMap<byte[],Long>> storeFlushedSequenceIdsByRegion
private final ConcurrentHashMap<ServerName,ServerLoad> onlineServers
private final Map<ServerName,org.apache.hadoop.hbase.protobuf.generated.AdminProtos.AdminService.BlockingInterface> rsAdmins
private final ArrayList<ServerName> drainingServers
private final Server master
private final MasterServices services
private final ClusterConnection connection
private final DeadServer deadservers
private final long maxSkew
private final long warningSkew
private final RetryCounterFactory pingRetryCounterFactory
private final RpcControllerFactory rpcControllerFactory
private Set<ServerName> queuedDeadServers
processQueuedDeadServers()
by master.
A dead server is a server instance known to be dead, not listed in the /hbase/rs znode any more. It may have not been submitted to ServerShutdownHandler yet because the handler is not enabled.
A dead server, which has been submitted to ServerShutdownHandler while the handler is not enabled, is queued up.
So this is a set of region servers known to be dead but not submitted to ServerShutdownHandler for processing yet.
private Map<ServerName,Boolean> requeuedDeadServers
If one server died before assignment manager finished the failover cleanup, the server will be
added to this set and will be processed through calling
processQueuedDeadServers()
by assignment manager.
The Boolean value indicates whether log split is needed inside ServerShutdownHandler
ServerShutdownHandler processes a dead server submitted to the handler after the handler is enabled. It may not be able to complete the processing because meta is not yet online or master is currently in startup mode. In this case, the dead server will be parked in this set temporarily.
private List<ServerListener> listeners
public ServerManager(Server master, MasterServices services) throws IOException
master
- services
- ZooKeeperConnectionException
IOException
ServerManager(Server master, MasterServices services, boolean connect) throws IOException
IOException
public void registerListener(ServerListener listener)
listener
- The ServerListener to register
public boolean unregisterListener(ServerListener listener)
listener
- The ServerListener to unregister
ServerName regionServerStartup(org.apache.hadoop.hbase.protobuf.generated.RegionServerStatusProtos.RegionServerStartupRequest request, InetAddress ia) throws IOException
request
- the startup request
ia
- the InetAddress from which the request is received
IOException
private ConcurrentNavigableMap<byte[],Long> getOrCreateStoreFlushedSequenceId(byte[] regionName)
private void updateLastFlushedSequenceIds(ServerName sn, ServerLoad hsl)
sn
- hsl
- void regionServerReport(ServerName sn, ServerLoad sl) throws YouAreDeadException
YouAreDeadException
boolean checkAndRecordNewServer(ServerName serverName, ServerLoad sl)
serverName
- the server to check and record
sl
- the server load on the server
private void checkClockSkew(ServerName serverName, long serverCurrentTime) throws ClockOutOfSyncException
serverName
- Incoming server's name
serverCurrentTime
- ClockOutOfSyncException
- if the skew exceeds the configured max valueprivate void checkIsDead(ServerName serverName, String what) throws YouAreDeadException
serverName
- what
- START or REPORTYouAreDeadException
private ServerName findServerWithSameHostnamePortWithLock(ServerName serverName)
void recordNewServerWithLock(ServerName serverName, ServerLoad sl)
serverName
- The remote server's name.
sl
- public org.apache.hadoop.hbase.protobuf.generated.ClusterStatusProtos.RegionStoreSequenceIds getLastFlushedSequenceId(byte[] encodedRegionName)
public ServerLoad getLoad(ServerName serverName)
serverName
- public double getAverageLoad()
public int countOfRegionServers()
public Map<ServerName,ServerLoad> getOnlineServers()
public DeadServer getDeadServers()
public boolean areDeadServersInProgress()
void letRegionServersShutdown()
public void expireServer(ServerName serverName)
public void moveFromOnelineToDeadServers(ServerName sn)
public void processDeadServer(ServerName serverName, boolean shouldSplitWal)
void processQueuedDeadServers()
public boolean removeServerFromDrainList(ServerName sn)
public boolean addServerToDrainList(ServerName sn)
public RegionOpeningState sendRegionOpen(ServerName server, HRegionInfo region, int versionOfOfflineNode, List<ServerName> favoredNodes) throws IOException
Open should not fail but can if server just crashed.
server
- server to open a regionregion
- region to openversionOfOfflineNode
- that needs to be present in the offline node
when RS tries to change the state from OFFLINE to other states.favoredNodes
- IOException
public List<RegionOpeningState> sendRegionOpen(ServerName server, List<Triple<HRegionInfo,Integer,List<ServerName>>> regionOpenInfos) throws IOException
Open should not fail but can if server just crashed.
server
- server to open a regionregionOpenInfos
- info of a list of regions to openIOException
private PayloadCarryingRpcController newRpcController()
public boolean sendRegionClose(ServerName server, HRegionInfo region, int versionOfClosingNode, ServerName dest, boolean transitionInZK) throws IOException
A region server could reject the close request because it either does not have the specified region or the region is being split.
server
- server to close a region
region
- region to close
versionOfClosingNode
- the version of znode to compare when RS transitions the znode from
CLOSING state.
dest
- if the region is moved to another server, the destination server; null otherwise.
IOException
public boolean sendRegionClose(ServerName server, HRegionInfo region, int versionOfClosingNode) throws IOException
IOException
public void sendRegionWarmup(ServerName server, HRegionInfo region)
A region server could reject the close request because it either does not have the specified region or the region is being split.
server
- server to warmup a regionregion
- region to warmuppublic static void closeRegionSilentlyAndWait(ClusterConnection connection, ServerName server, HRegionInfo region, long timeout) throws IOException, InterruptedException
IOException
InterruptedException
public void sendRegionsMerge(ServerName server, HRegionInfo region_a, HRegionInfo region_b, boolean forcible) throws IOException
A region server could reject the merge request because it does not have the specified region.
server
- server to merge regions
region_a
- region to merge
region_b
- region to merge
forcible
- true to do a compulsory merge; otherwise we will only merge
two adjacent regions
IOException
public boolean isServerReachable(ServerName server)
private org.apache.hadoop.hbase.protobuf.generated.AdminProtos.AdminService.BlockingInterface getRsAdmin(ServerName sn) throws IOException
sn
- sn
IOException
RetriesExhaustedException
- wrapping a ConnectException if failed
public void waitForRegionServers(MonitoredTask status) throws InterruptedException
InterruptedException
public List<ServerName> getOnlineServersList()
public List<ServerName> getDrainingServersList()
Set<ServerName> getDeadNotExpiredServers()
void removeRequeuedDeadServers()
Map<ServerName,Boolean> getRequeuedDeadServers()
public boolean isServerOnline(ServerName serverName)
public boolean isServerWithSameHostnamePortOnline(ServerName serverName)
public boolean isServerDead(ServerName serverName)
public void shutdownCluster()
public boolean isClusterShutdown()
public void stop()
public List<ServerName> createDestinationServersList(ServerName serverToExclude)
serverToExclude
- can be null if there is no server to exclude
public List<ServerName> createDestinationServersList()
createDestinationServersList(org.apache.hadoop.hbase.ServerName)
without server to exclude.
void removeDeadNotExpiredServers(List<ServerName> servers)
createDestinationServersList()
instead of managing your own list.
void clearDeadServersWithSameHostNameAndPortOfOnlineServer()
public void removeRegion(HRegionInfo regionInfo)
public boolean isRegionInServerManagerStates(HRegionInfo hri)
public void removeRegions(List<HRegionInfo> regions)
Copyright © 2007–2019 The Apache Software Foundation. All rights reserved.