001/** 002 * 003 * Licensed to the Apache Software Foundation (ASF) under one 004 * or more contributor license agreements. See the NOTICE file 005 * distributed with this work for additional information 006 * regarding copyright ownership. The ASF licenses this file 007 * to you under the Apache License, Version 2.0 (the 008 * "License"); you may not use this file except in compliance 009 * with the License. You may obtain a copy of the License at 010 * 011 * http://www.apache.org/licenses/LICENSE-2.0 012 * 013 * Unless required by applicable law or agreed to in writing, software 014 * distributed under the License is distributed on an "AS IS" BASIS, 015 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 016 * See the License for the specific language governing permissions and 017 * limitations under the License. 018 */ 019package org.apache.hadoop.hbase.master; 020 021import java.util.ArrayList; 022import java.util.Collections; 023import java.util.Comparator; 024import java.util.Date; 025import java.util.HashMap; 026import java.util.HashSet; 027import java.util.Iterator; 028import java.util.List; 029import java.util.Map; 030import java.util.Set; 031import org.apache.hadoop.hbase.ServerName; 032import org.apache.hadoop.hbase.util.EnvironmentEdgeManager; 033import org.apache.hadoop.hbase.util.Pair; 034import org.apache.yetus.audience.InterfaceAudience; 035import org.slf4j.Logger; 036import org.slf4j.LoggerFactory; 037 038import org.apache.hbase.thirdparty.com.google.common.base.Preconditions; 039 040 041/** 042 * Class to hold dead servers list and utility querying dead server list. 043 * On znode expiration, servers are added here. 044 */ 045@InterfaceAudience.Private 046public class DeadServer { 047 private static final Logger LOG = LoggerFactory.getLogger(DeadServer.class); 048 049 /** 050 * Set of known dead servers. On znode expiration, servers are added here. 051 * This is needed in case of a network partitioning where the server's lease 052 * expires, but the server is still running. After the network is healed, 053 * and it's server logs are recovered, it will be told to call server startup 054 * because by then, its regions have probably been reassigned. 055 */ 056 private final Map<ServerName, Long> deadServers = new HashMap<>(); 057 058 /** 059 * Set of dead servers currently being processed 060 */ 061 private final Set<ServerName> processingServers = new HashSet<ServerName>(); 062 063 /** 064 * Handles restart of a server. The new server instance has a different start code. 065 * The new start code should be greater than the old one. We don't check that here. 066 * 067 * @param newServerName Servername as either <code>host:port</code> or 068 * <code>host,port,startcode</code>. 069 * @return true if this server was dead before and coming back alive again 070 */ 071 public synchronized boolean cleanPreviousInstance(final ServerName newServerName) { 072 Iterator<ServerName> it = deadServers.keySet().iterator(); 073 while (it.hasNext()) { 074 ServerName sn = it.next(); 075 if (ServerName.isSameAddress(sn, newServerName)) { 076 // remove from deadServers 077 it.remove(); 078 // remove from processingServers 079 boolean removed = processingServers.remove(sn); 080 if (removed) { 081 LOG.debug("Removed {}, processing={}, numProcessing={}", sn, removed, 082 processingServers.size()); 083 } 084 return true; 085 } 086 } 087 088 return false; 089 } 090 091 /** 092 * @param serverName server name. 093 * @return true if this server is on the dead servers list false otherwise 094 */ 095 public synchronized boolean isDeadServer(final ServerName serverName) { 096 return deadServers.containsKey(serverName); 097 } 098 099 /** 100 * @param serverName server name. 101 * @return true if this server is on the processing servers list false otherwise 102 */ 103 public synchronized boolean isProcessingServer(final ServerName serverName) { 104 return processingServers.contains(serverName); 105 } 106 107 /** 108 * Checks if there are currently any dead servers being processed by the 109 * master. Returns true if at least one region server is currently being 110 * processed as dead. 111 * 112 * @return true if any RS are being processed as dead 113 */ 114 public synchronized boolean areDeadServersInProgress() { 115 return !processingServers.isEmpty(); 116 } 117 118 public synchronized Set<ServerName> copyServerNames() { 119 Set<ServerName> clone = new HashSet<>(deadServers.size()); 120 clone.addAll(deadServers.keySet()); 121 return clone; 122 } 123 124 /** 125 * Adds the server to the dead server list if it's not there already. 126 */ 127 public synchronized void add(ServerName sn) { 128 if (!deadServers.containsKey(sn)){ 129 deadServers.put(sn, EnvironmentEdgeManager.currentTime()); 130 } 131 boolean added = processingServers.add(sn); 132 if (LOG.isDebugEnabled() && added) { 133 LOG.debug("Added " + sn + "; numProcessing=" + processingServers.size()); 134 } 135 } 136 137 /** 138 * Notify that we started processing this dead server. 139 * @param sn ServerName for the dead server. 140 */ 141 public synchronized void notifyServer(ServerName sn) { 142 boolean added = processingServers.add(sn); 143 if (LOG.isDebugEnabled()) { 144 if (added) { 145 LOG.debug("Added " + sn + "; numProcessing=" + processingServers.size()); 146 } 147 LOG.debug("Started processing " + sn + "; numProcessing=" + processingServers.size()); 148 } 149 } 150 151 /** 152 * Complete processing for this dead server. 153 * @param sn ServerName for the dead server. 154 */ 155 public synchronized void finish(ServerName sn) { 156 boolean removed = processingServers.remove(sn); 157 if (LOG.isDebugEnabled()) { 158 LOG.debug("Finished processing " + sn + "; numProcessing=" + processingServers.size()); 159 if (removed) { 160 LOG.debug("Removed " + sn + " ; numProcessing=" + processingServers.size()); 161 } 162 } 163 } 164 165 public synchronized int size() { 166 return deadServers.size(); 167 } 168 169 public synchronized boolean isEmpty() { 170 return deadServers.isEmpty(); 171 } 172 173 public synchronized void cleanAllPreviousInstances(final ServerName newServerName) { 174 Iterator<ServerName> it = deadServers.keySet().iterator(); 175 while (it.hasNext()) { 176 ServerName sn = it.next(); 177 if (ServerName.isSameAddress(sn, newServerName)) { 178 // remove from deadServers 179 it.remove(); 180 // remove from processingServers 181 boolean removed = processingServers.remove(sn); 182 if (removed) { 183 LOG.debug("Removed " + sn + " ; numProcessing=" + processingServers.size()); 184 } 185 } 186 } 187 } 188 189 @Override 190 public synchronized String toString() { 191 // Display unified set of servers from both maps 192 Set<ServerName> servers = new HashSet<ServerName>(); 193 servers.addAll(deadServers.keySet()); 194 servers.addAll(processingServers); 195 StringBuilder sb = new StringBuilder(); 196 for (ServerName sn : servers) { 197 if (sb.length() > 0) { 198 sb.append(", "); 199 } 200 sb.append(sn.toString()); 201 // Star entries that are being processed 202 if (processingServers.contains(sn)) { 203 sb.append("*"); 204 } 205 } 206 return sb.toString(); 207 } 208 209 /** 210 * Extract all the servers dead since a given time, and sort them. 211 * @param ts the time, 0 for all 212 * @return a sorted array list, by death time, lowest values first. 213 */ 214 public synchronized List<Pair<ServerName, Long>> copyDeadServersSince(long ts){ 215 List<Pair<ServerName, Long>> res = new ArrayList<>(size()); 216 217 for (Map.Entry<ServerName, Long> entry:deadServers.entrySet()){ 218 if (entry.getValue() >= ts){ 219 res.add(new Pair<>(entry.getKey(), entry.getValue())); 220 } 221 } 222 223 Collections.sort(res, ServerNameDeathDateComparator); 224 return res; 225 } 226 227 /** 228 * Get the time when a server died 229 * @param deadServerName the dead server name 230 * @return the date when the server died 231 */ 232 public synchronized Date getTimeOfDeath(final ServerName deadServerName){ 233 Long time = deadServers.get(deadServerName); 234 return time == null ? null : new Date(time); 235 } 236 237 private static Comparator<Pair<ServerName, Long>> ServerNameDeathDateComparator = 238 new Comparator<Pair<ServerName, Long>>(){ 239 240 @Override 241 public int compare(Pair<ServerName, Long> o1, Pair<ServerName, Long> o2) { 242 return o1.getSecond().compareTo(o2.getSecond()); 243 } 244 }; 245 246 /** 247 * remove the specified dead server 248 * @param deadServerName the dead server name 249 * @return true if this server was removed 250 */ 251 252 public synchronized boolean removeDeadServer(final ServerName deadServerName) { 253 Preconditions.checkState(!processingServers.contains(deadServerName), 254 "Asked to remove server still in processingServers set " + deadServerName + 255 " (numProcessing=" + processingServers.size() + ")"); 256 if (deadServers.remove(deadServerName) == null) { 257 return false; 258 } 259 return true; 260 } 261}