/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.rsgroup;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Set;

import org.apache.commons.lang3.StringUtils;
import org.apache.hadoop.hbase.NamespaceDescriptor;
import org.apache.hadoop.hbase.ServerName;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.RegionInfo;
import org.apache.hadoop.hbase.constraint.ConstraintException;
import org.apache.hadoop.hbase.master.HMaster;
import org.apache.hadoop.hbase.master.LoadBalancer;
import org.apache.hadoop.hbase.master.MasterServices;
import org.apache.hadoop.hbase.master.RegionPlan;
import org.apache.hadoop.hbase.master.RegionState;
import org.apache.hadoop.hbase.master.ServerManager;
import org.apache.hadoop.hbase.master.assignment.AssignmentManager;
import org.apache.hadoop.hbase.master.assignment.RegionStates.RegionStateNode;
import org.apache.hadoop.hbase.net.Address;
import org.apache.yetus.audience.InterfaceAudience;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import org.apache.hbase.thirdparty.com.google.common.collect.Lists;
import org.apache.hbase.thirdparty.com.google.common.collect.Maps;

/**
 * Service to support Region Server Grouping (HBase-6721).
 *
 * <p>Operations that change group membership synchronize on the {@link RSGroupInfoManager}
 * instance; {@link #balanceRSGroup(String)} synchronizes on the master's {@link LoadBalancer}.
 */
@InterfaceAudience.Private
public class RSGroupAdminServer implements RSGroupAdmin {
  private static final Logger LOG = LoggerFactory.getLogger(RSGroupAdminServer.class);

  private final MasterServices master;
  private final RSGroupInfoManager rsGroupInfoManager;

  /**
   * @param master master services used to reach the server, assignment and balancer managers
   * @param rsGroupInfoManager the store of rsgroup membership; also used as the lock object
   * @throws IOException declared for callers; not thrown by this constructor body
   */
  public RSGroupAdminServer(MasterServices master, RSGroupInfoManager rsGroupInfoManager)
      throws IOException {
    this.master = master;
    this.rsGroupInfoManager = rsGroupInfoManager;
  }

  @Override
  public RSGroupInfo getRSGroupInfo(String groupName) throws IOException {
    return rsGroupInfoManager.getRSGroup(groupName);
  }

  @Override
  public RSGroupInfo getRSGroupInfoOfTable(TableName tableName) throws IOException {
    // We are reading across two Maps in the below with out synchronizing across
    // them; should be safe most of the time.
    String groupName = rsGroupInfoManager.getRSGroupOfTable(tableName);
    return groupName == null ? null : rsGroupInfoManager.getRSGroup(groupName);
  }

  /**
   * Verify that every passed address belongs to a currently online server.
   * @param servers addresses to check
   * @throws ConstraintException if any address is not among the online servers
   */
  private void checkOnlineServersOnly(Set<Address> servers) throws ConstraintException {
    // This uglyness is because we only have Address, not ServerName.
    // Online servers are keyed by ServerName.
    Set<Address> onlineServers = new HashSet<>();
    for (ServerName server : master.getServerManager().getOnlineServers().keySet()) {
      onlineServers.add(server.getAddress());
    }
    for (Address address : servers) {
      if (!onlineServers.contains(address)) {
        throw new ConstraintException(
            "Server " + address + " is not an online server in 'default' RSGroup.");
      }
    }
  }

  /**
   * Check passed name. Fail if nulls or if corresponding RSGroupInfo not found.
   * @return The RSGroupInfo named <code>name</code>
   */
  private RSGroupInfo getAndCheckRSGroupInfo(String name) throws IOException {
    if (StringUtils.isEmpty(name)) {
      throw new ConstraintException("RSGroup cannot be null.");
    }
    RSGroupInfo rsGroupInfo = getRSGroupInfo(name);
    if (rsGroupInfo == null) {
      throw new ConstraintException("RSGroup does not exist: " + name);
    }
    return rsGroupInfo;
  }

  /**
   * Collects regions currently assigned to <code>server</code> plus regions in transition whose
   * recorded location is <code>server</code>. Meta regions are placed at the end of the list.
   * @return List of Regions associated with this <code>server</code>.
   */
  private List<RegionInfo> getRegions(final Address server) {
    LinkedList<RegionInfo> regions = new LinkedList<>();
    for (Map.Entry<RegionInfo, ServerName> el :
        master.getAssignmentManager().getRegionStates().getRegionAssignments().entrySet()) {
      if (el.getValue() == null) {
        continue;
      }

      if (el.getValue().getAddress().equals(server)) {
        addRegion(regions, el.getKey());
      }
    }
    for (RegionStateNode state : master.getAssignmentManager().getRegionsInTransition()) {
      // A region in transition may not have a location yet; skip it rather than NPE.
      ServerName location = state.getRegionLocation();
      if (location != null && location.getAddress().equals(server)) {
        addRegion(regions, state.getRegionInfo());
      }
    }
    return regions;
  }

  /**
   * Adds <code>hri</code> to <code>regions</code>: meta goes to the tail, everything else to the
   * head.
   */
  private void addRegion(final LinkedList<RegionInfo> regions, RegionInfo hri) {
    // If meta, move it last otherwise other unassigns fail because meta is not
    // online for them to update state in. This is dodgy. Needs to be made more
    // robust. See TODO below.
    if (hri.isMetaRegion()) {
      regions.addLast(hri);
    } else {
      regions.addFirst(hri);
    }
  }

  /**
   * Check servers and tables.
   *
   * @param servers servers to move
   * @param tables tables to move
   * @param targetGroupName target group name
   * @throws IOException if nulls or if servers and tables not belong to the same group
   */
  private void checkServersAndTables(Set<Address> servers, Set<TableName> tables,
      String targetGroupName) throws IOException {
    // Presume first server's source group. Later ensure all servers are from this group.
    Address firstServer = servers.iterator().next();
    RSGroupInfo tmpSrcGrp = rsGroupInfoManager.getRSGroupOfServer(firstServer);
    if (tmpSrcGrp == null) {
      // Be careful. This exception message is tested for in TestRSGroupsBase...
      throw new ConstraintException("Source RSGroup for server " + firstServer
          + " does not exist.");
    }
    RSGroupInfo srcGrp = new RSGroupInfo(tmpSrcGrp);
    if (srcGrp.getName().equals(targetGroupName)) {
      throw new ConstraintException("Target RSGroup " + targetGroupName +
          " is same as source " + srcGrp.getName() + " RSGroup.");
    }
    // Only move online servers
    checkOnlineServersOnly(servers);

    // Ensure all servers are of same rsgroup.
    for (Address server : servers) {
      RSGroupInfo serverGrp = rsGroupInfoManager.getRSGroupOfServer(server);
      if (serverGrp == null) {
        // Same failure mode as for the first server, instead of a NullPointerException.
        throw new ConstraintException("Source RSGroup for server " + server
            + " does not exist.");
      }
      String tmpGroup = serverGrp.getName();
      if (!tmpGroup.equals(srcGrp.getName())) {
        throw new ConstraintException("Move server request should only come from one source " +
            "RSGroup. Expecting only " + srcGrp.getName() + " but contains " + tmpGroup);
      }
    }

    // Ensure all tables and servers are of same rsgroup.
    for (TableName table : tables) {
      String tmpGroup = rsGroupInfoManager.getRSGroupOfTable(table);
      // Compare from the known non-null side: a table without a group yields null here.
      if (!srcGrp.getName().equals(tmpGroup)) {
        throw new ConstraintException("Move table request should only come from one source " +
            "RSGroup. Expecting only " + srcGrp.getName() + " but contains " + tmpGroup);
      }
    }

    if (srcGrp.getServers().size() <= servers.size() && srcGrp.getTables().size() > tables.size()) {
      throw new ConstraintException("Cannot leave a RSGroup " + srcGrp.getName() +
          " that contains tables without servers to host them.");
    }
  }

  /**
   * Moves every region from servers which are currently located on these servers,
   * but should not be located there.
   *
   * <p>Calls {@code rsGroupInfoManager.wait(1000)} between passes, so the caller must hold the
   * monitor of {@code rsGroupInfoManager}.
   * @param servers the servers that will move to new group
   * @param tables these tables will be kept on the servers, others will be moved
   * @param targetGroupName the target group name
   * @throws IOException if moving the server and tables fail
   */
  private void moveRegionsFromServers(Set<Address> servers, Set<TableName> tables,
      String targetGroupName) throws IOException {
    boolean foundRegionsToMove;
    RSGroupInfo targetGrp = getRSGroupInfo(targetGroupName);
    Set<Address> allServers = new HashSet<>(servers);
    do {
      foundRegionsToMove = false;
      for (Iterator<Address> iter = allServers.iterator(); iter.hasNext();) {
        Address rs = iter.next();
        // Get regions that are associated with this server and filter regions by tables.
        List<RegionInfo> regions = new ArrayList<>();
        for (RegionInfo region : getRegions(rs)) {
          if (!tables.contains(region.getTable())) {
            regions.add(region);
          }
        }

        LOG.info("Moving " + regions.size() + " region(s) from " + rs +
            " for server move to " + targetGroupName);
        for (RegionInfo region : regions) {
          // Regions might get assigned from tables of target group so we need to filter
          if (!targetGrp.containsTable(region.getTable())) {
            this.master.getAssignmentManager().move(region);
            if (master.getAssignmentManager().getRegionStates().
                getRegionState(region).isFailedOpen()) {
              // A FAILED_OPEN region is not counted as pending work.
              continue;
            }
            foundRegionsToMove = true;
          }
        }
        if (!foundRegionsToMove) {
          iter.remove();
        }
      }
      try {
        rsGroupInfoManager.wait(1000);
      } catch (InterruptedException e) {
        LOG.warn("Sleep interrupted", e);
        Thread.currentThread().interrupt();
      }
    } while (foundRegionsToMove);
  }

  /**
   * Moves every region of tables which should be kept on the servers,
   * but currently they are located on other servers.
   * @param servers the regions of these servers will be kept on the servers, others will be moved
   * @param tables the tables that will move to new group
   * @param targetGroupName the target group name
   * @throws IOException if moving the region fails
   */
  private void moveRegionsToServers(Set<Address> servers, Set<TableName> tables,
      String targetGroupName) throws IOException {
    for (TableName table : tables) {
      LOG.info("Moving region(s) from " + table + " for table move to " + targetGroupName);
      for (RegionInfo region : master.getAssignmentManager().getRegionStates()
          .getRegionsOfTable(table)) {
        ServerName sn = master.getAssignmentManager().getRegionStates()
            .getRegionServerOfRegion(region);
        if (sn == null) {
          // No hosting server recorded for this region; there is nothing to move it away from.
          LOG.debug("Skipping move of region " + region.getShortNameToLog() +
              " because it has no assigned server.");
          continue;
        }
        if (!servers.contains(sn.getAddress())) {
          master.getAssignmentManager().move(region);
        }
      }
    }
  }

  /**
   * Moves <code>servers</code> into <code>targetGroupName</code> and then moves regions off the
   * moved servers, repeating once per second until a pass finds no region left to move.
   * A null <code>servers</code> is rejected; an empty set is a no-op.
   */
  @edu.umd.cs.findbugs.annotations.SuppressWarnings(
      value="RCN_REDUNDANT_NULLCHECK_WOULD_HAVE_BEEN_A_NPE",
      justification="Ignoring complaint because don't know what it is complaining about")
  @Override
  public void moveServers(Set<Address> servers, String targetGroupName) throws IOException {
    if (servers == null) {
      throw new ConstraintException("The list of servers to move cannot be null.");
    }
    if (servers.isEmpty()) {
      // For some reason this difference between null servers and isEmpty is important distinction.
      // TODO. Why? Stuff breaks if I equate them.
      return;
    }
    RSGroupInfo targetGrp = getAndCheckRSGroupInfo(targetGroupName);

    // Hold a lock on the manager instance while moving servers to prevent
    // another writer changing our state while we are working.
    synchronized (rsGroupInfoManager) {
      if (master.getMasterCoprocessorHost() != null) {
        master.getMasterCoprocessorHost().preMoveServers(servers, targetGroupName);
      }
      // Presume first server's source group. Later ensure all servers are from this group.
      Address firstServer = servers.iterator().next();
      RSGroupInfo srcGrp = rsGroupInfoManager.getRSGroupOfServer(firstServer);
      if (srcGrp == null) {
        // Be careful. This exception message is tested for in TestRSGroupsBase...
        throw new ConstraintException("Source RSGroup for server " + firstServer
            + " does not exist.");
      }
      if (srcGrp.getName().equals(targetGroupName)) {
        throw new ConstraintException("Target RSGroup " + targetGroupName +
            " is same as source " + srcGrp + " RSGroup.");
      }
      // Only move online servers (when moving from 'default') or servers from other
      // groups. This prevents bogus servers from entering groups
      if (RSGroupInfo.DEFAULT_GROUP.equals(srcGrp.getName())) {
        checkOnlineServersOnly(servers);
      }
      // Ensure all servers are of same rsgroup.
      for (Address server : servers) {
        RSGroupInfo serverGrp = rsGroupInfoManager.getRSGroupOfServer(server);
        if (serverGrp == null) {
          // Same failure mode as for the first server, instead of a NullPointerException.
          throw new ConstraintException("Source RSGroup for server " + server
              + " does not exist.");
        }
        String tmpGroup = serverGrp.getName();
        if (!tmpGroup.equals(srcGrp.getName())) {
          throw new ConstraintException("Move server request should only come from one source " +
              "RSGroup. Expecting only " + srcGrp.getName() + " but contains " + tmpGroup);
        }
      }
      if (srcGrp.getServers().size() <= servers.size() && srcGrp.getTables().size() > 0) {
        throw new ConstraintException("Cannot leave a RSGroup " + srcGrp.getName() +
            " that contains tables without servers to host them.");
      }

      // MovedServers may be < passed in 'servers'.
      Set<Address> movedServers = rsGroupInfoManager.moveServers(servers, srcGrp.getName(),
          targetGroupName);
      List<Address> editableMovedServers = Lists.newArrayList(movedServers);
      boolean foundRegionsToMove;
      do {
        foundRegionsToMove = false;
        for (Iterator<Address> iter = editableMovedServers.iterator(); iter.hasNext();) {
          Address rs = iter.next();
          // Get regions that are associated with this server.
          List<RegionInfo> regions = getRegions(rs);

          LOG.info("Moving " + regions.size() + " region(s) from " + rs +
              " for server move to " + targetGroupName);

          for (RegionInfo region : regions) {
            // Regions might get assigned from tables of target group so we need to filter
            if (targetGrp.containsTable(region.getTable())) {
              continue;
            }
            LOG.info("Moving region " + region.getShortNameToLog());
            this.master.getAssignmentManager().move(region);
            if (master.getAssignmentManager().getRegionStates().
                getRegionState(region).isFailedOpen()) {
              // If region is in FAILED_OPEN state, it won't recover, not without
              // operator intervention... in hbase-2.0.0 at least. Continue rather
              // than mark region as 'foundRegionsToMove'.
              continue;
            }
            foundRegionsToMove = true;
          }
          if (!foundRegionsToMove) {
            iter.remove();
          }
        }
        try {
          rsGroupInfoManager.wait(1000);
        } catch (InterruptedException e) {
          LOG.warn("Sleep interrupted", e);
          Thread.currentThread().interrupt();
        }
      } while (foundRegionsToMove);

      if (master.getMasterCoprocessorHost() != null) {
        master.getMasterCoprocessorHost().postMoveServers(servers, targetGroupName);
      }
      LOG.info("Move server done: " + srcGrp.getName() + "=>" + targetGroupName);
    }
  }

  /**
   * Moves <code>tables</code> into <code>targetGroup</code> and, when the target is non-null,
   * requests a move for every region of each table that is not disabled. A null
   * <code>tables</code> is rejected; an empty set is a no-op.
   */
  @Override
  public void moveTables(Set<TableName> tables, String targetGroup) throws IOException {
    if (tables == null) {
      throw new ConstraintException("The list of tables cannot be null.");
    }
    if (tables.size() < 1) {
      LOG.debug("moveTables() passed an empty set. Ignoring.");
      return;
    }

    // Hold a lock on the manager instance while moving servers to prevent
    // another writer changing our state while we are working.
    synchronized (rsGroupInfoManager) {
      if (master.getMasterCoprocessorHost() != null) {
        master.getMasterCoprocessorHost().preMoveTables(tables, targetGroup);
      }
      if (targetGroup != null) {
        RSGroupInfo destGroup = rsGroupInfoManager.getRSGroup(targetGroup);
        if (destGroup == null) {
          throw new ConstraintException("Target " + targetGroup + " RSGroup does not exist.");
        }
        if (destGroup.getServers().size() < 1) {
          throw new ConstraintException("Target RSGroup must have at least one server.");
        }
      }

      for (TableName table : tables) {
        String srcGroup = rsGroupInfoManager.getRSGroupOfTable(table);
        if (srcGroup != null && srcGroup.equals(targetGroup)) {
          throw new ConstraintException(
              "Source RSGroup " + srcGroup + " is same as target " + targetGroup +
              " RSGroup for table " + table);
        }
        LOG.info("Moving table " + table.getNameAsString() + " to RSGroup " + targetGroup);
      }
      rsGroupInfoManager.moveTables(tables, targetGroup);

      // targetGroup is null when a table is being deleted. In this case no further
      // action is required.
      if (targetGroup != null) {
        for (TableName table : tables) {
          if (master.getAssignmentManager().isTableDisabled(table)) {
            LOG.debug("Skipping move regions because the table " + table + " is disabled.");
            continue;
          }
          for (RegionInfo region :
              master.getAssignmentManager().getRegionStates().getRegionsOfTable(table)) {
            LOG.info("Moving region " + region.getShortNameToLog() +
                " to RSGroup " + targetGroup);
            master.getAssignmentManager().move(region);
          }
        }
      }

      if (master.getMasterCoprocessorHost() != null) {
        master.getMasterCoprocessorHost().postMoveTables(tables, targetGroup);
      }
    }
  }

  @Override
  public void addRSGroup(String name) throws IOException {
    if (master.getMasterCoprocessorHost() != null) {
      master.getMasterCoprocessorHost().preAddRSGroup(name);
    }
    rsGroupInfoManager.addRSGroup(new RSGroupInfo(name));
    if (master.getMasterCoprocessorHost() != null) {
      master.getMasterCoprocessorHost().postAddRSGroup(name);
    }
  }

  /**
   * Removes the named rsgroup. Fails if the group does not exist, still has tables or servers,
   * or is referenced by a namespace's configuration.
   */
  @Override
  public void removeRSGroup(String name) throws IOException {
    // Hold a lock on the manager instance while moving servers to prevent
    // another writer changing our state while we are working.
    synchronized (rsGroupInfoManager) {
      if (master.getMasterCoprocessorHost() != null) {
        master.getMasterCoprocessorHost().preRemoveRSGroup(name);
      }
      RSGroupInfo rsGroupInfo = rsGroupInfoManager.getRSGroup(name);
      if (rsGroupInfo == null) {
        throw new ConstraintException("RSGroup " + name + " does not exist");
      }
      int tableCount = rsGroupInfo.getTables().size();
      if (tableCount > 0) {
        throw new ConstraintException("RSGroup " + name + " has " + tableCount +
            " tables; you must remove these tables from the rsgroup before " +
            "the rsgroup can be removed.");
      }
      int serverCount = rsGroupInfo.getServers().size();
      if (serverCount > 0) {
        throw new ConstraintException("RSGroup " + name + " has " + serverCount +
            " servers; you must remove these servers from the RSGroup before " +
            "the RSGroup can be removed.");
      }
      for (NamespaceDescriptor ns : master.getClusterSchema().getNamespaces()) {
        String nsGroup = ns.getConfigurationValue(RSGroupInfo.NAMESPACE_DESC_PROP_GROUP);
        if (nsGroup != null && nsGroup.equals(name)) {
          throw new ConstraintException("RSGroup " + name + " is referenced by namespace: " +
              ns.getName());
        }
      }
      rsGroupInfoManager.removeRSGroup(name);
      if (master.getMasterCoprocessorHost() != null) {
        master.getMasterCoprocessorHost().postRemoveRSGroup(name);
      }
    }
  }

  /**
   * Builds a balance plan per table of the group and submits each plan asynchronously.
   * @return true if at least one region plan was submitted; false if the balancer is off, the
   *   group has regions in transition, dead servers are being processed, or no plan was produced
   */
  @Override
  public boolean balanceRSGroup(String groupName) throws IOException {
    ServerManager serverManager = master.getServerManager();
    AssignmentManager assignmentManager = master.getAssignmentManager();
    LoadBalancer balancer = master.getLoadBalancer();

    synchronized (balancer) {
      // If balance not true, don't run balancer.
      if (!((HMaster) master).isBalancerOn()) {
        return false;
      }

      if (master.getMasterCoprocessorHost() != null) {
        master.getMasterCoprocessorHost().preBalanceRSGroup(groupName);
      }
      if (getRSGroupInfo(groupName) == null) {
        throw new ConstraintException("RSGroup does not exist: "+groupName);
      }
      // Only allow one balance run at at time.
      Map<String, RegionState> groupRIT = rsGroupGetRegionsInTransition(groupName);
      if (groupRIT.size() > 0) {
        LOG.debug("Not running balancer because " + groupRIT.size() + " region(s) in transition: " +
            StringUtils.abbreviate(
                assignmentManager.getRegionStates().getRegionsInTransition().toString(),
                256));
        return false;
      }
      if (serverManager.areDeadServersInProgress()) {
        LOG.debug("Not running balancer because processing dead regionserver(s): " +
            serverManager.getDeadServers());
        return false;
      }

      //We balance per group instead of per table
      List<RegionPlan> plans = new ArrayList<>();
      for (Map.Entry<TableName, Map<ServerName, List<RegionInfo>>> tableMap :
          getRSGroupAssignmentsByTable(groupName).entrySet()) {
        LOG.info("Creating partial plan for table " + tableMap.getKey() + ": "
            + tableMap.getValue());
        List<RegionPlan> partialPlans = balancer.balanceCluster(tableMap.getValue());
        LOG.info("Partial plan for table " + tableMap.getKey() + ": " + partialPlans);
        if (partialPlans != null) {
          plans.addAll(partialPlans);
        }
      }
      long startTime = System.currentTimeMillis();
      boolean balancerRan = !plans.isEmpty();
      if (balancerRan) {
        LOG.info("RSGroup balance " + groupName + " starting with plan count: " + plans.size());
        for (RegionPlan plan : plans) {
          LOG.info("balance " + plan);
          assignmentManager.moveAsync(plan);
        }
        // Elapsed time is a difference of currentTimeMillis() values, i.e. milliseconds.
        LOG.info("RSGroup balance " + groupName + " completed after " +
            (System.currentTimeMillis()-startTime) + " ms");
      }
      if (master.getMasterCoprocessorHost() != null) {
        master.getMasterCoprocessorHost().postBalanceRSGroup(groupName, balancerRan);
      }
      return balancerRan;
    }
  }

  @Override
  public List<RSGroupInfo> listRSGroups() throws IOException {
    return rsGroupInfoManager.listRSGroups();
  }

  @Override
  public RSGroupInfo getRSGroupOfServer(Address hostPort) throws IOException {
    return rsGroupInfoManager.getRSGroupOfServer(hostPort);
  }

  /**
   * Moves <code>servers</code> and <code>tables</code> together into <code>targetGroup</code>,
   * then moves regions so that only regions of the moved tables remain on the moved servers.
   * Both sets must be non-null and non-empty.
   */
  @Override
  public void moveServersAndTables(Set<Address> servers, Set<TableName> tables, String targetGroup)
      throws IOException {
    if (servers == null || servers.isEmpty()) {
      throw new ConstraintException("The list of servers to move cannot be null or empty.");
    }
    if (tables == null || tables.isEmpty()) {
      throw new ConstraintException("The list of tables to move cannot be null or empty.");
    }

    //check target group
    getAndCheckRSGroupInfo(targetGroup);

    // Hold a lock on the manager instance while moving servers and tables to prevent
    // another writer changing our state while we are working.
    synchronized (rsGroupInfoManager) {
      if (master.getMasterCoprocessorHost() != null) {
        master.getMasterCoprocessorHost().preMoveServersAndTables(servers, tables, targetGroup);
      }
      //check servers and tables status
      checkServersAndTables(servers, tables, targetGroup);

      //Move servers and tables to a new group.
      String srcGroup = getRSGroupOfServer(servers.iterator().next()).getName();
      rsGroupInfoManager.moveServersAndTables(servers, tables, srcGroup, targetGroup);

      //move regions which should not belong to these tables
      moveRegionsFromServers(servers, tables, targetGroup);
      //move regions which should belong to these servers
      moveRegionsToServers(servers, tables, targetGroup);

      if (master.getMasterCoprocessorHost() != null) {
        master.getMasterCoprocessorHost().postMoveServersAndTables(servers, tables, targetGroup);
      }
    }
    LOG.info("Move servers and tables done. Severs :"
        + servers + " , Tables : " + tables + " => " + targetGroup);
  }

  /**
   * Removes <code>servers</code> from their rsgroup. The set must be non-null and non-empty, and
   * no member may be an online (non-draining) server or be on the dead servers list.
   */
  @Override
  public void removeServers(Set<Address> servers) throws IOException {
    if (servers == null || servers.isEmpty()) {
      throw new ConstraintException("The set of servers to remove cannot be null or empty.");
    }
    // Hold a lock on the manager instance while moving servers to prevent
    // another writer changing our state while we are working.
    synchronized (rsGroupInfoManager) {
      if (master.getMasterCoprocessorHost() != null) {
        master.getMasterCoprocessorHost().preRemoveServers(servers);
      }
      //check the set of servers
      checkForDeadOrOnlineServers(servers);
      rsGroupInfoManager.removeServers(servers);
      if (master.getMasterCoprocessorHost() != null) {
        master.getMasterCoprocessorHost().postRemoveServers(servers);
      }
      LOG.info("Remove decommissioned servers " + servers + " from rsgroup done.");
    }
  }

  /**
   * @return transition states, keyed by encoded region name, for every region of the group's
   *   tables that has a non-null transition state
   */
  private Map<String, RegionState> rsGroupGetRegionsInTransition(String groupName)
      throws IOException {
    Map<String, RegionState> rit = Maps.newTreeMap();
    AssignmentManager am = master.getAssignmentManager();
    for (TableName tableName : getRSGroupInfo(groupName).getTables()) {
      for (RegionInfo regionInfo : am.getRegionStates().getRegionsOfTable(tableName)) {
        RegionState state = am.getRegionStates().getRegionTransitionState(regionInfo);
        if (state != null) {
          rit.put(regionInfo.getEncodedName(), state);
        }
      }
    }
    return rit;
  }

  /**
   * Builds, for each table of the group that has at least one assigned region, a map from server
   * to the regions of that table it hosts. Every online server of the group appears in each
   * table's map, with an empty list when it hosts none of the table's regions.
   */
  private Map<TableName, Map<ServerName, List<RegionInfo>>>
      getRSGroupAssignmentsByTable(String groupName) throws IOException {
    Map<TableName, Map<ServerName, List<RegionInfo>>> result = Maps.newHashMap();
    RSGroupInfo rsGroupInfo = getRSGroupInfo(groupName);
    Map<TableName, Map<ServerName, List<RegionInfo>>> assignments = Maps.newHashMap();
    for (Map.Entry<RegionInfo, ServerName> entry :
        master.getAssignmentManager().getRegionStates().getRegionAssignments().entrySet()) {
      TableName currTable = entry.getKey().getTable();
      ServerName currServer = entry.getValue();
      RegionInfo currRegion = entry.getKey();
      if (rsGroupInfo.getTables().contains(currTable)) {
        assignments.computeIfAbsent(currTable, k -> new HashMap<>())
            .computeIfAbsent(currServer, k -> new ArrayList<>())
            .add(currRegion);
      }
    }

    Map<ServerName, List<RegionInfo>> serverMap = Maps.newHashMap();
    for (ServerName serverName : master.getServerManager().getOnlineServers().keySet()) {
      if (rsGroupInfo.getServers().contains(serverName.getAddress())) {
        serverMap.put(serverName, Collections.emptyList());
      }
    }

    // add all tables that are members of the group
    for (TableName tableName : rsGroupInfo.getTables()) {
      if (assignments.containsKey(tableName)) {
        // Seed with the empty per-server lists first so real assignments overwrite them.
        Map<ServerName, List<RegionInfo>> tableAssignments = new HashMap<>();
        tableAssignments.putAll(serverMap);
        tableAssignments.putAll(assignments.get(tableName));
        result.put(tableName, tableAssignments);
        LOG.debug("Adding assignments for " + tableName + ": " + assignments.get(tableName));
      }
    }

    return result;
  }

  /**
   * Check if the set of servers are belong to dead servers list or online servers list.
   * @param servers servers to remove
   * @throws ConstraintException if any server is online (and not draining) or is on the dead
   *   servers list
   */
  private void checkForDeadOrOnlineServers(Set<Address> servers) throws ConstraintException {
    // This uglyness is because we only have Address, not ServerName.
    Set<Address> onlineServers = new HashSet<>();
    List<ServerName> drainingServers = master.getServerManager().getDrainingServersList();
    for (ServerName server : master.getServerManager().getOnlineServers().keySet()) {
      // Only online but not decommissioned servers are really online
      if (!drainingServers.contains(server)) {
        onlineServers.add(server.getAddress());
      }
    }

    Set<Address> deadServers = new HashSet<>();
    for (ServerName server : master.getServerManager().getDeadServers().copyServerNames()) {
      deadServers.add(server.getAddress());
    }

    for (Address address : servers) {
      if (onlineServers.contains(address)) {
        throw new ConstraintException(
            "Server " + address + " is an online server, not allowed to remove.");
      }
      if (deadServers.contains(address)) {
        throw new ConstraintException(
            "Server " + address + " is on the dead servers list,"
                + " Maybe it will come back again, not allowed to remove.");
      }
    }
  }
}