001/** 002 * Licensed to the Apache Software Foundation (ASF) under one 003 * or more contributor license agreements. See the NOTICE file 004 * distributed with this work for additional information 005 * regarding copyright ownership. The ASF licenses this file 006 * to you under the Apache License, Version 2.0 (the 007 * "License"); you may not use this file except in compliance 008 * with the License. You may obtain a copy of the License at 009 * 010 * http://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, software 013 * distributed under the License is distributed on an "AS IS" BASIS, 014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 015 * See the License for the specific language governing permissions and 016 * limitations under the License. 017 */ 018package org.apache.hadoop.hbase.rsgroup; 019 020import java.io.IOException; 021import java.util.ArrayList; 022import java.util.Collections; 023import java.util.HashMap; 024import java.util.HashSet; 025import java.util.LinkedList; 026import java.util.List; 027import java.util.Map; 028import java.util.Set; 029import java.util.concurrent.Future; 030import org.apache.commons.lang3.StringUtils; 031import org.apache.hadoop.hbase.NamespaceDescriptor; 032import org.apache.hadoop.hbase.ServerName; 033import org.apache.hadoop.hbase.TableName; 034import org.apache.hadoop.hbase.client.RegionInfo; 035import org.apache.hadoop.hbase.client.TableState; 036import org.apache.hadoop.hbase.constraint.ConstraintException; 037import org.apache.hadoop.hbase.master.HMaster; 038import org.apache.hadoop.hbase.master.LoadBalancer; 039import org.apache.hadoop.hbase.master.MasterServices; 040import org.apache.hadoop.hbase.master.RegionPlan; 041import org.apache.hadoop.hbase.master.RegionState; 042import org.apache.hadoop.hbase.master.ServerManager; 043import org.apache.hadoop.hbase.master.TableStateManager; 044import org.apache.hadoop.hbase.master.assignment.AssignmentManager; 045import org.apache.hadoop.hbase.master.assignment.RegionStateNode; 046import org.apache.hadoop.hbase.net.Address; 047import org.apache.hadoop.hbase.util.Pair; 048import org.apache.hbase.thirdparty.com.google.common.collect.Maps; 049import org.apache.yetus.audience.InterfaceAudience; 050import org.slf4j.Logger; 051import org.slf4j.LoggerFactory; 052 053/** 054 * Service to support Region Server Grouping (HBase-6721). 055 */ 056@InterfaceAudience.Private 057public class RSGroupAdminServer implements RSGroupAdmin { 058 private static final Logger LOG = LoggerFactory.getLogger(RSGroupAdminServer.class); 059 public static final String KEEP_ONE_SERVER_IN_DEFAULT_ERROR_MESSAGE = "should keep at least " + 060 "one server in 'default' RSGroup."; 061 062 private MasterServices master; 063 private final RSGroupInfoManager rsGroupInfoManager; 064 065 public RSGroupAdminServer(MasterServices master, RSGroupInfoManager rsGroupInfoManager) { 066 this.master = master; 067 this.rsGroupInfoManager = rsGroupInfoManager; 068 } 069 070 @Override 071 public RSGroupInfo getRSGroupInfo(String groupName) throws IOException { 072 return rsGroupInfoManager.getRSGroup(groupName); 073 } 074 075 @Override 076 public RSGroupInfo getRSGroupInfoOfTable(TableName tableName) throws IOException { 077 // We are reading across two Maps in the below with out synchronizing across 078 // them; should be safe most of the time. 079 String groupName = rsGroupInfoManager.getRSGroupOfTable(tableName); 080 return groupName == null? null: rsGroupInfoManager.getRSGroup(groupName); 081 } 082 083 private void checkOnlineServersOnly(Set<Address> servers) throws ConstraintException { 084 // This uglyness is because we only have Address, not ServerName. 085 // Online servers are keyed by ServerName. 086 Set<Address> onlineServers = new HashSet<>(); 087 for(ServerName server: master.getServerManager().getOnlineServers().keySet()) { 088 onlineServers.add(server.getAddress()); 089 } 090 for (Address address: servers) { 091 if (!onlineServers.contains(address)) { 092 throw new ConstraintException( 093 "Server " + address + " is not an online server in 'default' RSGroup."); 094 } 095 } 096 } 097 098 /** 099 * Check passed name. Fail if nulls or if corresponding RSGroupInfo not found. 100 * @return The RSGroupInfo named <code>name</code> 101 */ 102 private RSGroupInfo getAndCheckRSGroupInfo(String name) throws IOException { 103 if (StringUtils.isEmpty(name)) { 104 throw new ConstraintException("RSGroup cannot be null."); 105 } 106 RSGroupInfo rsGroupInfo = getRSGroupInfo(name); 107 if (rsGroupInfo == null) { 108 throw new ConstraintException("RSGroup does not exist: " + name); 109 } 110 return rsGroupInfo; 111 } 112 113 /** 114 * @return List of Regions associated with this <code>server</code>. 115 */ 116 private List<RegionInfo> getRegions(final Address server) { 117 LinkedList<RegionInfo> regions = new LinkedList<>(); 118 for (Map.Entry<RegionInfo, ServerName> el : 119 master.getAssignmentManager().getRegionStates().getRegionAssignments().entrySet()) { 120 if (el.getValue() == null) { 121 continue; 122 } 123 124 if (el.getValue().getAddress().equals(server)) { 125 addRegion(regions, el.getKey()); 126 } 127 } 128 for (RegionStateNode state : master.getAssignmentManager().getRegionsInTransition()) { 129 if (state.getRegionLocation() != null && 130 state.getRegionLocation().getAddress().equals(server)) { 131 addRegion(regions, state.getRegionInfo()); 132 } 133 } 134 return regions; 135 } 136 137 private void addRegion(final LinkedList<RegionInfo> regions, RegionInfo hri) { 138 // If meta, move it last otherwise other unassigns fail because meta is not 139 // online for them to update state in. This is dodgy. Needs to be made more 140 // robust. See TODO below. 141 if (hri.isMetaRegion()) { 142 regions.addLast(hri); 143 } else { 144 regions.addFirst(hri); 145 } 146 } 147 148 /** 149 * Check servers and tables. 150 * 151 * @param servers servers to move 152 * @param tables tables to move 153 * @param targetGroupName target group name 154 * @throws IOException if nulls or if servers and tables not belong to the same group 155 */ 156 private void checkServersAndTables(Set<Address> servers, Set<TableName> tables, 157 String targetGroupName) throws IOException { 158 // Presume first server's source group. Later ensure all servers are from this group. 159 Address firstServer = servers.iterator().next(); 160 RSGroupInfo tmpSrcGrp = rsGroupInfoManager.getRSGroupOfServer(firstServer); 161 if (tmpSrcGrp == null) { 162 // Be careful. This exception message is tested for in TestRSGroupsBase... 163 throw new ConstraintException("Source RSGroup for server " + firstServer 164 + " does not exist."); 165 } 166 RSGroupInfo srcGrp = new RSGroupInfo(tmpSrcGrp); 167 168 // Only move online servers 169 checkOnlineServersOnly(servers); 170 171 // Ensure all servers are of same rsgroup. 172 for (Address server: servers) { 173 String tmpGroup = rsGroupInfoManager.getRSGroupOfServer(server).getName(); 174 if (!tmpGroup.equals(srcGrp.getName())) { 175 throw new ConstraintException("Move server request should only come from one source " + 176 "RSGroup. Expecting only " + srcGrp.getName() + " but contains " + tmpGroup); 177 } 178 } 179 180 // Ensure all tables and servers are of same rsgroup. 181 for (TableName table : tables) { 182 String tmpGroup = rsGroupInfoManager.getRSGroupOfTable(table); 183 if (!tmpGroup.equals(srcGrp.getName())) { 184 throw new ConstraintException("Move table request should only come from one source " + 185 "RSGroup. Expecting only " + srcGrp.getName() + " but contains " + tmpGroup); 186 } 187 } 188 189 if (srcGrp.getServers().size() <= servers.size() && srcGrp.getTables().size() > tables.size()) { 190 throw new ConstraintException("Cannot leave a RSGroup " + srcGrp.getName() + 191 " that contains tables without servers to host them."); 192 } 193 } 194 195 /** 196 * Move every region from servers which are currently located on these servers, 197 * but should not be located there. 198 * 199 * @param movedServers the servers that are moved to new group 200 * @param movedTables the tables that are moved to new group 201 * @param srcGrpServers all servers in the source group, excluding the movedServers 202 * @param targetGrp the target group 203 * @throws IOException if any error while moving regions 204 */ 205 private void moveServerRegionsFromGroup(Set<Address> movedServers, Set<TableName> movedTables, 206 Set<Address> srcGrpServers, RSGroupInfo targetGrp) throws IOException { 207 // Get server names corresponding to given Addresses 208 List<ServerName> movedServerNames = new ArrayList<>(movedServers.size()); 209 List<ServerName> srcGrpServerNames = new ArrayList<>(srcGrpServers.size()); 210 for (ServerName serverName : master.getServerManager().getOnlineServers().keySet()) { 211 // In case region move failed in previous attempt, regionsOwners and newRegionsOwners 212 // can have the same servers. So for all servers below both conditions to be checked 213 if (srcGrpServers.contains(serverName.getAddress())) { 214 srcGrpServerNames.add(serverName); 215 } 216 if (movedServers.contains(serverName.getAddress())) { 217 movedServerNames.add(serverName); 218 } 219 } 220 // Set true to indicate at least one region movement failed 221 boolean errorInRegionMove; 222 List<Pair<RegionInfo, Future<byte[]>>> assignmentFutures = new ArrayList<>(); 223 int retry = 0; 224 do { 225 errorInRegionMove = false; 226 for (ServerName server : movedServerNames) { 227 List<RegionInfo> regionsOnServer = getRegions(server.getAddress()); 228 for (RegionInfo region : regionsOnServer) { 229 if (!movedTables.contains(region.getTable()) && !srcGrpServers 230 .contains(getRegionAddress(region))) { 231 LOG.info("Moving server region {}, which do not belong to RSGroup {}", 232 region.getShortNameToLog(), targetGrp.getName()); 233 // Move region back to source RSGroup servers 234 ServerName dest = 235 this.master.getLoadBalancer().randomAssignment(region, srcGrpServerNames); 236 if (dest == null) { 237 errorInRegionMove = true; 238 continue; 239 } 240 RegionPlan rp = new RegionPlan(region, server, dest); 241 try { 242 Future<byte[]> future = this.master.getAssignmentManager().moveAsync(rp); 243 assignmentFutures.add(Pair.newPair(region, future)); 244 } catch (Exception ioe) { 245 errorInRegionMove = true; 246 LOG.error("Move region {} from group failed, will retry, current retry time is {}", 247 region.getShortNameToLog(), retry, ioe); 248 } 249 } 250 } 251 } 252 boolean allRegionsMoved = 253 waitForRegionMovement(assignmentFutures, targetGrp.getName(), retry); 254 if (allRegionsMoved && !errorInRegionMove) { 255 LOG.info("All regions from server(s) {} moved to target group {}.", movedServerNames, 256 targetGrp.getName()); 257 return; 258 } else { 259 retry++; 260 try { 261 rsGroupInfoManager.wait(1000); 262 } catch (InterruptedException e) { 263 LOG.warn("Sleep interrupted", e); 264 Thread.currentThread().interrupt(); 265 } 266 } 267 } while (retry <= 50); 268 } 269 270 private Address getRegionAddress(RegionInfo hri) { 271 ServerName sn = master.getAssignmentManager().getRegionStates().getRegionServerOfRegion(hri); 272 return sn.getAddress(); 273 } 274 275 /** 276 * Wait for all the region move to complete. Keep waiting for other region movement 277 * completion even if some region movement fails. 278 */ 279 private boolean waitForRegionMovement(List<Pair<RegionInfo, Future<byte[]>>> regionMoveFutures, 280 String tgtGrpName, int retryCount) { 281 LOG.info("Moving {} region(s) to group {}, current retry={}", regionMoveFutures.size(), 282 tgtGrpName, retryCount); 283 boolean allRegionsMoved = true; 284 for (Pair<RegionInfo, Future<byte[]>> pair : regionMoveFutures) { 285 try { 286 pair.getSecond().get(); 287 if (master.getAssignmentManager().getRegionStates(). 288 getRegionState(pair.getFirst()).isFailedOpen()) { 289 allRegionsMoved = false; 290 } 291 } catch (InterruptedException e) { 292 LOG.warn("Sleep interrupted", e); 293 // Dont return form there lets wait for other regions to complete movement. 294 allRegionsMoved = false; 295 } catch (Exception e) { 296 allRegionsMoved = false; 297 LOG.error("Move region {} to group {} failed, will retry on next attempt", 298 pair.getFirst().getShortNameToLog(), tgtGrpName, e); 299 } 300 } 301 return allRegionsMoved; 302 } 303 304 /** 305 * Moves regions of tables which are not on target group servers. 306 * 307 * @param tables the tables that will move to new group 308 * @param targetGrp the target group 309 * @throws IOException if moving the region fails 310 */ 311 private void moveTableRegionsToGroup(Set<TableName> tables, RSGroupInfo targetGrp) 312 throws IOException { 313 List<ServerName> targetGrpSevers = new ArrayList<>(targetGrp.getServers().size()); 314 for (ServerName serverName : master.getServerManager().getOnlineServers().keySet()) { 315 if (targetGrp.getServers().contains(serverName.getAddress())) { 316 targetGrpSevers.add(serverName); 317 } 318 } 319 //Set true to indicate at least one region movement failed 320 boolean errorInRegionMove; 321 int retry = 0; 322 List<Pair<RegionInfo, Future<byte[]>>> assignmentFutures = new ArrayList<>(); 323 do { 324 errorInRegionMove = false; 325 for (TableName table : tables) { 326 if (master.getTableStateManager().isTableState(table, TableState.State.DISABLED, 327 TableState.State.DISABLING)) { 328 LOG.debug("Skipping move regions because the table {} is disabled", table); 329 continue; 330 } 331 LOG.info("Moving region(s) for table {} to RSGroup {}", table, targetGrp.getName()); 332 for (RegionInfo region : master.getAssignmentManager().getRegionStates() 333 .getRegionsOfTable(table)) { 334 ServerName sn = 335 master.getAssignmentManager().getRegionStates().getRegionServerOfRegion(region); 336 if (!targetGrp.containsServer(sn.getAddress())) { 337 LOG.info("Moving region {} to RSGroup {}", region.getShortNameToLog(), 338 targetGrp.getName()); 339 ServerName dest = 340 this.master.getLoadBalancer().randomAssignment(region, targetGrpSevers); 341 if (dest == null) { 342 errorInRegionMove = true; 343 continue; 344 } 345 RegionPlan rp = new RegionPlan(region, sn, dest); 346 try { 347 Future<byte[]> future = this.master.getAssignmentManager().moveAsync(rp); 348 assignmentFutures.add(Pair.newPair(region, future)); 349 } catch (Exception ioe) { 350 errorInRegionMove = true; 351 LOG.error("Move region {} to group failed, will retry, current retry time is {}", 352 region.getShortNameToLog(), retry, ioe); 353 } 354 355 } 356 } 357 } 358 boolean allRegionsMoved = 359 waitForRegionMovement(assignmentFutures, targetGrp.getName(), retry); 360 if (allRegionsMoved && !errorInRegionMove) { 361 LOG.info("All regions from table(s) {} moved to target group {}.", tables, 362 targetGrp.getName()); 363 return; 364 } else { 365 retry++; 366 try { 367 rsGroupInfoManager.wait(1000); 368 } catch (InterruptedException e) { 369 LOG.warn("Sleep interrupted", e); 370 Thread.currentThread().interrupt(); 371 } 372 } 373 } while (retry <= 50); 374 } 375 376 @edu.umd.cs.findbugs.annotations.SuppressWarnings( 377 value="RCN_REDUNDANT_NULLCHECK_WOULD_HAVE_BEEN_A_NPE", 378 justification="Ignoring complaint because don't know what it is complaining about") 379 @Override 380 public void moveServers(Set<Address> servers, String targetGroupName) throws IOException { 381 if (servers == null) { 382 throw new ConstraintException("The list of servers to move cannot be null."); 383 } 384 if (servers.isEmpty()) { 385 // For some reason this difference between null servers and isEmpty is important distinction. 386 // TODO. Why? Stuff breaks if I equate them. 387 return; 388 } 389 //check target group 390 getAndCheckRSGroupInfo(targetGroupName); 391 392 // Hold a lock on the manager instance while moving servers to prevent 393 // another writer changing our state while we are working. 394 synchronized (rsGroupInfoManager) { 395 // Presume first server's source group. Later ensure all servers are from this group. 396 Address firstServer = servers.iterator().next(); 397 RSGroupInfo srcGrp = rsGroupInfoManager.getRSGroupOfServer(firstServer); 398 if (srcGrp == null) { 399 // Be careful. This exception message is tested for in TestRSGroupsBase... 400 throw new ConstraintException("Source RSGroup for server " + firstServer 401 + " does not exist."); 402 } 403 // Only move online servers (when moving from 'default') or servers from other 404 // groups. This prevents bogus servers from entering groups 405 if (RSGroupInfo.DEFAULT_GROUP.equals(srcGrp.getName())) { 406 if (srcGrp.getServers().size() <= servers.size()) { 407 throw new ConstraintException(KEEP_ONE_SERVER_IN_DEFAULT_ERROR_MESSAGE); 408 } 409 checkOnlineServersOnly(servers); 410 } 411 // Ensure all servers are of same rsgroup. 412 for (Address server: servers) { 413 String tmpGroup = rsGroupInfoManager.getRSGroupOfServer(server).getName(); 414 if (!tmpGroup.equals(srcGrp.getName())) { 415 throw new ConstraintException("Move server request should only come from one source " + 416 "RSGroup. Expecting only " + srcGrp.getName() + " but contains " + tmpGroup); 417 } 418 } 419 if (srcGrp.getServers().size() <= servers.size() && srcGrp.getTables().size() > 0) { 420 throw new ConstraintException("Cannot leave a RSGroup " + srcGrp.getName() + 421 " that contains tables without servers to host them."); 422 } 423 424 // MovedServers may be < passed in 'servers'. 425 Set<Address> movedServers = rsGroupInfoManager.moveServers(servers, srcGrp.getName(), 426 targetGroupName); 427 moveServerRegionsFromGroup(movedServers, Collections.emptySet(), 428 rsGroupInfoManager.getRSGroup(srcGrp.getName()).getServers(), 429 rsGroupInfoManager.getRSGroup(targetGroupName)); 430 LOG.info("Move servers done: {} => {}", srcGrp.getName(), targetGroupName); 431 } 432 } 433 434 @Override 435 public void moveTables(Set<TableName> tables, String targetGroup) throws IOException { 436 if (tables == null) { 437 throw new ConstraintException("The list of tables cannot be null."); 438 } 439 if (tables.size() < 1) { 440 LOG.debug("moveTables() passed an empty set. Ignoring."); 441 return; 442 } 443 444 // Hold a lock on the manager instance while moving servers to prevent 445 // another writer changing our state while we are working. 446 synchronized (rsGroupInfoManager) { 447 if(targetGroup != null) { 448 RSGroupInfo destGroup = rsGroupInfoManager.getRSGroup(targetGroup); 449 if(destGroup == null) { 450 throw new ConstraintException("Target " + targetGroup + " RSGroup does not exist."); 451 } 452 if(destGroup.getServers().size() < 1) { 453 throw new ConstraintException("Target RSGroup must have at least one server."); 454 } 455 } 456 rsGroupInfoManager.moveTables(tables, targetGroup); 457 458 // targetGroup is null when a table is being deleted. In this case no further 459 // action is required. 460 if (targetGroup != null) { 461 moveTableRegionsToGroup(tables, rsGroupInfoManager.getRSGroup(targetGroup)); 462 } 463 } 464 } 465 466 @Override 467 public void addRSGroup(String name) throws IOException { 468 rsGroupInfoManager.addRSGroup(new RSGroupInfo(name)); 469 } 470 471 @Override 472 public void removeRSGroup(String name) throws IOException { 473 // Hold a lock on the manager instance while moving servers to prevent 474 // another writer changing our state while we are working. 475 synchronized (rsGroupInfoManager) { 476 RSGroupInfo rsGroupInfo = rsGroupInfoManager.getRSGroup(name); 477 if (rsGroupInfo == null) { 478 throw new ConstraintException("RSGroup " + name + " does not exist"); 479 } 480 int tableCount = rsGroupInfo.getTables().size(); 481 if (tableCount > 0) { 482 throw new ConstraintException("RSGroup " + name + " has " + tableCount + 483 " tables; you must remove these tables from the rsgroup before " + 484 "the rsgroup can be removed."); 485 } 486 int serverCount = rsGroupInfo.getServers().size(); 487 if (serverCount > 0) { 488 throw new ConstraintException("RSGroup " + name + " has " + serverCount + 489 " servers; you must remove these servers from the RSGroup before" + 490 "the RSGroup can be removed."); 491 } 492 for (NamespaceDescriptor ns : master.getClusterSchema().getNamespaces()) { 493 String nsGroup = ns.getConfigurationValue(RSGroupInfo.NAMESPACE_DESC_PROP_GROUP); 494 if (nsGroup != null && nsGroup.equals(name)) { 495 throw new ConstraintException( 496 "RSGroup " + name + " is referenced by namespace: " + ns.getName()); 497 } 498 } 499 rsGroupInfoManager.removeRSGroup(name); 500 } 501 } 502 503 @Override 504 public boolean balanceRSGroup(String groupName) throws IOException { 505 ServerManager serverManager = master.getServerManager(); 506 LoadBalancer balancer = master.getLoadBalancer(); 507 508 synchronized (balancer) { 509 // If balance not true, don't run balancer. 510 if (!((HMaster) master).isBalancerOn()) { 511 return false; 512 } 513 514 if (getRSGroupInfo(groupName) == null) { 515 throw new ConstraintException("RSGroup does not exist: "+groupName); 516 } 517 // Only allow one balance run at at time. 518 Map<String, RegionState> groupRIT = rsGroupGetRegionsInTransition(groupName); 519 if (groupRIT.size() > 0) { 520 LOG.debug("Not running balancer because {} region(s) in transition: {}", groupRIT.size(), 521 StringUtils.abbreviate( 522 master.getAssignmentManager().getRegionStates().getRegionsInTransition().toString(), 523 256)); 524 return false; 525 } 526 if (serverManager.areDeadServersInProgress()) { 527 LOG.debug("Not running balancer because processing dead regionserver(s): {}", 528 serverManager.getDeadServers()); 529 return false; 530 } 531 532 //We balance per group instead of per table 533 Map<TableName, Map<ServerName, List<RegionInfo>>> assignmentsByTable = 534 getRSGroupAssignmentsByTable(master.getTableStateManager(), groupName); 535 List<RegionPlan> plans = balancer.balanceCluster(assignmentsByTable); 536 boolean balancerRan = !plans.isEmpty(); 537 if (balancerRan) { 538 LOG.info("RSGroup balance {} starting with plan count: {}", groupName, plans.size()); 539 master.executeRegionPlansWithThrottling(plans); 540 LOG.info("RSGroup balance " + groupName + " completed"); 541 } 542 return balancerRan; 543 } 544 } 545 546 @Override 547 public List<RSGroupInfo> listRSGroups() throws IOException { 548 return rsGroupInfoManager.listRSGroups(); 549 } 550 551 @Override 552 public RSGroupInfo getRSGroupOfServer(Address hostPort) throws IOException { 553 return rsGroupInfoManager.getRSGroupOfServer(hostPort); 554 } 555 556 @Override 557 public void moveServersAndTables(Set<Address> servers, Set<TableName> tables, String targetGroup) 558 throws IOException { 559 if (servers == null || servers.isEmpty()) { 560 throw new ConstraintException("The list of servers to move cannot be null or empty."); 561 } 562 if (tables == null || tables.isEmpty()) { 563 throw new ConstraintException("The list of tables to move cannot be null or empty."); 564 } 565 566 //check target group 567 getAndCheckRSGroupInfo(targetGroup); 568 569 // Hold a lock on the manager instance while moving servers and tables to prevent 570 // another writer changing our state while we are working. 571 synchronized (rsGroupInfoManager) { 572 //check servers and tables status 573 checkServersAndTables(servers, tables, targetGroup); 574 575 //Move servers and tables to a new group. 576 String srcGroup = getRSGroupOfServer(servers.iterator().next()).getName(); 577 rsGroupInfoManager.moveServersAndTables(servers, tables, srcGroup, targetGroup); 578 579 //move regions on these servers which do not belong to group tables 580 moveServerRegionsFromGroup(servers, tables, 581 rsGroupInfoManager.getRSGroup(srcGroup).getServers(), 582 rsGroupInfoManager.getRSGroup(targetGroup)); 583 //move regions of these tables which are not on group servers 584 moveTableRegionsToGroup(tables, rsGroupInfoManager.getRSGroup(targetGroup)); 585 } 586 LOG.info("Move servers and tables done. Severs: {}, Tables: {} => {}", servers, tables, 587 targetGroup); 588 } 589 590 @Override 591 public void removeServers(Set<Address> servers) throws IOException { 592 { 593 if (servers == null || servers.isEmpty()) { 594 throw new ConstraintException("The set of servers to remove cannot be null or empty."); 595 } 596 // Hold a lock on the manager instance while moving servers to prevent 597 // another writer changing our state while we are working. 598 synchronized (rsGroupInfoManager) { 599 //check the set of servers 600 checkForDeadOrOnlineServers(servers); 601 rsGroupInfoManager.removeServers(servers); 602 LOG.info("Remove decommissioned servers {} from RSGroup done", servers); 603 } 604 } 605 } 606 607 @Override 608 public void renameRSGroup(String oldName, String newName) throws IOException { 609 synchronized (rsGroupInfoManager) { 610 rsGroupInfoManager.renameRSGroup(oldName, newName); 611 } 612 } 613 614 @Override 615 public void updateRSGroupConfig(String groupName, Map<String, String> configuration) 616 throws IOException { 617 synchronized (rsGroupInfoManager) { 618 rsGroupInfoManager.updateRSGroupConfig(groupName, configuration); 619 } 620 } 621 622 private Map<String, RegionState> rsGroupGetRegionsInTransition(String groupName) 623 throws IOException { 624 Map<String, RegionState> rit = Maps.newTreeMap(); 625 AssignmentManager am = master.getAssignmentManager(); 626 for(TableName tableName : getRSGroupInfo(groupName).getTables()) { 627 for(RegionInfo regionInfo: am.getRegionStates().getRegionsOfTable(tableName)) { 628 RegionState state = am.getRegionStates().getRegionTransitionState(regionInfo); 629 if(state != null) { 630 rit.put(regionInfo.getEncodedName(), state); 631 } 632 } 633 } 634 return rit; 635 } 636 637 /** 638 * This is an EXPENSIVE clone. Cloning though is the safest thing to do. Can't let out original 639 * since it can change and at least the load balancer wants to iterate this exported list. Load 640 * balancer should iterate over this list because cloned list will ignore disabled table and split 641 * parent region cases. This method is invoked by {@link #balanceRSGroup} 642 * @return A clone of current assignments for this group. 643 */ 644 Map<TableName, Map<ServerName, List<RegionInfo>>> getRSGroupAssignmentsByTable( 645 TableStateManager tableStateManager, String groupName) throws IOException { 646 Map<TableName, Map<ServerName, List<RegionInfo>>> result = Maps.newHashMap(); 647 RSGroupInfo rsGroupInfo = getRSGroupInfo(groupName); 648 Map<TableName, Map<ServerName, List<RegionInfo>>> assignments = Maps.newHashMap(); 649 for (Map.Entry<RegionInfo, ServerName> entry : master.getAssignmentManager().getRegionStates() 650 .getRegionAssignments().entrySet()) { 651 TableName currTable = entry.getKey().getTable(); 652 ServerName currServer = entry.getValue(); 653 RegionInfo currRegion = entry.getKey(); 654 if (rsGroupInfo.getTables().contains(currTable)) { 655 if (tableStateManager.isTableState(currTable, TableState.State.DISABLED, 656 TableState.State.DISABLING)) { 657 continue; 658 } 659 if (currRegion.isSplitParent()) { 660 continue; 661 } 662 assignments.putIfAbsent(currTable, new HashMap<>()); 663 assignments.get(currTable).putIfAbsent(currServer, new ArrayList<>()); 664 assignments.get(currTable).get(currServer).add(currRegion); 665 } 666 } 667 668 Map<ServerName, List<RegionInfo>> serverMap = Maps.newHashMap(); 669 for(ServerName serverName: master.getServerManager().getOnlineServers().keySet()) { 670 if(rsGroupInfo.getServers().contains(serverName.getAddress())) { 671 serverMap.put(serverName, Collections.emptyList()); 672 } 673 } 674 675 // add all tables that are members of the group 676 for(TableName tableName : rsGroupInfo.getTables()) { 677 if(assignments.containsKey(tableName)) { 678 result.put(tableName, new HashMap<>()); 679 result.get(tableName).putAll(serverMap); 680 result.get(tableName).putAll(assignments.get(tableName)); 681 LOG.debug("Adding assignments for {}: {}", tableName, assignments.get(tableName)); 682 } 683 } 684 685 return result; 686 } 687 688 /** 689 * Check if the set of servers are belong to dead servers list or online servers list. 690 * @param servers servers to remove 691 */ 692 private void checkForDeadOrOnlineServers(Set<Address> servers) throws ConstraintException { 693 // This uglyness is because we only have Address, not ServerName. 694 Set<Address> onlineServers = new HashSet<>(); 695 List<ServerName> drainingServers = master.getServerManager().getDrainingServersList(); 696 for (ServerName server : master.getServerManager().getOnlineServers().keySet()) { 697 // Only online but not decommissioned servers are really online 698 if (!drainingServers.contains(server)) { 699 onlineServers.add(server.getAddress()); 700 } 701 } 702 703 Set<Address> deadServers = new HashSet<>(); 704 for(ServerName server: master.getServerManager().getDeadServers().copyServerNames()) { 705 deadServers.add(server.getAddress()); 706 } 707 708 for (Address address: servers) { 709 if (onlineServers.contains(address)) { 710 throw new ConstraintException( 711 "Server " + address + " is an online server, not allowed to remove."); 712 } 713 if (deadServers.contains(address)) { 714 throw new ConstraintException( 715 "Server " + address + " is on the dead servers list," 716 + " Maybe it will come back again, not allowed to remove."); 717 } 718 } 719 } 720}