001/**
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018package org.apache.hadoop.hbase.rsgroup;
019
020import java.io.IOException;
021import java.util.ArrayList;
022import java.util.Collections;
023import java.util.HashMap;
024import java.util.HashSet;
025import java.util.LinkedList;
026import java.util.List;
027import java.util.Map;
028import java.util.Set;
029import java.util.concurrent.Future;
030import org.apache.commons.lang3.StringUtils;
031import org.apache.hadoop.hbase.NamespaceDescriptor;
032import org.apache.hadoop.hbase.ServerName;
033import org.apache.hadoop.hbase.TableName;
034import org.apache.hadoop.hbase.client.RegionInfo;
035import org.apache.hadoop.hbase.client.TableState;
036import org.apache.hadoop.hbase.constraint.ConstraintException;
037import org.apache.hadoop.hbase.master.HMaster;
038import org.apache.hadoop.hbase.master.LoadBalancer;
039import org.apache.hadoop.hbase.master.MasterServices;
040import org.apache.hadoop.hbase.master.RegionPlan;
041import org.apache.hadoop.hbase.master.RegionState;
042import org.apache.hadoop.hbase.master.ServerManager;
043import org.apache.hadoop.hbase.master.TableStateManager;
044import org.apache.hadoop.hbase.master.assignment.AssignmentManager;
045import org.apache.hadoop.hbase.master.assignment.RegionStateNode;
046import org.apache.hadoop.hbase.net.Address;
047import org.apache.hadoop.hbase.util.Pair;
048import org.apache.hbase.thirdparty.com.google.common.collect.Maps;
049import org.apache.yetus.audience.InterfaceAudience;
050import org.slf4j.Logger;
051import org.slf4j.LoggerFactory;
052
053/**
054 * Service to support Region Server Grouping (HBase-6721).
055 */
056@InterfaceAudience.Private
057public class RSGroupAdminServer implements RSGroupAdmin {
058  private static final Logger LOG = LoggerFactory.getLogger(RSGroupAdminServer.class);
059  public static final String KEEP_ONE_SERVER_IN_DEFAULT_ERROR_MESSAGE = "should keep at least " +
060          "one server in 'default' RSGroup.";
061
062  private MasterServices master;
063  private final RSGroupInfoManager rsGroupInfoManager;
064
065  public RSGroupAdminServer(MasterServices master, RSGroupInfoManager rsGroupInfoManager) {
066    this.master = master;
067    this.rsGroupInfoManager = rsGroupInfoManager;
068  }
069
070  @Override
071  public RSGroupInfo getRSGroupInfo(String groupName) throws IOException {
072    return rsGroupInfoManager.getRSGroup(groupName);
073  }
074
075  @Override
076  public RSGroupInfo getRSGroupInfoOfTable(TableName tableName) throws IOException {
077    // We are reading across two Maps in the below with out synchronizing across
078    // them; should be safe most of the time.
079    String groupName = rsGroupInfoManager.getRSGroupOfTable(tableName);
080    return groupName == null? null: rsGroupInfoManager.getRSGroup(groupName);
081  }
082
083  private void checkOnlineServersOnly(Set<Address> servers) throws ConstraintException {
084    // This uglyness is because we only have Address, not ServerName.
085    // Online servers are keyed by ServerName.
086    Set<Address> onlineServers = new HashSet<>();
087    for(ServerName server: master.getServerManager().getOnlineServers().keySet()) {
088      onlineServers.add(server.getAddress());
089    }
090    for (Address address: servers) {
091      if (!onlineServers.contains(address)) {
092        throw new ConstraintException(
093            "Server " + address + " is not an online server in 'default' RSGroup.");
094      }
095    }
096  }
097
098  /**
099   * Check passed name. Fail if nulls or if corresponding RSGroupInfo not found.
100   * @return The RSGroupInfo named <code>name</code>
101   */
102  private RSGroupInfo getAndCheckRSGroupInfo(String name) throws IOException {
103    if (StringUtils.isEmpty(name)) {
104      throw new ConstraintException("RSGroup cannot be null.");
105    }
106    RSGroupInfo rsGroupInfo = getRSGroupInfo(name);
107    if (rsGroupInfo == null) {
108      throw new ConstraintException("RSGroup does not exist: " + name);
109    }
110    return rsGroupInfo;
111  }
112
113  /**
114   * @return List of Regions associated with this <code>server</code>.
115   */
116  private List<RegionInfo> getRegions(final Address server) {
117    LinkedList<RegionInfo> regions = new LinkedList<>();
118    for (Map.Entry<RegionInfo, ServerName> el :
119        master.getAssignmentManager().getRegionStates().getRegionAssignments().entrySet()) {
120      if (el.getValue() == null) {
121        continue;
122      }
123
124      if (el.getValue().getAddress().equals(server)) {
125        addRegion(regions, el.getKey());
126      }
127    }
128    for (RegionStateNode state : master.getAssignmentManager().getRegionsInTransition()) {
129      if (state.getRegionLocation() != null &&
130          state.getRegionLocation().getAddress().equals(server)) {
131        addRegion(regions, state.getRegionInfo());
132      }
133    }
134    return regions;
135  }
136
137  private void addRegion(final LinkedList<RegionInfo> regions, RegionInfo hri) {
138    // If meta, move it last otherwise other unassigns fail because meta is not
139    // online for them to update state in. This is dodgy. Needs to be made more
140    // robust. See TODO below.
141    if (hri.isMetaRegion()) {
142      regions.addLast(hri);
143    } else {
144      regions.addFirst(hri);
145    }
146  }
147
148  /**
149   * Check servers and tables.
150   *
151   * @param servers servers to move
152   * @param tables tables to move
153   * @param targetGroupName target group name
154   * @throws IOException if nulls or if servers and tables not belong to the same group
155   */
156  private void checkServersAndTables(Set<Address> servers, Set<TableName> tables,
157                                     String targetGroupName) throws IOException {
158    // Presume first server's source group. Later ensure all servers are from this group.
159    Address firstServer = servers.iterator().next();
160    RSGroupInfo tmpSrcGrp = rsGroupInfoManager.getRSGroupOfServer(firstServer);
161    if (tmpSrcGrp == null) {
162      // Be careful. This exception message is tested for in TestRSGroupsBase...
163      throw new ConstraintException("Source RSGroup for server " + firstServer
164              + " does not exist.");
165    }
166    RSGroupInfo srcGrp = new RSGroupInfo(tmpSrcGrp);
167
168    // Only move online servers
169    checkOnlineServersOnly(servers);
170
171    // Ensure all servers are of same rsgroup.
172    for (Address server: servers) {
173      String tmpGroup = rsGroupInfoManager.getRSGroupOfServer(server).getName();
174      if (!tmpGroup.equals(srcGrp.getName())) {
175        throw new ConstraintException("Move server request should only come from one source " +
176                "RSGroup. Expecting only " + srcGrp.getName() + " but contains " + tmpGroup);
177      }
178    }
179
180    // Ensure all tables and servers are of same rsgroup.
181    for (TableName table : tables) {
182      String tmpGroup = rsGroupInfoManager.getRSGroupOfTable(table);
183      if (!tmpGroup.equals(srcGrp.getName())) {
184        throw new ConstraintException("Move table request should only come from one source " +
185                "RSGroup. Expecting only " + srcGrp.getName() + " but contains " + tmpGroup);
186      }
187    }
188
189    if (srcGrp.getServers().size() <= servers.size() && srcGrp.getTables().size() > tables.size()) {
190      throw new ConstraintException("Cannot leave a RSGroup " + srcGrp.getName() +
191              " that contains tables without servers to host them.");
192    }
193  }
194
195  /**
196   * Move every region from servers which are currently located on these servers,
197   * but should not be located there.
198   *
199   * @param movedServers  the servers that are moved to new group
200   * @param movedTables the tables that are moved to new group
201   * @param srcGrpServers all servers in the source group, excluding the movedServers
202   * @param targetGrp     the target group
203   * @throws IOException if any error while moving regions
204   */
205  private void moveServerRegionsFromGroup(Set<Address> movedServers, Set<TableName> movedTables,
206    Set<Address> srcGrpServers, RSGroupInfo targetGrp) throws IOException {
207    // Get server names corresponding to given Addresses
208    List<ServerName> movedServerNames = new ArrayList<>(movedServers.size());
209    List<ServerName> srcGrpServerNames = new ArrayList<>(srcGrpServers.size());
210    for (ServerName serverName : master.getServerManager().getOnlineServers().keySet()) {
211      // In case region move failed in previous attempt, regionsOwners and newRegionsOwners
212      // can have the same servers. So for all servers below both conditions to be checked
213      if (srcGrpServers.contains(serverName.getAddress())) {
214        srcGrpServerNames.add(serverName);
215      }
216      if (movedServers.contains(serverName.getAddress())) {
217        movedServerNames.add(serverName);
218      }
219    }
220    // Set true to indicate at least one region movement failed
221    boolean errorInRegionMove;
222    List<Pair<RegionInfo, Future<byte[]>>> assignmentFutures = new ArrayList<>();
223    int retry = 0;
224    do {
225      errorInRegionMove = false;
226      for (ServerName server : movedServerNames) {
227        List<RegionInfo> regionsOnServer = getRegions(server.getAddress());
228        for (RegionInfo region : regionsOnServer) {
229          if (!movedTables.contains(region.getTable()) && !srcGrpServers
230            .contains(getRegionAddress(region))) {
231            LOG.info("Moving server region {}, which do not belong to RSGroup {}",
232              region.getShortNameToLog(), targetGrp.getName());
233            // Move region back to source RSGroup servers
234            ServerName dest =
235              this.master.getLoadBalancer().randomAssignment(region, srcGrpServerNames);
236            if (dest == null) {
237              errorInRegionMove = true;
238              continue;
239            }
240            RegionPlan rp = new RegionPlan(region, server, dest);
241            try {
242              Future<byte[]> future = this.master.getAssignmentManager().moveAsync(rp);
243              assignmentFutures.add(Pair.newPair(region, future));
244            } catch (Exception ioe) {
245              errorInRegionMove = true;
246              LOG.error("Move region {} from group failed, will retry, current retry time is {}",
247                region.getShortNameToLog(), retry, ioe);
248            }
249          }
250        }
251      }
252      boolean allRegionsMoved =
253        waitForRegionMovement(assignmentFutures, targetGrp.getName(), retry);
254      if (allRegionsMoved && !errorInRegionMove) {
255        LOG.info("All regions from server(s) {} moved to target group {}.", movedServerNames,
256          targetGrp.getName());
257        return;
258      } else {
259        retry++;
260        try {
261          rsGroupInfoManager.wait(1000);
262        } catch (InterruptedException e) {
263          LOG.warn("Sleep interrupted", e);
264          Thread.currentThread().interrupt();
265        }
266      }
267    } while (retry <= 50);
268  }
269
270  private Address getRegionAddress(RegionInfo hri) {
271    ServerName sn = master.getAssignmentManager().getRegionStates().getRegionServerOfRegion(hri);
272    return sn.getAddress();
273  }
274
275  /**
276   * Wait for all the region move to complete. Keep waiting for other region movement
277   * completion even if some region movement fails.
278   */
279  private boolean waitForRegionMovement(List<Pair<RegionInfo, Future<byte[]>>> regionMoveFutures,
280    String tgtGrpName, int retryCount) {
281    LOG.info("Moving {} region(s) to group {}, current retry={}", regionMoveFutures.size(),
282      tgtGrpName, retryCount);
283    boolean allRegionsMoved = true;
284    for (Pair<RegionInfo, Future<byte[]>> pair : regionMoveFutures) {
285      try {
286        pair.getSecond().get();
287        if (master.getAssignmentManager().getRegionStates().
288          getRegionState(pair.getFirst()).isFailedOpen()) {
289          allRegionsMoved = false;
290        }
291      } catch (InterruptedException e) {
292        LOG.warn("Sleep interrupted", e);
293        // Dont return form there lets wait for other regions to complete movement.
294        allRegionsMoved = false;
295      } catch (Exception e) {
296        allRegionsMoved = false;
297        LOG.error("Move region {} to group {} failed, will retry on next attempt",
298          pair.getFirst().getShortNameToLog(), tgtGrpName, e);
299      }
300    }
301    return allRegionsMoved;
302  }
303
304  /**
305   * Moves regions of tables which are not on target group servers.
306   *
307   * @param tables    the tables that will move to new group
308   * @param targetGrp the target group
309   * @throws IOException if moving the region fails
310   */
311  private void moveTableRegionsToGroup(Set<TableName> tables, RSGroupInfo targetGrp)
312    throws IOException {
313    List<ServerName> targetGrpSevers = new ArrayList<>(targetGrp.getServers().size());
314    for (ServerName serverName : master.getServerManager().getOnlineServers().keySet()) {
315      if (targetGrp.getServers().contains(serverName.getAddress())) {
316        targetGrpSevers.add(serverName);
317      }
318    }
319    //Set true to indicate at least one region movement failed
320    boolean errorInRegionMove;
321    int retry = 0;
322    List<Pair<RegionInfo, Future<byte[]>>> assignmentFutures = new ArrayList<>();
323    do {
324      errorInRegionMove = false;
325      for (TableName table : tables) {
326        if (master.getTableStateManager().isTableState(table, TableState.State.DISABLED,
327          TableState.State.DISABLING)) {
328          LOG.debug("Skipping move regions because the table {} is disabled", table);
329          continue;
330        }
331        LOG.info("Moving region(s) for table {} to RSGroup {}", table, targetGrp.getName());
332        for (RegionInfo region : master.getAssignmentManager().getRegionStates()
333          .getRegionsOfTable(table)) {
334          ServerName sn =
335            master.getAssignmentManager().getRegionStates().getRegionServerOfRegion(region);
336          if (!targetGrp.containsServer(sn.getAddress())) {
337            LOG.info("Moving region {} to RSGroup {}", region.getShortNameToLog(),
338              targetGrp.getName());
339            ServerName dest =
340              this.master.getLoadBalancer().randomAssignment(region, targetGrpSevers);
341            if (dest == null) {
342              errorInRegionMove = true;
343              continue;
344            }
345            RegionPlan rp = new RegionPlan(region, sn, dest);
346            try {
347              Future<byte[]> future = this.master.getAssignmentManager().moveAsync(rp);
348              assignmentFutures.add(Pair.newPair(region, future));
349            } catch (Exception ioe) {
350              errorInRegionMove = true;
351              LOG.error("Move region {} to group failed, will retry, current retry time is {}",
352                region.getShortNameToLog(), retry, ioe);
353            }
354
355          }
356        }
357      }
358      boolean allRegionsMoved =
359        waitForRegionMovement(assignmentFutures, targetGrp.getName(), retry);
360      if (allRegionsMoved && !errorInRegionMove) {
361        LOG.info("All regions from table(s) {} moved to target group {}.", tables,
362          targetGrp.getName());
363        return;
364      } else {
365        retry++;
366        try {
367          rsGroupInfoManager.wait(1000);
368        } catch (InterruptedException e) {
369          LOG.warn("Sleep interrupted", e);
370          Thread.currentThread().interrupt();
371        }
372      }
373    } while (retry <= 50);
374  }
375
376  @edu.umd.cs.findbugs.annotations.SuppressWarnings(
377      value="RCN_REDUNDANT_NULLCHECK_WOULD_HAVE_BEEN_A_NPE",
378      justification="Ignoring complaint because don't know what it is complaining about")
379  @Override
380  public void moveServers(Set<Address> servers, String targetGroupName) throws IOException {
381    if (servers == null) {
382      throw new ConstraintException("The list of servers to move cannot be null.");
383    }
384    if (servers.isEmpty()) {
385      // For some reason this difference between null servers and isEmpty is important distinction.
386      // TODO. Why? Stuff breaks if I equate them.
387      return;
388    }
389    //check target group
390    getAndCheckRSGroupInfo(targetGroupName);
391
392    // Hold a lock on the manager instance while moving servers to prevent
393    // another writer changing our state while we are working.
394    synchronized (rsGroupInfoManager) {
395      // Presume first server's source group. Later ensure all servers are from this group.
396      Address firstServer = servers.iterator().next();
397      RSGroupInfo srcGrp = rsGroupInfoManager.getRSGroupOfServer(firstServer);
398      if (srcGrp == null) {
399        // Be careful. This exception message is tested for in TestRSGroupsBase...
400        throw new ConstraintException("Source RSGroup for server " + firstServer
401            + " does not exist.");
402      }
403      // Only move online servers (when moving from 'default') or servers from other
404      // groups. This prevents bogus servers from entering groups
405      if (RSGroupInfo.DEFAULT_GROUP.equals(srcGrp.getName())) {
406        if (srcGrp.getServers().size() <= servers.size()) {
407          throw new ConstraintException(KEEP_ONE_SERVER_IN_DEFAULT_ERROR_MESSAGE);
408        }
409        checkOnlineServersOnly(servers);
410      }
411      // Ensure all servers are of same rsgroup.
412      for (Address server: servers) {
413        String tmpGroup = rsGroupInfoManager.getRSGroupOfServer(server).getName();
414        if (!tmpGroup.equals(srcGrp.getName())) {
415          throw new ConstraintException("Move server request should only come from one source " +
416              "RSGroup. Expecting only " + srcGrp.getName() + " but contains " + tmpGroup);
417        }
418      }
419      if (srcGrp.getServers().size() <= servers.size() && srcGrp.getTables().size() > 0) {
420        throw new ConstraintException("Cannot leave a RSGroup " + srcGrp.getName() +
421            " that contains tables without servers to host them.");
422      }
423
424      // MovedServers may be < passed in 'servers'.
425      Set<Address> movedServers = rsGroupInfoManager.moveServers(servers, srcGrp.getName(),
426          targetGroupName);
427      moveServerRegionsFromGroup(movedServers, Collections.emptySet(),
428        rsGroupInfoManager.getRSGroup(srcGrp.getName()).getServers(),
429        rsGroupInfoManager.getRSGroup(targetGroupName));
430      LOG.info("Move servers done: {} => {}", srcGrp.getName(), targetGroupName);
431    }
432  }
433
434  @Override
435  public void moveTables(Set<TableName> tables, String targetGroup) throws IOException {
436    if (tables == null) {
437      throw new ConstraintException("The list of tables cannot be null.");
438    }
439    if (tables.size() < 1) {
440      LOG.debug("moveTables() passed an empty set. Ignoring.");
441      return;
442    }
443
444    // Hold a lock on the manager instance while moving servers to prevent
445    // another writer changing our state while we are working.
446    synchronized (rsGroupInfoManager) {
447      if(targetGroup != null) {
448        RSGroupInfo destGroup = rsGroupInfoManager.getRSGroup(targetGroup);
449        if(destGroup == null) {
450          throw new ConstraintException("Target " + targetGroup + " RSGroup does not exist.");
451        }
452        if(destGroup.getServers().size() < 1) {
453          throw new ConstraintException("Target RSGroup must have at least one server.");
454        }
455      }
456      rsGroupInfoManager.moveTables(tables, targetGroup);
457
458      // targetGroup is null when a table is being deleted. In this case no further
459      // action is required.
460      if (targetGroup != null) {
461        moveTableRegionsToGroup(tables, rsGroupInfoManager.getRSGroup(targetGroup));
462      }
463    }
464  }
465
466  @Override
467  public void addRSGroup(String name) throws IOException {
468    rsGroupInfoManager.addRSGroup(new RSGroupInfo(name));
469  }
470
471  @Override
472  public void removeRSGroup(String name) throws IOException {
473    // Hold a lock on the manager instance while moving servers to prevent
474    // another writer changing our state while we are working.
475    synchronized (rsGroupInfoManager) {
476      RSGroupInfo rsGroupInfo = rsGroupInfoManager.getRSGroup(name);
477      if (rsGroupInfo == null) {
478        throw new ConstraintException("RSGroup " + name + " does not exist");
479      }
480      int tableCount = rsGroupInfo.getTables().size();
481      if (tableCount > 0) {
482        throw new ConstraintException("RSGroup " + name + " has " + tableCount +
483            " tables; you must remove these tables from the rsgroup before " +
484            "the rsgroup can be removed.");
485      }
486      int serverCount = rsGroupInfo.getServers().size();
487      if (serverCount > 0) {
488        throw new ConstraintException("RSGroup " + name + " has " + serverCount +
489            " servers; you must remove these servers from the RSGroup before" +
490            "the RSGroup can be removed.");
491      }
492      for (NamespaceDescriptor ns : master.getClusterSchema().getNamespaces()) {
493        String nsGroup = ns.getConfigurationValue(RSGroupInfo.NAMESPACE_DESC_PROP_GROUP);
494        if (nsGroup != null && nsGroup.equals(name)) {
495          throw new ConstraintException(
496            "RSGroup " + name + " is referenced by namespace: " + ns.getName());
497        }
498      }
499      rsGroupInfoManager.removeRSGroup(name);
500    }
501  }
502
503  @Override
504  public boolean balanceRSGroup(String groupName) throws IOException {
505    ServerManager serverManager = master.getServerManager();
506    LoadBalancer balancer = master.getLoadBalancer();
507
508    synchronized (balancer) {
509      // If balance not true, don't run balancer.
510      if (!((HMaster) master).isBalancerOn()) {
511        return false;
512      }
513
514      if (getRSGroupInfo(groupName) == null) {
515        throw new ConstraintException("RSGroup does not exist: "+groupName);
516      }
517      // Only allow one balance run at at time.
518      Map<String, RegionState> groupRIT = rsGroupGetRegionsInTransition(groupName);
519      if (groupRIT.size() > 0) {
520        LOG.debug("Not running balancer because {} region(s) in transition: {}", groupRIT.size(),
521            StringUtils.abbreviate(
522              master.getAssignmentManager().getRegionStates().getRegionsInTransition().toString(),
523              256));
524        return false;
525      }
526      if (serverManager.areDeadServersInProgress()) {
527        LOG.debug("Not running balancer because processing dead regionserver(s): {}",
528            serverManager.getDeadServers());
529        return false;
530      }
531
532      //We balance per group instead of per table
533      Map<TableName, Map<ServerName, List<RegionInfo>>> assignmentsByTable =
534          getRSGroupAssignmentsByTable(master.getTableStateManager(), groupName);
535      List<RegionPlan> plans = balancer.balanceCluster(assignmentsByTable);
536      boolean balancerRan = !plans.isEmpty();
537      if (balancerRan) {
538        LOG.info("RSGroup balance {} starting with plan count: {}", groupName, plans.size());
539        master.executeRegionPlansWithThrottling(plans);
540        LOG.info("RSGroup balance " + groupName + " completed");
541      }
542      return balancerRan;
543    }
544  }
545
546  @Override
547  public List<RSGroupInfo> listRSGroups() throws IOException {
548    return rsGroupInfoManager.listRSGroups();
549  }
550
551  @Override
552  public RSGroupInfo getRSGroupOfServer(Address hostPort) throws IOException {
553    return rsGroupInfoManager.getRSGroupOfServer(hostPort);
554  }
555
556  @Override
557  public void moveServersAndTables(Set<Address> servers, Set<TableName> tables, String targetGroup)
558      throws IOException {
559    if (servers == null || servers.isEmpty()) {
560      throw new ConstraintException("The list of servers to move cannot be null or empty.");
561    }
562    if (tables == null || tables.isEmpty()) {
563      throw new ConstraintException("The list of tables to move cannot be null or empty.");
564    }
565
566    //check target group
567    getAndCheckRSGroupInfo(targetGroup);
568
569    // Hold a lock on the manager instance while moving servers and tables to prevent
570    // another writer changing our state while we are working.
571    synchronized (rsGroupInfoManager) {
572      //check servers and tables status
573      checkServersAndTables(servers, tables, targetGroup);
574
575      //Move servers and tables to a new group.
576      String srcGroup = getRSGroupOfServer(servers.iterator().next()).getName();
577      rsGroupInfoManager.moveServersAndTables(servers, tables, srcGroup, targetGroup);
578
579      //move regions on these servers which do not belong to group tables
580      moveServerRegionsFromGroup(servers, tables,
581        rsGroupInfoManager.getRSGroup(srcGroup).getServers(),
582        rsGroupInfoManager.getRSGroup(targetGroup));
583      //move regions of these tables which are not on group servers
584      moveTableRegionsToGroup(tables, rsGroupInfoManager.getRSGroup(targetGroup));
585    }
586    LOG.info("Move servers and tables done. Severs: {}, Tables: {} => {}", servers, tables,
587        targetGroup);
588  }
589
590  @Override
591  public void removeServers(Set<Address> servers) throws IOException {
592    {
593      if (servers == null || servers.isEmpty()) {
594        throw new ConstraintException("The set of servers to remove cannot be null or empty.");
595      }
596      // Hold a lock on the manager instance while moving servers to prevent
597      // another writer changing our state while we are working.
598      synchronized (rsGroupInfoManager) {
599        //check the set of servers
600        checkForDeadOrOnlineServers(servers);
601        rsGroupInfoManager.removeServers(servers);
602        LOG.info("Remove decommissioned servers {} from RSGroup done", servers);
603      }
604    }
605  }
606
607  @Override
608  public void renameRSGroup(String oldName, String newName) throws IOException {
609    synchronized (rsGroupInfoManager) {
610      rsGroupInfoManager.renameRSGroup(oldName, newName);
611    }
612  }
613
614  @Override
615  public void updateRSGroupConfig(String groupName, Map<String, String> configuration)
616      throws IOException {
617    synchronized (rsGroupInfoManager) {
618      rsGroupInfoManager.updateRSGroupConfig(groupName, configuration);
619    }
620  }
621
622  private Map<String, RegionState> rsGroupGetRegionsInTransition(String groupName)
623      throws IOException {
624    Map<String, RegionState> rit = Maps.newTreeMap();
625    AssignmentManager am = master.getAssignmentManager();
626    for(TableName tableName : getRSGroupInfo(groupName).getTables()) {
627      for(RegionInfo regionInfo: am.getRegionStates().getRegionsOfTable(tableName)) {
628        RegionState state = am.getRegionStates().getRegionTransitionState(regionInfo);
629        if(state != null) {
630          rit.put(regionInfo.getEncodedName(), state);
631        }
632      }
633    }
634    return rit;
635  }
636
637  /**
638   * This is an EXPENSIVE clone. Cloning though is the safest thing to do. Can't let out original
639   * since it can change and at least the load balancer wants to iterate this exported list. Load
640   * balancer should iterate over this list because cloned list will ignore disabled table and split
641   * parent region cases. This method is invoked by {@link #balanceRSGroup}
642   * @return A clone of current assignments for this group.
643   */
644  Map<TableName, Map<ServerName, List<RegionInfo>>> getRSGroupAssignmentsByTable(
645      TableStateManager tableStateManager, String groupName) throws IOException {
646    Map<TableName, Map<ServerName, List<RegionInfo>>> result = Maps.newHashMap();
647    RSGroupInfo rsGroupInfo = getRSGroupInfo(groupName);
648    Map<TableName, Map<ServerName, List<RegionInfo>>> assignments = Maps.newHashMap();
649    for (Map.Entry<RegionInfo, ServerName> entry : master.getAssignmentManager().getRegionStates()
650        .getRegionAssignments().entrySet()) {
651      TableName currTable = entry.getKey().getTable();
652      ServerName currServer = entry.getValue();
653      RegionInfo currRegion = entry.getKey();
654      if (rsGroupInfo.getTables().contains(currTable)) {
655        if (tableStateManager.isTableState(currTable, TableState.State.DISABLED,
656          TableState.State.DISABLING)) {
657          continue;
658        }
659        if (currRegion.isSplitParent()) {
660          continue;
661        }
662        assignments.putIfAbsent(currTable, new HashMap<>());
663        assignments.get(currTable).putIfAbsent(currServer, new ArrayList<>());
664        assignments.get(currTable).get(currServer).add(currRegion);
665      }
666    }
667
668    Map<ServerName, List<RegionInfo>> serverMap = Maps.newHashMap();
669    for(ServerName serverName: master.getServerManager().getOnlineServers().keySet()) {
670      if(rsGroupInfo.getServers().contains(serverName.getAddress())) {
671        serverMap.put(serverName, Collections.emptyList());
672      }
673    }
674
675    // add all tables that are members of the group
676    for(TableName tableName : rsGroupInfo.getTables()) {
677      if(assignments.containsKey(tableName)) {
678        result.put(tableName, new HashMap<>());
679        result.get(tableName).putAll(serverMap);
680        result.get(tableName).putAll(assignments.get(tableName));
681        LOG.debug("Adding assignments for {}: {}", tableName, assignments.get(tableName));
682      }
683    }
684
685    return result;
686  }
687
688  /**
689   * Check if the set of servers are belong to dead servers list or online servers list.
690   * @param servers servers to remove
691   */
692  private void checkForDeadOrOnlineServers(Set<Address> servers) throws ConstraintException {
693    // This uglyness is because we only have Address, not ServerName.
694    Set<Address> onlineServers = new HashSet<>();
695    List<ServerName> drainingServers = master.getServerManager().getDrainingServersList();
696    for (ServerName server : master.getServerManager().getOnlineServers().keySet()) {
697      // Only online but not decommissioned servers are really online
698      if (!drainingServers.contains(server)) {
699        onlineServers.add(server.getAddress());
700      }
701    }
702
703    Set<Address> deadServers = new HashSet<>();
704    for(ServerName server: master.getServerManager().getDeadServers().copyServerNames()) {
705      deadServers.add(server.getAddress());
706    }
707
708    for (Address address: servers) {
709      if (onlineServers.contains(address)) {
710        throw new ConstraintException(
711            "Server " + address + " is an online server, not allowed to remove.");
712      }
713      if (deadServers.contains(address)) {
714        throw new ConstraintException(
715            "Server " + address + " is on the dead servers list,"
716                + " Maybe it will come back again, not allowed to remove.");
717      }
718    }
719  }
720}