001/*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018package org.apache.hadoop.hbase.rsgroup;
019
020import static org.apache.hadoop.hbase.util.Threads.sleep;
021import static org.junit.jupiter.api.Assertions.assertEquals;
022import static org.junit.jupiter.api.Assertions.assertFalse;
023import static org.junit.jupiter.api.Assertions.assertTrue;
024import static org.junit.jupiter.api.Assertions.fail;
025
026import java.io.IOException;
027import java.util.ArrayList;
028import java.util.EnumSet;
029import java.util.Iterator;
030import java.util.List;
031import java.util.Map;
032import java.util.Random;
033import java.util.Set;
034import java.util.concurrent.ThreadLocalRandom;
035import java.util.concurrent.atomic.AtomicBoolean;
036import java.util.function.Function;
037import org.apache.hadoop.hbase.ClusterMetrics.Option;
038import org.apache.hadoop.hbase.ServerName;
039import org.apache.hadoop.hbase.TableName;
040import org.apache.hadoop.hbase.Waiter;
041import org.apache.hadoop.hbase.client.RegionInfo;
042import org.apache.hadoop.hbase.constraint.ConstraintException;
043import org.apache.hadoop.hbase.master.RegionState;
044import org.apache.hadoop.hbase.master.assignment.RegionStateNode;
045import org.apache.hadoop.hbase.net.Address;
046import org.apache.hadoop.hbase.testclassification.LargeTests;
047import org.apache.hadoop.hbase.testclassification.RSGroupTests;
048import org.apache.hadoop.hbase.util.Bytes;
049import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
050import org.apache.hadoop.hbase.util.Pair;
051import org.junit.jupiter.api.AfterAll;
052import org.junit.jupiter.api.AfterEach;
053import org.junit.jupiter.api.BeforeAll;
054import org.junit.jupiter.api.BeforeEach;
055import org.junit.jupiter.api.Tag;
056import org.junit.jupiter.api.Test;
057import org.junit.jupiter.api.TestInfo;
058import org.slf4j.Logger;
059import org.slf4j.LoggerFactory;
060
061import org.apache.hbase.thirdparty.com.google.common.base.Preconditions;
062import org.apache.hbase.thirdparty.com.google.common.collect.Sets;
063
064@Tag(RSGroupTests.TAG)
065@Tag(LargeTests.TAG)
066public class TestRSGroupsAdmin2 extends TestRSGroupsBase {
067
068  private static final Logger LOG = LoggerFactory.getLogger(TestRSGroupsAdmin2.class);
069
070  @BeforeAll
071  public static void setUp() throws Exception {
072    setUpTestBeforeClass();
073  }
074
075  @AfterAll
076  public static void tearDown() throws Exception {
077    tearDownAfterClass();
078  }
079
080  @BeforeEach
081  public void beforeMethod(TestInfo testInfo) throws Exception {
082    setUpBeforeMethod(testInfo);
083  }
084
085  @AfterEach
086  public void afterMethod() throws Exception {
087    tearDownAfterMethod();
088  }
089
090  @Test
091  public void testRegionMove() throws Exception {
092    final RSGroupInfo newGroup = addGroup(getGroupName(name.getMethodName()), 1);
093    final byte[] familyNameBytes = Bytes.toBytes("f");
094    // All the regions created below will be assigned to the default group.
095    TEST_UTIL.createMultiRegionTable(tableName, familyNameBytes, 6);
096    TEST_UTIL.waitFor(WAIT_TIMEOUT, new Waiter.Predicate<Exception>() {
097      @Override
098      public boolean evaluate() throws Exception {
099        List<String> regions = getTableRegionMap().get(tableName);
100        if (regions == null) {
101          return false;
102        }
103
104        return getTableRegionMap().get(tableName).size() >= 6;
105      }
106    });
107
108    // get target region to move
109    Map<ServerName, List<String>> assignMap = getTableServerRegionMap().get(tableName);
110    String targetRegion = null;
111    for (ServerName server : assignMap.keySet()) {
112      targetRegion = assignMap.get(server).size() > 0 ? assignMap.get(server).get(0) : null;
113      if (targetRegion != null) {
114        break;
115      }
116    }
117    // get server which is not a member of new group
118    ServerName tmpTargetServer = null;
119    for (ServerName server : ADMIN.getClusterMetrics(EnumSet.of(Option.LIVE_SERVERS))
120      .getLiveServerMetrics().keySet()) {
121      if (!newGroup.containsServer(server.getAddress())) {
122        tmpTargetServer = server;
123        break;
124      }
125    }
126    final ServerName targetServer = tmpTargetServer;
127    // move target server to group
128    ADMIN.moveServersToRSGroup(Sets.newHashSet(targetServer.getAddress()), newGroup.getName());
129    TEST_UTIL.waitFor(WAIT_TIMEOUT, new Waiter.Predicate<Exception>() {
130      @Override
131      public boolean evaluate() throws Exception {
132        return ADMIN.getRegions(targetServer).size() <= 0;
133      }
134    });
135
136    // Lets move this region to the new group.
137    TEST_UTIL.getAdmin()
138      .move(Bytes.toBytes(RegionInfo.encodeRegionName(Bytes.toBytes(targetRegion))), targetServer);
139    TEST_UTIL.waitFor(WAIT_TIMEOUT, new Waiter.Predicate<Exception>() {
140      @Override
141      public boolean evaluate() throws Exception {
142        return getTableRegionMap().get(tableName) != null
143          && getTableRegionMap().get(tableName).size() == 6
144          && ADMIN.getClusterMetrics(EnumSet.of(Option.REGIONS_IN_TRANSITION))
145            .getRegionStatesInTransition().size() < 1;
146      }
147    });
148
149    // verify that targetServer didn't open it
150    for (RegionInfo region : ADMIN.getRegions(targetServer)) {
151      if (targetRegion.equals(region.getRegionNameAsString())) {
152        fail("Target server opened region");
153      }
154    }
155  }
156
157  @Test
158  public void testRegionServerMove() throws IOException, InterruptedException {
159    int initNumGroups = ADMIN.listRSGroups().size();
160    RSGroupInfo appInfo = addGroup(getGroupName(name.getMethodName()), 1);
161    RSGroupInfo adminInfo = addGroup(getGroupName(name.getMethodName()), 1);
162    RSGroupInfo dInfo = ADMIN.getRSGroup(RSGroupInfo.DEFAULT_GROUP);
163    assertEquals(initNumGroups + 2, ADMIN.listRSGroups().size());
164    assertEquals(1, adminInfo.getServers().size());
165    assertEquals(1, appInfo.getServers().size());
166    assertEquals(getNumServers() - 2, dInfo.getServers().size());
167    ADMIN.moveServersToRSGroup(appInfo.getServers(), RSGroupInfo.DEFAULT_GROUP);
168    ADMIN.removeRSGroup(appInfo.getName());
169    ADMIN.moveServersToRSGroup(adminInfo.getServers(), RSGroupInfo.DEFAULT_GROUP);
170    ADMIN.removeRSGroup(adminInfo.getName());
171    assertEquals(ADMIN.listRSGroups().size(), initNumGroups);
172  }
173
174  @Test
175  public void testMoveServers() throws Exception {
176    // create groups and assign servers
177    addGroup("bar", 3);
178    ADMIN.addRSGroup("foo");
179
180    RSGroupInfo barGroup = ADMIN.getRSGroup("bar");
181    RSGroupInfo fooGroup = ADMIN.getRSGroup("foo");
182    assertEquals(3, barGroup.getServers().size());
183    assertEquals(0, fooGroup.getServers().size());
184
185    // test fail bogus server move
186    try {
187      ADMIN.moveServersToRSGroup(Sets.newHashSet(Address.fromString("foo:9999")), "foo");
188      fail("Bogus servers shouldn't have been successfully moved.");
189    } catch (IOException ex) {
190      String exp = "Server foo:9999 is either offline or it does not exist.";
191      String msg = "Expected '" + exp + "' in exception message: ";
192      assertTrue(ex.getMessage().contains(exp), msg + " " + ex.getMessage());
193    }
194
195    // test success case
196    LOG.info("moving servers " + barGroup.getServers() + " to group foo");
197    ADMIN.moveServersToRSGroup(barGroup.getServers(), fooGroup.getName());
198
199    barGroup = ADMIN.getRSGroup("bar");
200    fooGroup = ADMIN.getRSGroup("foo");
201    assertEquals(0, barGroup.getServers().size());
202    assertEquals(3, fooGroup.getServers().size());
203
204    LOG.info("moving servers " + fooGroup.getServers() + " to group default");
205    ADMIN.moveServersToRSGroup(fooGroup.getServers(), RSGroupInfo.DEFAULT_GROUP);
206
207    TEST_UTIL.waitFor(WAIT_TIMEOUT, new Waiter.Predicate<Exception>() {
208      @Override
209      public boolean evaluate() throws Exception {
210        return getNumServers() == ADMIN.getRSGroup(RSGroupInfo.DEFAULT_GROUP).getServers().size();
211      }
212    });
213
214    fooGroup = ADMIN.getRSGroup("foo");
215    assertEquals(0, fooGroup.getServers().size());
216
217    // test group removal
218    LOG.info("Remove group " + barGroup.getName());
219    ADMIN.removeRSGroup(barGroup.getName());
220    assertEquals(null, ADMIN.getRSGroup(barGroup.getName()));
221    LOG.info("Remove group " + fooGroup.getName());
222    ADMIN.removeRSGroup(fooGroup.getName());
223    assertEquals(null, ADMIN.getRSGroup(fooGroup.getName()));
224  }
225
226  @Test
227  public void testRemoveServers() throws Exception {
228    LOG.info("testRemoveServers");
229    final RSGroupInfo newGroup = addGroup(getGroupName(name.getMethodName()), 3);
230    Iterator<Address> iterator = newGroup.getServers().iterator();
231    ServerName targetServer = getServerName(iterator.next());
232
233    // remove online servers
234    try {
235      ADMIN.removeServersFromRSGroup(Sets.newHashSet(targetServer.getAddress()));
236      fail("Online servers shouldn't have been successfully removed.");
237    } catch (IOException ex) {
238      String exp =
239        "Server " + targetServer.getAddress() + " is an online server, not allowed to remove.";
240      String msg = "Expected '" + exp + "' in exception message: ";
241      assertTrue(ex.getMessage().contains(exp), msg + " " + ex.getMessage());
242    }
243    assertTrue(newGroup.getServers().contains(targetServer.getAddress()));
244
245    // remove dead servers
246    NUM_DEAD_SERVERS = CLUSTER.getClusterMetrics().getDeadServerNames().size();
247    try {
248      // stopping may cause an exception
249      // due to the connection loss
250      LOG.info("stopping server " + targetServer.getServerName());
251      ADMIN.stopRegionServer(targetServer.getAddress().toString());
252      NUM_DEAD_SERVERS++;
253    } catch (Exception e) {
254    }
255
256    // wait for stopped regionserver to dead server list
257    TEST_UTIL.waitFor(WAIT_TIMEOUT, new Waiter.Predicate<Exception>() {
258      @Override
259      public boolean evaluate() throws Exception {
260        return !MASTER.getServerManager().areDeadServersInProgress()
261          && CLUSTER.getClusterMetrics().getDeadServerNames().size() == NUM_DEAD_SERVERS;
262      }
263    });
264
265    try {
266      ADMIN.removeServersFromRSGroup(Sets.newHashSet(targetServer.getAddress()));
267      fail("Dead servers shouldn't have been successfully removed.");
268    } catch (IOException ex) {
269      String exp = "Server " + targetServer.getAddress() + " is on the dead servers list,"
270        + " Maybe it will come back again, not allowed to remove.";
271      String msg = "Expected '" + exp + "' in exception message: ";
272      assertTrue(ex.getMessage().contains(exp), msg + " " + ex.getMessage());
273    }
274    assertTrue(newGroup.getServers().contains(targetServer.getAddress()));
275
276    // remove decommissioned servers
277    List<ServerName> serversToDecommission = new ArrayList<>();
278    targetServer = getServerName(iterator.next());
279    assertTrue(MASTER.getServerManager().getOnlineServers().containsKey(targetServer));
280    serversToDecommission.add(targetServer);
281
282    ADMIN.decommissionRegionServers(serversToDecommission, true);
283    assertEquals(1, ADMIN.listDecommissionedRegionServers().size());
284
285    assertTrue(newGroup.getServers().contains(targetServer.getAddress()));
286    ADMIN.removeServersFromRSGroup(Sets.newHashSet(targetServer.getAddress()));
287    Set<Address> newGroupServers = ADMIN.getRSGroup(newGroup.getName()).getServers();
288    assertFalse(newGroupServers.contains(targetServer.getAddress()));
289    assertEquals(2, newGroupServers.size());
290
291    assertTrue(OBSERVER.preRemoveServersCalled);
292    assertTrue(OBSERVER.postRemoveServersCalled);
293  }
294
295  @Test
296  public void testMoveServersAndTables() throws Exception {
297    LOG.info("testMoveServersAndTables");
298    final RSGroupInfo newGroup = addGroup(getGroupName(name.getMethodName()), 1);
299    // create table
300    final byte[] familyNameBytes = Bytes.toBytes("f");
301    TEST_UTIL.createMultiRegionTable(tableName, familyNameBytes, 5);
302    TEST_UTIL.waitFor(WAIT_TIMEOUT, new Waiter.Predicate<Exception>() {
303      @Override
304      public boolean evaluate() throws Exception {
305        List<String> regions = getTableRegionMap().get(tableName);
306        if (regions == null) {
307          return false;
308        }
309
310        return getTableRegionMap().get(tableName).size() >= 5;
311      }
312    });
313
314    // get server which is not a member of new group
315    ServerName targetServer = null;
316    for (ServerName server : ADMIN.getClusterMetrics(EnumSet.of(Option.LIVE_SERVERS))
317      .getLiveServerMetrics().keySet()) {
318      if (
319        !newGroup.containsServer(server.getAddress())
320          && !ADMIN.getRSGroup("master").containsServer(server.getAddress())
321      ) {
322        targetServer = server;
323        break;
324      }
325    }
326
327    LOG.debug("Print group info : " + ADMIN.listRSGroups());
328    int oldDefaultGroupServerSize = ADMIN.getRSGroup(RSGroupInfo.DEFAULT_GROUP).getServers().size();
329    int oldDefaultGroupTableSize = ADMIN.listTablesInRSGroup(RSGroupInfo.DEFAULT_GROUP).size();
330    assertTrue(OBSERVER.preListTablesInRSGroupCalled);
331    assertTrue(OBSERVER.postListTablesInRSGroupCalled);
332
333    // test fail bogus server move
334    try {
335      ADMIN.moveServersToRSGroup(Sets.newHashSet(Address.fromString("foo:9999")),
336        newGroup.getName());
337      ADMIN.setRSGroup(Sets.newHashSet(tableName), newGroup.getName());
338      fail("Bogus servers shouldn't have been successfully moved.");
339    } catch (IOException ex) {
340      String exp = "Server foo:9999 is either offline or it does not exist.";
341      String msg = "Expected '" + exp + "' in exception message: ";
342      assertTrue(ex.getMessage().contains(exp), msg + " " + ex.getMessage());
343    }
344
345    // test move when src = dst
346    ADMIN.moveServersToRSGroup(Sets.newHashSet(targetServer.getAddress()),
347      RSGroupInfo.DEFAULT_GROUP);
348    ADMIN.setRSGroup(Sets.newHashSet(tableName), RSGroupInfo.DEFAULT_GROUP);
349
350    // verify default group info
351    assertEquals(oldDefaultGroupServerSize,
352      ADMIN.getRSGroup(RSGroupInfo.DEFAULT_GROUP).getServers().size());
353    assertEquals(oldDefaultGroupTableSize,
354      ADMIN.listTablesInRSGroup(RSGroupInfo.DEFAULT_GROUP).size());
355
356    // verify new group info
357    assertEquals(1, ADMIN.getRSGroup(newGroup.getName()).getServers().size());
358    assertEquals(0,
359      ADMIN.getConfiguredNamespacesAndTablesInRSGroup(newGroup.getName()).getSecond().size());
360    assertTrue(OBSERVER.preGetConfiguredNamespacesAndTablesInRSGroupCalled);
361    assertTrue(OBSERVER.postGetConfiguredNamespacesAndTablesInRSGroupCalled);
362
363    // get all region to move targetServer
364    List<String> regionList = getTableRegionMap().get(tableName);
365    for (String region : regionList) {
366      // Lets move this region to the targetServer
367      TEST_UTIL.getAdmin().move(Bytes.toBytes(RegionInfo.encodeRegionName(Bytes.toBytes(region))),
368        targetServer);
369    }
370
371    TEST_UTIL.waitFor(WAIT_TIMEOUT, new Waiter.Predicate<Exception>() {
372      @Override
373      public boolean evaluate() throws Exception {
374        return getTableRegionMap().get(tableName) != null
375          && getTableRegionMap().get(tableName).size() == 5
376          && getTableServerRegionMap().get(tableName).size() == 1
377          && ADMIN.getClusterMetrics(EnumSet.of(Option.REGIONS_IN_TRANSITION))
378            .getRegionStatesInTransition().size() < 1;
379      }
380    });
381
382    // verify that all region move to targetServer
383    assertEquals(5, getTableServerRegionMap().get(tableName).get(targetServer).size());
384
385    // move targetServer and table to newGroup
386    LOG.info("moving server and table to newGroup");
387    ADMIN.moveServersToRSGroup(Sets.newHashSet(targetServer.getAddress()), newGroup.getName());
388    ADMIN.setRSGroup(Sets.newHashSet(tableName), newGroup.getName());
389
390    // verify group change
391    assertEquals(newGroup.getName(), ADMIN.getRSGroup(tableName).getName());
392
393    // verify servers' not exist in old group
394    Set<Address> defaultServers = ADMIN.getRSGroup(RSGroupInfo.DEFAULT_GROUP).getServers();
395    assertFalse(defaultServers.contains(targetServer.getAddress()));
396
397    // verify servers' exist in new group
398    Set<Address> newGroupServers = ADMIN.getRSGroup(newGroup.getName()).getServers();
399    assertTrue(newGroupServers.contains(targetServer.getAddress()));
400
401    // verify tables' not exist in old group
402    Set<TableName> defaultTables =
403      Sets.newHashSet(ADMIN.listTablesInRSGroup(RSGroupInfo.DEFAULT_GROUP));
404    assertFalse(defaultTables.contains(tableName));
405
406    // verify tables' exist in new group
407    Set<TableName> newGroupTables = Sets
408      .newHashSet(ADMIN.getConfiguredNamespacesAndTablesInRSGroup(newGroup.getName()).getSecond());
409    assertTrue(newGroupTables.contains(tableName));
410
411    // verify that all region still assign on targetServer
412    // TODO: uncomment after we reimplement moveServersAndTables, now the implementation is
413    // moveToRSGroup first and then moveTables, so the region will be moved to other region servers.
414    // assertEquals(5, getTableServerRegionMap().get(tableName).get(targetServer).size());
415
416    assertTrue(OBSERVER.preMoveServersCalled);
417    assertTrue(OBSERVER.postMoveServersCalled);
418  }
419
420  @Test
421  public void testMoveServersFromDefaultGroup() throws Exception {
422    // create groups and assign servers
423    ADMIN.addRSGroup("foo");
424
425    RSGroupInfo fooGroup = ADMIN.getRSGroup("foo");
426    assertEquals(0, fooGroup.getServers().size());
427    RSGroupInfo defaultGroup = ADMIN.getRSGroup(RSGroupInfo.DEFAULT_GROUP);
428
429    // test remove all servers from default
430    try {
431      ADMIN.moveServersToRSGroup(defaultGroup.getServers(), fooGroup.getName());
432      fail(RSGroupInfoManagerImpl.KEEP_ONE_SERVER_IN_DEFAULT_ERROR_MESSAGE);
433    } catch (ConstraintException ex) {
434      assertTrue(
435        ex.getMessage().contains(RSGroupInfoManagerImpl.KEEP_ONE_SERVER_IN_DEFAULT_ERROR_MESSAGE));
436    }
437
438    // test success case, remove one server from default ,keep at least one server
439    if (defaultGroup.getServers().size() > 1) {
440      Address serverInDefaultGroup = defaultGroup.getServers().iterator().next();
441      LOG.info("moving server " + serverInDefaultGroup + " from group default to group "
442        + fooGroup.getName());
443      ADMIN.moveServersToRSGroup(Sets.newHashSet(serverInDefaultGroup), fooGroup.getName());
444    }
445
446    fooGroup = ADMIN.getRSGroup("foo");
447    LOG.info("moving servers " + fooGroup.getServers() + " to group default");
448    ADMIN.moveServersToRSGroup(fooGroup.getServers(), RSGroupInfo.DEFAULT_GROUP);
449
450    TEST_UTIL.waitFor(WAIT_TIMEOUT, new Waiter.Predicate<Exception>() {
451      @Override
452      public boolean evaluate() throws Exception {
453        return getNumServers() == ADMIN.getRSGroup(RSGroupInfo.DEFAULT_GROUP).getServers().size();
454      }
455    });
456
457    fooGroup = ADMIN.getRSGroup("foo");
458    assertEquals(0, fooGroup.getServers().size());
459
460    // test group removal
461    LOG.info("Remove group " + fooGroup.getName());
462    ADMIN.removeRSGroup(fooGroup.getName());
463    assertEquals(null, ADMIN.getRSGroup(fooGroup.getName()));
464  }
465
466  @Test
467  public void testFailedMoveBeforeRetryExhaustedWhenMoveServer() throws Exception {
468    String groupName = getGroupName(name.getMethodName());
469    ADMIN.addRSGroup(groupName);
470    final RSGroupInfo newGroup = ADMIN.getRSGroup(groupName);
471    Pair<ServerName, RegionStateNode> gotPair = createTableWithRegionSplitting(newGroup, 10);
472
473    // start thread to recover region state
474    final ServerName movedServer = gotPair.getFirst();
475    final RegionStateNode rsn = gotPair.getSecond();
476    AtomicBoolean changed = new AtomicBoolean(false);
477    Thread t1 = recoverRegionStateThread(movedServer,
478      server -> MASTER.getAssignmentManager().getRegionsOnServer(movedServer), rsn, changed);
479    t1.start();
480
481    // move target server to group
482    Thread t2 = new Thread(() -> {
483      LOG.info("thread2 start running, to move regions");
484      try {
485        ADMIN.moveServersToRSGroup(Sets.newHashSet(movedServer.getAddress()), newGroup.getName());
486      } catch (IOException e) {
487        LOG.error("move server error", e);
488      }
489    });
490    t2.start();
491
492    t1.join();
493    t2.join();
494
495    TEST_UTIL.waitFor(WAIT_TIMEOUT, new Waiter.Predicate<Exception>() {
496      @Override
497      public boolean evaluate() {
498        if (changed.get()) {
499          return MASTER.getAssignmentManager().getRegionsOnServer(movedServer).size() == 0
500            && !rsn.getRegionLocation().equals(movedServer);
501        }
502        return false;
503      }
504    });
505  }
506
507  private <T> Thread recoverRegionStateThread(T owner, Function<T, List<RegionInfo>> getRegions,
508    RegionStateNode rsn, AtomicBoolean changed) {
509    return new Thread(() -> {
510      LOG.info("thread1 start running, will recover region state");
511      long current = EnvironmentEdgeManager.currentTime();
512      // wait until there is only left the region we changed state and recover its state.
513      // wait time is set according to the number of max retries, all except failed regions will be
514      // moved in one retry, and will sleep 1s until next retry.
515      while (
516        EnvironmentEdgeManager.currentTime() - current
517            <= RSGroupInfoManagerImpl.DEFAULT_MAX_RETRY_VALUE * 1000
518      ) {
519        List<RegionInfo> regions = getRegions.apply(owner);
520        LOG.debug("server table region size is:{}", regions.size());
521        assert regions.size() >= 1;
522        // when there is exactly one region left, we can determine the move operation encountered
523        // exception caused by the strange region state.
524        if (regions.size() == 1) {
525          assertEquals(regions.get(0).getRegionNameAsString(),
526            rsn.getRegionInfo().getRegionNameAsString());
527          rsn.setState(RegionState.State.OPEN);
528          LOG.info("set region {} state OPEN", rsn.getRegionInfo().getRegionNameAsString());
529          changed.set(true);
530          break;
531        }
532        sleep(5000);
533      }
534    });
535  }
536
537  private Pair<ServerName, RegionStateNode> createTableWithRegionSplitting(RSGroupInfo rsGroupInfo,
538    int tableRegionCount) throws Exception {
539    final byte[] familyNameBytes = Bytes.toBytes("f");
540    // All the regions created below will be assigned to the default group.
541    TEST_UTIL.createMultiRegionTable(tableName, familyNameBytes, tableRegionCount);
542    TEST_UTIL.waitFor(WAIT_TIMEOUT, new Waiter.Predicate<Exception>() {
543      @Override
544      public boolean evaluate() throws Exception {
545        List<String> regions = getTableRegionMap().get(tableName);
546        if (regions == null) {
547          return false;
548        }
549        return getTableRegionMap().get(tableName).size() >= tableRegionCount;
550      }
551    });
552
553    return randomlySetOneRegionStateToSplitting(rsGroupInfo);
554  }
555
556  /**
557   * Randomly choose a region to set state.
558   * @param newGroup target group
559   * @return source server of region, and region state
560   * @throws IOException if methods called throw
561   */
562  private Pair<ServerName, RegionStateNode>
563    randomlySetOneRegionStateToSplitting(RSGroupInfo newGroup) throws IOException {
564    // get target server to move, which should has more than one regions
565    // randomly set a region state to SPLITTING to make move fail
566    return randomlySetRegionState(newGroup, RegionState.State.SPLITTING, tableName);
567  }
568
569  private Pair<ServerName, RegionStateNode> randomlySetRegionState(RSGroupInfo groupInfo,
570    RegionState.State state, TableName... tableNames) throws IOException {
571    Preconditions.checkArgument(tableNames.length == 1 || tableNames.length == 2,
572      "only support one or two tables");
573    Map<TableName, Map<ServerName, List<String>>> tableServerRegionMap = getTableServerRegionMap();
574    Map<ServerName, List<String>> assignMap = tableServerRegionMap.get(tableNames[0]);
575    if (tableNames.length == 2) {
576      Map<ServerName, List<String>> assignMap2 = tableServerRegionMap.get(tableNames[1]);
577      assignMap2.forEach((k, v) -> {
578        if (!assignMap.containsKey(k)) {
579          assignMap.remove(k);
580        }
581      });
582    }
583    String toCorrectRegionName = null;
584    ServerName srcServer = null;
585    for (ServerName server : assignMap.keySet()) {
586      toCorrectRegionName =
587        assignMap.get(server).size() >= 1 && !groupInfo.containsServer(server.getAddress())
588          ? assignMap.get(server).get(0)
589          : null;
590      if (toCorrectRegionName != null) {
591        srcServer = server;
592        break;
593      }
594    }
595    assert srcServer != null;
596    RegionInfo toCorrectRegionInfo = TEST_UTIL.getMiniHBaseCluster().getMaster()
597      .getAssignmentManager().getRegionInfo(Bytes.toBytesBinary(toCorrectRegionName));
598    RegionStateNode rsn = TEST_UTIL.getMiniHBaseCluster().getMaster().getAssignmentManager()
599      .getRegionStates().getRegionStateNode(toCorrectRegionInfo);
600    rsn.setState(state);
601    return new Pair<>(srcServer, rsn);
602  }
603
604  @Test
605  public void testFailedMoveServersAndRepair() throws Exception {
606    // This UT calls moveToRSGroup() twice to test the idempotency of it.
607    // The first time, movement fails because a region is made in SPLITTING state
608    // which will not be moved.
609    // The second time, the region state is OPEN and check if all
610    // regions on target group servers after the call.
611    final RSGroupInfo newGroup = addGroup(getGroupName(name.getMethodName()), 1);
612
613    // create table
614    // randomly set a region state to SPLITTING to make move abort
615    Pair<ServerName, RegionStateNode> gotPair =
616      createTableWithRegionSplitting(newGroup, ThreadLocalRandom.current().nextInt(8) + 4);
617    RegionStateNode rsn = gotPair.getSecond();
618    ServerName srcServer = rsn.getRegionLocation();
619
620    // move server to newGroup and check regions
621    try {
622      ADMIN.moveServersToRSGroup(Sets.newHashSet(srcServer.getAddress()), newGroup.getName());
623      fail("should get IOException when retry exhausted but there still exists failed moved "
624        + "regions");
625    } catch (Exception e) {
626      assertTrue(
627        e.getMessage().contains(gotPair.getSecond().getRegionInfo().getRegionNameAsString()));
628    }
629    for (RegionInfo regionInfo : MASTER.getAssignmentManager().getAssignedRegions()) {
630      if (regionInfo.getTable().equals(tableName) && regionInfo.equals(rsn.getRegionInfo())) {
631        assertEquals(
632          MASTER.getAssignmentManager().getRegionStates().getRegionServerOfRegion(regionInfo),
633          srcServer);
634      }
635    }
636
637    // retry move server to newGroup and check if all regions on srcServer was moved
638    rsn.setState(RegionState.State.OPEN);
639    ADMIN.moveServersToRSGroup(Sets.newHashSet(srcServer.getAddress()), newGroup.getName());
640    assertEquals(MASTER.getAssignmentManager().getRegionsOnServer(srcServer).size(), 0);
641  }
642
643  @Test
644  public void testFailedMoveServersTablesAndRepair() throws Exception {
645    // This UT calls moveTablesAndServers() twice to test the idempotency of it.
646    // The first time, movement fails because a region is made in SPLITTING state
647    // which will not be moved.
648    // The second time, the region state is OPEN and check if all
649    // regions on target group servers after the call.
650    final RSGroupInfo newGroup = addGroup(getGroupName(name.getMethodName()), 1);
651    // create table
652    final byte[] familyNameBytes = Bytes.toBytes("f");
653    TableName table1 = TableName.valueOf(tableName.getNameAsString() + "_1");
654    TableName table2 = TableName.valueOf(tableName.getNameAsString() + "_2");
655    Random rand = ThreadLocalRandom.current();
656    TEST_UTIL.createMultiRegionTable(table1, familyNameBytes, rand.nextInt(12) + 4);
657    TEST_UTIL.createMultiRegionTable(table2, familyNameBytes, rand.nextInt(12) + 4);
658
659    // randomly set a region state to SPLITTING to make move abort
660    Pair<ServerName, RegionStateNode> gotPair =
661      randomlySetRegionState(newGroup, RegionState.State.SPLITTING, table1, table2);
662    RegionStateNode rsn = gotPair.getSecond();
663    ServerName srcServer = rsn.getRegionLocation();
664
665    // move server and table to newGroup and check regions
666    try {
667      ADMIN.moveServersToRSGroup(Sets.newHashSet(srcServer.getAddress()), newGroup.getName());
668      ADMIN.setRSGroup(Sets.newHashSet(table2), newGroup.getName());
669      fail("should get IOException when retry exhausted but there still exists failed moved "
670        + "regions");
671    } catch (Exception e) {
672      assertTrue(
673        e.getMessage().contains(gotPair.getSecond().getRegionInfo().getRegionNameAsString()));
674    }
675    for (RegionInfo regionInfo : MASTER.getAssignmentManager().getAssignedRegions()) {
676      if (regionInfo.getTable().equals(table1) && regionInfo.equals(rsn.getRegionInfo())) {
677        assertEquals(
678          MASTER.getAssignmentManager().getRegionStates().getRegionServerOfRegion(regionInfo),
679          srcServer);
680      }
681    }
682
683    // retry moveServersAndTables to newGroup and check if all regions on srcServer belongs to
684    // table2
685    rsn.setState(RegionState.State.OPEN);
686    ADMIN.moveServersToRSGroup(Sets.newHashSet(srcServer.getAddress()), newGroup.getName());
687    ADMIN.setRSGroup(Sets.newHashSet(table2), newGroup.getName());
688    for (RegionInfo regionsInfo : MASTER.getAssignmentManager().getRegionsOnServer(srcServer)) {
689      assertEquals(regionsInfo.getTable(), table2);
690    }
691  }
692
693  @Test
694  public void testMoveServersToRSGroupPerformance() throws Exception {
695    final RSGroupInfo newGroup = addGroup(getGroupName(name.getMethodName()), 2);
696    final byte[] familyNameBytes = Bytes.toBytes("f");
697    // there will be 100 regions are both the serves
698    final int tableRegionCount = 200;
699    // All the regions created below will be assigned to the default group.
700    TEST_UTIL.createMultiRegionTable(tableName, familyNameBytes, tableRegionCount);
701    TEST_UTIL.waitFor(WAIT_TIMEOUT, new Waiter.Predicate<Exception>() {
702      @Override
703      public boolean evaluate() throws Exception {
704        List<String> regions = getTableRegionMap().get(tableName);
705        if (regions == null) {
706          return false;
707        }
708        return getTableRegionMap().get(tableName).size() >= tableRegionCount;
709      }
710    });
711    ADMIN.setRSGroup(Sets.newHashSet(tableName), newGroup.getName());
712    TEST_UTIL.waitUntilAllRegionsAssigned(tableName);
713    String rsGroup2 = "rsGroup2";
714    ADMIN.addRSGroup(rsGroup2);
715
716    long startTime = EnvironmentEdgeManager.currentTime();
717    ADMIN.moveServersToRSGroup(Sets.newHashSet(newGroup.getServers().iterator().next()), rsGroup2);
718    long timeTaken = EnvironmentEdgeManager.currentTime() - startTime;
719    String msg =
720      "Should not take mote than 15000 ms to move a table with 100 regions. Time taken  ="
721        + timeTaken + " ms";
722    // This test case is meant to be used for verifying the performance quickly by a developer.
723    // Moving 100 regions takes much less than 15000 ms. Given 15000 ms so test cases passes
724    // on all environment.
725    assertTrue(timeTaken < 15000, msg);
726    LOG.info("Time taken to move a table with 100 region is {} ms", timeTaken);
727  }
728}