001/*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018package org.apache.hadoop.hbase.rsgroup;
019
020import static org.junit.jupiter.api.Assertions.assertEquals;
021import static org.junit.jupiter.api.Assertions.assertFalse;
022
023import org.apache.hadoop.hbase.HBaseClusterInterface;
024import org.apache.hadoop.hbase.HBaseTestingUtil;
025import org.apache.hadoop.hbase.SingleProcessHBaseCluster;
026import org.apache.hadoop.hbase.StartTestingClusterOption;
027import org.apache.hadoop.hbase.TableName;
028import org.apache.hadoop.hbase.Waiter;
029import org.apache.hadoop.hbase.client.Admin;
030import org.apache.hadoop.hbase.client.RegionInfo;
031import org.apache.hadoop.hbase.master.HMaster;
032import org.apache.hadoop.hbase.master.ServerManager;
033import org.apache.hadoop.hbase.regionserver.HRegionServer;
034import org.apache.hadoop.hbase.testclassification.MediumTests;
035import org.apache.hadoop.hbase.testclassification.RSGroupTests;
036import org.apache.hadoop.hbase.util.Bytes;
037import org.junit.jupiter.api.AfterAll;
038import org.junit.jupiter.api.BeforeAll;
039import org.junit.jupiter.api.Tag;
040import org.junit.jupiter.api.Test;
041import org.junit.jupiter.api.TestInfo;
042import org.slf4j.Logger;
043import org.slf4j.LoggerFactory;
044
045import org.apache.hbase.thirdparty.com.google.common.collect.Sets;
046
047/**
048 * This tests that GroupBasedBalancer will use data in zk to do balancing during master startup.
049 * This does not test retain assignment.
050 * <p/>
051 * The tests brings up 3 RS, creates a new RS group 'my_group', moves 1 RS to 'my_group', assigns
052 * 'hbase:rsgroup' to 'my_group', and kill the only server in that group so that 'hbase:rsgroup'
053 * table isn't available. It then kills the active master and waits for backup master to come
054 * online. In new master, RSGroupInfoManagerImpl gets the data from zk and waits for the expected
055 * assignment with a timeout.
056 */
057@Tag(RSGroupTests.TAG)
058@Tag(MediumTests.TAG)
059public class TestRSGroupsOfflineMode extends TestRSGroupsBase {
060
061  private static final Logger LOG = LoggerFactory.getLogger(TestRSGroupsOfflineMode.class);
062  private static HMaster master;
063  private static Admin hbaseAdmin;
064  private static HBaseTestingUtil TEST_UTIL;
065  private static HBaseClusterInterface cluster;
066  private final static long WAIT_TIMEOUT = 60000 * 5;
067
068  @BeforeAll
069  public static void setUp() throws Exception {
070    TEST_UTIL = new HBaseTestingUtil();
071    RSGroupUtil.enableRSGroup(TEST_UTIL.getConfiguration());
072    TEST_UTIL.getConfiguration().set(ServerManager.WAIT_ON_REGIONSERVERS_MINTOSTART, "1");
073    StartTestingClusterOption option =
074      StartTestingClusterOption.builder().numMasters(2).numRegionServers(3).numDataNodes(3).build();
075    TEST_UTIL.startMiniCluster(option);
076    cluster = TEST_UTIL.getHBaseCluster();
077    master = ((SingleProcessHBaseCluster) cluster).getMaster();
078    master.balanceSwitch(false);
079    hbaseAdmin = TEST_UTIL.getAdmin();
080    // wait till the balancer is in online mode
081    TEST_UTIL.waitFor(WAIT_TIMEOUT, new Waiter.Predicate<Exception>() {
082      @Override
083      public boolean evaluate() throws Exception {
084        return master.isInitialized()
085          && ((RSGroupBasedLoadBalancer) master.getLoadBalancer()).isOnline()
086          && master.getServerManager().getOnlineServersList().size() >= 3;
087      }
088    });
089  }
090
091  @AfterAll
092  public static void tearDown() throws Exception {
093    TEST_UTIL.shutdownMiniCluster();
094  }
095
096  @Test
097  public void testOffline(TestInfo testInfo) throws Exception, InterruptedException {
098    // Table should be after group table name so it gets assigned later.
099    final TableName failoverTable =
100      TableName.valueOf(getNameWithoutIndex(testInfo.getTestMethod().get().getName()));
101    TEST_UTIL.createTable(failoverTable, Bytes.toBytes("f"));
102    final HRegionServer killRS = ((SingleProcessHBaseCluster) cluster).getRegionServer(0);
103    final HRegionServer groupRS = ((SingleProcessHBaseCluster) cluster).getRegionServer(1);
104    final HRegionServer failoverRS = ((SingleProcessHBaseCluster) cluster).getRegionServer(2);
105    String newGroup = "my_group";
106    Admin admin = TEST_UTIL.getAdmin();
107    admin.addRSGroup(newGroup);
108    if (
109      master.getAssignmentManager().getRegionStates().getRegionAssignments()
110        .containsValue(failoverRS.getServerName())
111    ) {
112      for (RegionInfo regionInfo : hbaseAdmin.getRegions(failoverRS.getServerName())) {
113        hbaseAdmin.move(regionInfo.getEncodedNameAsBytes(), failoverRS.getServerName());
114      }
115      LOG.info("Waiting for region unassignments on failover RS...");
116      TEST_UTIL.waitFor(WAIT_TIMEOUT, new Waiter.Predicate<Exception>() {
117        @Override
118        public boolean evaluate() throws Exception {
119          return !master.getServerManager().getLoad(failoverRS.getServerName()).getRegionMetrics()
120            .isEmpty();
121        }
122      });
123    }
124
125    // Move server to group and make sure all tables are assigned.
126    admin.moveServersToRSGroup(Sets.newHashSet(groupRS.getServerName().getAddress()), newGroup);
127    TEST_UTIL.waitFor(WAIT_TIMEOUT, new Waiter.Predicate<Exception>() {
128      @Override
129      public boolean evaluate() throws Exception {
130        return groupRS.getNumberOfOnlineRegions() < 1
131          && master.getAssignmentManager().getRegionsInTransitionCount() < 1;
132      }
133    });
134    // Move table to group and wait.
135    admin.setRSGroup(Sets.newHashSet(RSGroupInfoManagerImpl.RSGROUP_TABLE_NAME), newGroup);
136    LOG.info("Waiting for move table...");
137    TEST_UTIL.waitFor(WAIT_TIMEOUT, new Waiter.Predicate<Exception>() {
138      @Override
139      public boolean evaluate() throws Exception {
140        return groupRS.getNumberOfOnlineRegions() == 1;
141      }
142    });
143
144    groupRS.stop("die");
145    // Race condition here.
146    TEST_UTIL.getHBaseCluster().getMaster().stopMaster();
147    LOG.info("Waiting for offline mode...");
148    TEST_UTIL.waitFor(WAIT_TIMEOUT, new Waiter.Predicate<Exception>() {
149      @Override
150      public boolean evaluate() throws Exception {
151        return TEST_UTIL.getHBaseCluster().getMaster() != null
152          && TEST_UTIL.getHBaseCluster().getMaster().isActiveMaster()
153          && TEST_UTIL.getHBaseCluster().getMaster().isInitialized()
154          && TEST_UTIL.getHBaseCluster().getMaster().getServerManager().getOnlineServers().size()
155              <= 3;
156      }
157    });
158
159    // Get groupInfoManager from the new active master.
160    RSGroupInfoManager groupMgr =
161      ((SingleProcessHBaseCluster) cluster).getMaster().getRSGroupInfoManager();
162    // Make sure balancer is in offline mode, since this is what we're testing.
163    assertFalse(groupMgr.isOnline());
164    // Kill final regionserver to see the failover happens for all tables except GROUP table since
165    // it's group does not have any online RS.
166    killRS.stop("die");
167    master = TEST_UTIL.getHBaseCluster().getMaster();
168    LOG.info("Waiting for new table assignment...");
169    TEST_UTIL.waitFor(WAIT_TIMEOUT, new Waiter.Predicate<Exception>() {
170      @Override
171      public boolean evaluate() throws Exception {
172        return failoverRS.getRegions(failoverTable).size() >= 1;
173      }
174    });
175    assertEquals(0, failoverRS.getRegions(RSGroupInfoManagerImpl.RSGROUP_TABLE_NAME).size());
176
177    // Need this for minicluster to shutdown cleanly.
178    master.stopMaster();
179  }
180}