001/*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018package org.apache.hadoop.hbase.rsgroup;
019
020import static org.junit.Assert.assertEquals;
021import static org.junit.Assert.assertFalse;
022
023import org.apache.hadoop.hbase.HBaseClassTestRule;
024import org.apache.hadoop.hbase.HBaseCluster;
025import org.apache.hadoop.hbase.HBaseTestingUtility;
026import org.apache.hadoop.hbase.HConstants;
027import org.apache.hadoop.hbase.MiniHBaseCluster;
028import org.apache.hadoop.hbase.StartMiniClusterOption;
029import org.apache.hadoop.hbase.TableName;
030import org.apache.hadoop.hbase.Waiter;
031import org.apache.hadoop.hbase.client.Admin;
032import org.apache.hadoop.hbase.client.RegionInfo;
033import org.apache.hadoop.hbase.coprocessor.CoprocessorHost;
034import org.apache.hadoop.hbase.master.HMaster;
035import org.apache.hadoop.hbase.master.ServerManager;
036import org.apache.hadoop.hbase.regionserver.HRegionServer;
037import org.apache.hadoop.hbase.testclassification.MediumTests;
038import org.apache.hadoop.hbase.util.Bytes;
039import org.junit.AfterClass;
040import org.junit.Assert;
041import org.junit.BeforeClass;
042import org.junit.ClassRule;
043import org.junit.Rule;
044import org.junit.Test;
045import org.junit.experimental.categories.Category;
046import org.junit.rules.TestName;
047import org.slf4j.Logger;
048import org.slf4j.LoggerFactory;
049
050import org.apache.hbase.thirdparty.com.google.common.collect.Sets;
051
052// This tests that GroupBasedBalancer will use data in zk to do balancing during master startup.
053// This does not test retain assignment.
054// The tests brings up 3 RS, creates a new RS group 'my_group', moves 1 RS to 'my_group', assigns
055// 'hbase:rsgroup' to 'my_group', and kill the only server in that group so that 'hbase:rsgroup'
056// table isn't available. It then kills the active master and waits for backup master to come
057// online. In new master, RSGroupInfoManagerImpl gets the data from zk and waits for the expected
058// assignment with a timeout.
059@Category(MediumTests.class)
060public class TestRSGroupsOfflineMode {
061
062  @ClassRule
063  public static final HBaseClassTestRule CLASS_RULE =
064    HBaseClassTestRule.forClass(TestRSGroupsOfflineMode.class);
065
066  private static final Logger LOG = LoggerFactory.getLogger(TestRSGroupsOfflineMode.class);
067  private static HMaster master;
068  private static Admin hbaseAdmin;
069  private static HBaseTestingUtility TEST_UTIL;
070  private static HBaseCluster cluster;
071  private final static long WAIT_TIMEOUT = 60000 * 5;
072
073  @Rule
074  public TestName name = new TestName();
075
076  @BeforeClass
077  public static void setUp() throws Exception {
078    TEST_UTIL = new HBaseTestingUtility();
079    TEST_UTIL.getConfiguration().set(HConstants.HBASE_MASTER_LOADBALANCER_CLASS,
080      RSGroupBasedLoadBalancer.class.getName());
081    TEST_UTIL.getConfiguration().set(CoprocessorHost.MASTER_COPROCESSOR_CONF_KEY,
082      RSGroupAdminEndpoint.class.getName());
083    TEST_UTIL.getConfiguration().set(ServerManager.WAIT_ON_REGIONSERVERS_MINTOSTART, "1");
084    StartMiniClusterOption option =
085      StartMiniClusterOption.builder().numMasters(2).numRegionServers(3).numDataNodes(3).build();
086    TEST_UTIL.startMiniCluster(option);
087    cluster = TEST_UTIL.getHBaseCluster();
088    master = ((MiniHBaseCluster) cluster).getMaster();
089    master.balanceSwitch(false);
090    hbaseAdmin = TEST_UTIL.getAdmin();
091    // wait till the balancer is in online mode
092    TEST_UTIL.waitFor(WAIT_TIMEOUT, new Waiter.Predicate<Exception>() {
093      @Override
094      public boolean evaluate() throws Exception {
095        return master.isInitialized()
096          && ((RSGroupBasedLoadBalancer) master.getLoadBalancer()).isOnline()
097          && master.getServerManager().getOnlineServersList().size() >= 3;
098      }
099    });
100  }
101
102  @AfterClass
103  public static void tearDown() throws Exception {
104    TEST_UTIL.shutdownMiniCluster();
105  }
106
107  @Test
108  public void testOffline() throws Exception, InterruptedException {
109    // Table should be after group table name so it gets assigned later.
110    final TableName failoverTable = TableName.valueOf(name.getMethodName());
111    TEST_UTIL.createTable(failoverTable, Bytes.toBytes("f"));
112    final HRegionServer killRS = ((MiniHBaseCluster) cluster).getRegionServer(0);
113    final HRegionServer groupRS = ((MiniHBaseCluster) cluster).getRegionServer(1);
114    final HRegionServer failoverRS = ((MiniHBaseCluster) cluster).getRegionServer(2);
115    String newGroup = "my_group";
116    RSGroupAdmin groupAdmin = new RSGroupAdminClient(TEST_UTIL.getConnection());
117    groupAdmin.addRSGroup(newGroup);
118    if (
119      master.getAssignmentManager().getRegionStates().getRegionAssignments()
120        .containsValue(failoverRS.getServerName())
121    ) {
122      for (RegionInfo regionInfo : hbaseAdmin.getRegions(failoverRS.getServerName())) {
123        hbaseAdmin.move(regionInfo.getEncodedNameAsBytes(), failoverRS.getServerName());
124      }
125      LOG.info("Waiting for region unassignments on failover RS...");
126      TEST_UTIL.waitFor(WAIT_TIMEOUT, new Waiter.Predicate<Exception>() {
127        @Override
128        public boolean evaluate() throws Exception {
129          return !master.getServerManager().getLoad(failoverRS.getServerName()).getRegionMetrics()
130            .isEmpty();
131        }
132      });
133    }
134
135    // Move server to group and make sure all tables are assigned.
136    groupAdmin.moveServers(Sets.newHashSet(groupRS.getServerName().getAddress()), newGroup);
137    TEST_UTIL.waitFor(WAIT_TIMEOUT, new Waiter.Predicate<Exception>() {
138      @Override
139      public boolean evaluate() throws Exception {
140        return groupRS.getNumberOfOnlineRegions() < 1
141          && master.getAssignmentManager().getRegionStates().getRegionsInTransitionCount() < 1;
142      }
143    });
144    // Move table to group and wait.
145    groupAdmin.moveTables(Sets.newHashSet(RSGroupInfoManager.RSGROUP_TABLE_NAME), newGroup);
146    LOG.info("Waiting for move table...");
147    TEST_UTIL.waitFor(WAIT_TIMEOUT, new Waiter.Predicate<Exception>() {
148      @Override
149      public boolean evaluate() throws Exception {
150        return groupRS.getNumberOfOnlineRegions() == 1;
151      }
152    });
153
154    groupRS.stop("die");
155    // Race condition here.
156    TEST_UTIL.getHBaseCluster().getMaster().stopMaster();
157    LOG.info("Waiting for offline mode...");
158    TEST_UTIL.waitFor(WAIT_TIMEOUT, new Waiter.Predicate<Exception>() {
159      @Override
160      public boolean evaluate() throws Exception {
161        return TEST_UTIL.getHBaseCluster().getMaster() != null
162          && TEST_UTIL.getHBaseCluster().getMaster().isActiveMaster()
163          && TEST_UTIL.getHBaseCluster().getMaster().isInitialized()
164          && TEST_UTIL.getHBaseCluster().getMaster().getServerManager().getOnlineServers().size()
165              <= 3;
166      }
167    });
168
169    // Get groupInfoManager from the new active master.
170    RSGroupInfoManager groupMgr = ((MiniHBaseCluster) cluster).getMaster()
171      .getMasterCoprocessorHost().findCoprocessor(RSGroupAdminEndpoint.class).getGroupInfoManager();
172    // Make sure balancer is in offline mode, since this is what we're testing.
173    assertFalse(groupMgr.isOnline());
174    // Verify the group affiliation that's loaded from ZK instead of tables.
175    assertEquals(newGroup, groupMgr.getRSGroupOfTable(RSGroupInfoManager.RSGROUP_TABLE_NAME));
176    assertEquals(RSGroupInfo.DEFAULT_GROUP, groupMgr.getRSGroupOfTable(failoverTable));
177    // Kill final regionserver to see the failover happens for all tables except GROUP table since
178    // it's group does not have any online RS.
179    killRS.stop("die");
180    master = TEST_UTIL.getHBaseCluster().getMaster();
181    LOG.info("Waiting for new table assignment...");
182    TEST_UTIL.waitFor(WAIT_TIMEOUT, new Waiter.Predicate<Exception>() {
183      @Override
184      public boolean evaluate() throws Exception {
185        return failoverRS.getRegions(failoverTable).size() >= 1;
186      }
187    });
188    Assert.assertEquals(0, failoverRS.getRegions(RSGroupInfoManager.RSGROUP_TABLE_NAME).size());
189
190    // Need this for minicluster to shutdown cleanly.
191    master.stopMaster();
192  }
193}