001/**
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018package org.apache.hadoop.hbase.rsgroup;
019
020import static org.junit.Assert.assertEquals;
021import static org.junit.Assert.assertFalse;
022
023import org.apache.hadoop.hbase.HBaseClassTestRule;
024import org.apache.hadoop.hbase.HBaseClusterInterface;
025import org.apache.hadoop.hbase.HBaseTestingUtil;
026import org.apache.hadoop.hbase.SingleProcessHBaseCluster;
027import org.apache.hadoop.hbase.StartTestingClusterOption;
028import org.apache.hadoop.hbase.TableName;
029import org.apache.hadoop.hbase.Waiter;
030import org.apache.hadoop.hbase.client.Admin;
031import org.apache.hadoop.hbase.client.RegionInfo;
032import org.apache.hadoop.hbase.master.HMaster;
033import org.apache.hadoop.hbase.master.ServerManager;
034import org.apache.hadoop.hbase.regionserver.HRegionServer;
035import org.apache.hadoop.hbase.testclassification.MediumTests;
036import org.apache.hadoop.hbase.testclassification.RSGroupTests;
037import org.apache.hadoop.hbase.util.Bytes;
038import org.junit.AfterClass;
039import org.junit.BeforeClass;
040import org.junit.ClassRule;
041import org.junit.Rule;
042import org.junit.Test;
043import org.junit.experimental.categories.Category;
044import org.junit.rules.TestName;
045import org.slf4j.Logger;
046import org.slf4j.LoggerFactory;
047
048import org.apache.hbase.thirdparty.com.google.common.collect.Sets;
049
050/**
051 * This tests that GroupBasedBalancer will use data in zk to do balancing during master startup.
052 * This does not test retain assignment.
053 * <p/>
054 * The tests brings up 3 RS, creates a new RS group 'my_group', moves 1 RS to 'my_group', assigns
055 * 'hbase:rsgroup' to 'my_group', and kill the only server in that group so that 'hbase:rsgroup'
056 * table isn't available. It then kills the active master and waits for backup master to come
057 * online. In new master, RSGroupInfoManagerImpl gets the data from zk and waits for the expected
058 * assignment with a timeout.
059 */
060@Category({ RSGroupTests.class, MediumTests.class })
061public class TestRSGroupsOfflineMode extends TestRSGroupsBase {
062
063  @ClassRule
064  public static final HBaseClassTestRule CLASS_RULE =
065    HBaseClassTestRule.forClass(TestRSGroupsOfflineMode.class);
066
067  private static final Logger LOG = LoggerFactory.getLogger(TestRSGroupsOfflineMode.class);
068  private static HMaster master;
069  private static Admin hbaseAdmin;
070  private static HBaseTestingUtil TEST_UTIL;
071  private static HBaseClusterInterface cluster;
072  private final static long WAIT_TIMEOUT = 60000 * 5;
073
074  @Rule
075  public TestName name = new TestName();
076
077  @BeforeClass
078  public static void setUp() throws Exception {
079    TEST_UTIL = new HBaseTestingUtil();
080    RSGroupUtil.enableRSGroup(TEST_UTIL.getConfiguration());
081    TEST_UTIL.getConfiguration().set(ServerManager.WAIT_ON_REGIONSERVERS_MINTOSTART, "1");
082    StartTestingClusterOption option =
083      StartTestingClusterOption.builder().numMasters(2).numRegionServers(3).numDataNodes(3).build();
084    TEST_UTIL.startMiniCluster(option);
085    cluster = TEST_UTIL.getHBaseCluster();
086    master = ((SingleProcessHBaseCluster) cluster).getMaster();
087    master.balanceSwitch(false);
088    hbaseAdmin = TEST_UTIL.getAdmin();
089    // wait till the balancer is in online mode
090    TEST_UTIL.waitFor(WAIT_TIMEOUT, new Waiter.Predicate<Exception>() {
091      @Override
092      public boolean evaluate() throws Exception {
093        return master.isInitialized() &&
094          ((RSGroupBasedLoadBalancer) master.getLoadBalancer()).isOnline() &&
095          master.getServerManager().getOnlineServersList().size() >= 3;
096      }
097    });
098  }
099
100  @AfterClass
101  public static void tearDown() throws Exception {
102    TEST_UTIL.shutdownMiniCluster();
103  }
104
105  @Test
106  public void testOffline() throws Exception, InterruptedException {
107    // Table should be after group table name so it gets assigned later.
108    final TableName failoverTable = TableName.valueOf(getNameWithoutIndex(name.getMethodName()));
109    TEST_UTIL.createTable(failoverTable, Bytes.toBytes("f"));
110    final HRegionServer killRS = ((SingleProcessHBaseCluster) cluster).getRegionServer(0);
111    final HRegionServer groupRS = ((SingleProcessHBaseCluster) cluster).getRegionServer(1);
112    final HRegionServer failoverRS = ((SingleProcessHBaseCluster) cluster).getRegionServer(2);
113    String newGroup = "my_group";
114    Admin admin = TEST_UTIL.getAdmin();
115    admin.addRSGroup(newGroup);
116    if (master.getAssignmentManager().getRegionStates().getRegionAssignments()
117      .containsValue(failoverRS.getServerName())) {
118      for (RegionInfo regionInfo : hbaseAdmin.getRegions(failoverRS.getServerName())) {
119        hbaseAdmin.move(regionInfo.getEncodedNameAsBytes(), failoverRS.getServerName());
120      }
121      LOG.info("Waiting for region unassignments on failover RS...");
122      TEST_UTIL.waitFor(WAIT_TIMEOUT, new Waiter.Predicate<Exception>() {
123        @Override
124        public boolean evaluate() throws Exception {
125          return !master.getServerManager().getLoad(failoverRS.getServerName()).getRegionMetrics()
126            .isEmpty();
127        }
128      });
129    }
130
131    // Move server to group and make sure all tables are assigned.
132    admin.moveServersToRSGroup(Sets.newHashSet(groupRS.getServerName().getAddress()), newGroup);
133    TEST_UTIL.waitFor(WAIT_TIMEOUT, new Waiter.Predicate<Exception>() {
134      @Override
135      public boolean evaluate() throws Exception {
136        return groupRS.getNumberOfOnlineRegions() < 1 &&
137          master.getAssignmentManager().getRegionStates().getRegionsInTransitionCount() < 1;
138      }
139    });
140    // Move table to group and wait.
141    admin.setRSGroup(Sets.newHashSet(RSGroupInfoManagerImpl.RSGROUP_TABLE_NAME), newGroup);
142    LOG.info("Waiting for move table...");
143    TEST_UTIL.waitFor(WAIT_TIMEOUT, new Waiter.Predicate<Exception>() {
144      @Override
145      public boolean evaluate() throws Exception {
146        return groupRS.getNumberOfOnlineRegions() == 1;
147      }
148    });
149
150    groupRS.stop("die");
151    // Race condition here.
152    TEST_UTIL.getHBaseCluster().getMaster().stopMaster();
153    LOG.info("Waiting for offline mode...");
154    TEST_UTIL.waitFor(WAIT_TIMEOUT, new Waiter.Predicate<Exception>() {
155      @Override
156      public boolean evaluate() throws Exception {
157        return TEST_UTIL.getHBaseCluster().getMaster() != null &&
158          TEST_UTIL.getHBaseCluster().getMaster().isActiveMaster() &&
159          TEST_UTIL.getHBaseCluster().getMaster().isInitialized() &&
160          TEST_UTIL.getHBaseCluster().getMaster().getServerManager().getOnlineServers().size() <= 3;
161      }
162    });
163
164    // Get groupInfoManager from the new active master.
165    RSGroupInfoManager groupMgr =
166      ((SingleProcessHBaseCluster) cluster).getMaster().getRSGroupInfoManager();
167    // Make sure balancer is in offline mode, since this is what we're testing.
168    assertFalse(groupMgr.isOnline());
169    // Kill final regionserver to see the failover happens for all tables except GROUP table since
170    // it's group does not have any online RS.
171    killRS.stop("die");
172    master = TEST_UTIL.getHBaseCluster().getMaster();
173    LOG.info("Waiting for new table assignment...");
174    TEST_UTIL.waitFor(WAIT_TIMEOUT, new Waiter.Predicate<Exception>() {
175      @Override
176      public boolean evaluate() throws Exception {
177        return failoverRS.getRegions(failoverTable).size() >= 1;
178      }
179    });
180    assertEquals(0, failoverRS.getRegions(RSGroupInfoManagerImpl.RSGROUP_TABLE_NAME).size());
181
182    // Need this for minicluster to shutdown cleanly.
183    master.stopMaster();
184  }
185}