001/*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018package org.apache.hadoop.hbase.rsgroup;
019
020import static org.apache.hadoop.hbase.util.Threads.sleep;
021import static org.junit.Assert.assertEquals;
022import static org.junit.Assert.assertTrue;
023
024import java.lang.reflect.Field;
025import java.util.ArrayList;
026import java.util.HashSet;
027import java.util.List;
028import java.util.Map;
029import java.util.Set;
030import org.apache.hadoop.hbase.HBaseClassTestRule;
031import org.apache.hadoop.hbase.HConstants;
032import org.apache.hadoop.hbase.NamespaceDescriptor;
033import org.apache.hadoop.hbase.ServerName;
034import org.apache.hadoop.hbase.TableName;
035import org.apache.hadoop.hbase.Version;
036import org.apache.hadoop.hbase.Waiter;
037import org.apache.hadoop.hbase.client.ColumnFamilyDescriptorBuilder;
038import org.apache.hadoop.hbase.client.RegionInfo;
039import org.apache.hadoop.hbase.client.Table;
040import org.apache.hadoop.hbase.client.TableDescriptor;
041import org.apache.hadoop.hbase.client.TableDescriptorBuilder;
042import org.apache.hadoop.hbase.ipc.MetaRWQueueRpcExecutor;
043import org.apache.hadoop.hbase.master.procedure.ServerCrashProcedure;
044import org.apache.hadoop.hbase.net.Address;
045import org.apache.hadoop.hbase.testclassification.MediumTests;
046import org.apache.hadoop.hbase.testclassification.RSGroupTests;
047import org.apache.hadoop.hbase.util.Bytes;
048import org.apache.hadoop.hbase.util.JVMClusterUtil;
049import org.apache.hadoop.hbase.util.VersionInfo;
050import org.junit.After;
051import org.junit.AfterClass;
052import org.junit.Before;
053import org.junit.BeforeClass;
054import org.junit.ClassRule;
055import org.junit.Test;
056import org.junit.experimental.categories.Category;
057import org.slf4j.Logger;
058import org.slf4j.LoggerFactory;
059
060import org.apache.hbase.thirdparty.com.google.common.collect.Sets;
061
062@Category({ RSGroupTests.class, MediumTests.class })
063public class TestRSGroupsKillRS extends TestRSGroupsBase {
064
065  @ClassRule
066  public static final HBaseClassTestRule CLASS_RULE =
067    HBaseClassTestRule.forClass(TestRSGroupsKillRS.class);
068
069  private static final Logger LOG = LoggerFactory.getLogger(TestRSGroupsKillRS.class);
070
071  @BeforeClass
072  public static void setUp() throws Exception {
073    // avoid all the handlers blocked when meta is offline, and regionServerReport can not be
074    // processed which causes dead lock.
075    TEST_UTIL.getConfiguration().setInt(HConstants.REGION_SERVER_HIGH_PRIORITY_HANDLER_COUNT, 10);
076    TEST_UTIL.getConfiguration()
077      .setFloat(MetaRWQueueRpcExecutor.META_CALL_QUEUE_READ_SHARE_CONF_KEY, 0.5f);
078    setUpTestBeforeClass();
079  }
080
081  @AfterClass
082  public static void tearDown() throws Exception {
083    tearDownAfterClass();
084  }
085
086  @Before
087  public void beforeMethod() throws Exception {
088    setUpBeforeMethod();
089  }
090
091  @After
092  public void afterMethod() throws Exception {
093    tearDownAfterMethod();
094  }
095
096  @Test
097  public void testKillRS() throws Exception {
098    RSGroupInfo appInfo = addGroup("appInfo", 1);
099    final TableName tableName =
100      TableName.valueOf(TABLE_PREFIX + "_ns", getNameWithoutIndex(name.getMethodName()));
101    ADMIN.createNamespace(NamespaceDescriptor.create(tableName.getNamespaceAsString())
102      .addConfiguration(RSGroupInfo.NAMESPACE_DESC_PROP_GROUP, appInfo.getName()).build());
103    final TableDescriptor desc = TableDescriptorBuilder.newBuilder(tableName)
104      .setColumnFamily(ColumnFamilyDescriptorBuilder.of("f")).build();
105    ADMIN.createTable(desc);
106    // wait for created table to be assigned
107    TEST_UTIL.waitFor(WAIT_TIMEOUT, new Waiter.Predicate<Exception>() {
108      @Override
109      public boolean evaluate() throws Exception {
110        return getTableRegionMap().get(desc.getTableName()) != null;
111      }
112    });
113
114    ServerName targetServer = getServerName(appInfo.getServers().iterator().next());
115    assertEquals(1, ADMIN.getRegions(targetServer).size());
116
117    try {
118      // stopping may cause an exception
119      // due to the connection loss
120      ADMIN.stopRegionServer(targetServer.getAddress().toString());
121    } catch (Exception e) {
122    }
123    // wait until the server is actually down
124    TEST_UTIL.waitFor(WAIT_TIMEOUT, new Waiter.Predicate<Exception>() {
125      @Override
126      public boolean evaluate() throws Exception {
127        return !CLUSTER.getClusterMetrics().getLiveServerMetrics().containsKey(targetServer);
128      }
129    });
130    // there is only one rs in the group and we killed it, so the region can not be online, until
131    // later we add new servers to it.
132    TEST_UTIL.waitFor(WAIT_TIMEOUT, new Waiter.Predicate<Exception>() {
133      @Override
134      public boolean evaluate() throws Exception {
135        return !CLUSTER.getClusterMetrics().getRegionStatesInTransition().isEmpty();
136      }
137    });
138    Set<Address> newServers = Sets.newHashSet();
139    newServers.add(ADMIN.getRSGroup(RSGroupInfo.DEFAULT_GROUP).getServers().iterator().next());
140    ADMIN.moveServersToRSGroup(newServers, appInfo.getName());
141
142    // Make sure all the table's regions get reassigned
143    // disabling the table guarantees no conflicting assign/unassign (ie SSH) happens
144    ADMIN.disableTable(tableName);
145    ADMIN.enableTable(tableName);
146
147    // wait for region to be assigned
148    TEST_UTIL.waitFor(WAIT_TIMEOUT, new Waiter.Predicate<Exception>() {
149      @Override
150      public boolean evaluate() throws Exception {
151        return CLUSTER.getClusterMetrics().getRegionStatesInTransition().isEmpty();
152      }
153    });
154
155    ServerName targetServer1 = getServerName(newServers.iterator().next());
156    assertEquals(1, ADMIN.getRegions(targetServer1).size());
157    assertEquals(tableName, ADMIN.getRegions(targetServer1).get(0).getTable());
158  }
159
160  @Test
161  public void testKillAllRSInGroup() throws Exception {
162    // create a rsgroup and move two regionservers to it
163    String groupName = "my_group";
164    int groupRSCount = 2;
165    addGroup(groupName, groupRSCount);
166
167    // create a table, and move it to my_group
168    Table t = TEST_UTIL.createMultiRegionTable(tableName, Bytes.toBytes("f"), 5);
169    TEST_UTIL.loadTable(t, Bytes.toBytes("f"));
170    Set<TableName> toAddTables = new HashSet<>();
171    toAddTables.add(tableName);
172    ADMIN.setRSGroup(toAddTables, groupName);
173    assertTrue(
174      ADMIN.getConfiguredNamespacesAndTablesInRSGroup(groupName).getSecond().contains(tableName));
175    TEST_UTIL.waitTableAvailable(tableName, 30000);
176
177    // check my_group servers and table regions
178    Set<Address> servers = ADMIN.getRSGroup(groupName).getServers();
179    assertEquals(2, servers.size());
180    LOG.debug("group servers {}", servers);
181    for (RegionInfo tr : MASTER.getAssignmentManager().getRegionStates()
182      .getRegionsOfTable(tableName)) {
183      assertTrue(servers.contains(MASTER.getAssignmentManager().getRegionStates()
184        .getRegionAssignments().get(tr).getAddress()));
185    }
186
187    // Move a region, to ensure there exists a region whose 'lastHost' is in my_group
188    // ('lastHost' of other regions are in 'default' group)
189    // and check if all table regions are online
190    List<ServerName> gsn = new ArrayList<>();
191    for (Address addr : servers) {
192      gsn.add(getServerName(addr));
193    }
194    assertEquals(2, gsn.size());
195    for (Map.Entry<RegionInfo, ServerName> entry : MASTER.getAssignmentManager().getRegionStates()
196      .getRegionAssignments().entrySet()) {
197      if (entry.getKey().getTable().equals(tableName)) {
198        LOG.debug("move region {} from {} to {}", entry.getKey().getRegionNameAsString(),
199          entry.getValue(), gsn.get(1 - gsn.indexOf(entry.getValue())));
200        TEST_UTIL.moveRegionAndWait(entry.getKey(), gsn.get(1 - gsn.indexOf(entry.getValue())));
201        break;
202      }
203    }
204    TEST_UTIL.waitTableAvailable(tableName, 30000);
205
206    // case 1: stop all the regionservers in my_group, and restart a regionserver in my_group,
207    // and then check if all table regions are online
208    for (Address addr : ADMIN.getRSGroup(groupName).getServers()) {
209      TEST_UTIL.getMiniHBaseCluster().stopRegionServer(getServerName(addr));
210    }
211    // better wait for a while for region reassign
212    sleep(10000);
213    assertEquals(NUM_SLAVES_BASE - gsn.size(),
214      TEST_UTIL.getMiniHBaseCluster().getLiveRegionServerThreads().size());
215    TEST_UTIL.getMiniHBaseCluster().startRegionServer(gsn.get(0).getHostname(),
216      gsn.get(0).getPort());
217    assertEquals(NUM_SLAVES_BASE - gsn.size() + 1,
218      TEST_UTIL.getMiniHBaseCluster().getLiveRegionServerThreads().size());
219    TEST_UTIL.waitTableAvailable(tableName, 30000);
220
221    // case 2: stop all the regionservers in my_group, and move another
222    // regionserver(from the 'default' group) to my_group,
223    // and then check if all table regions are online
224    for (JVMClusterUtil.RegionServerThread rst : TEST_UTIL.getMiniHBaseCluster()
225      .getLiveRegionServerThreads()) {
226      if (rst.getRegionServer().getServerName().getAddress().equals(gsn.get(0).getAddress())) {
227        TEST_UTIL.getMiniHBaseCluster().stopRegionServer(rst.getRegionServer().getServerName());
228        break;
229      }
230    }
231    sleep(10000);
232    assertEquals(NUM_SLAVES_BASE - gsn.size(),
233      TEST_UTIL.getMiniHBaseCluster().getLiveRegionServerThreads().size());
234    ServerName newServer = MASTER.getServerManager().getOnlineServersList().get(0);
235    ADMIN.moveServersToRSGroup(Sets.newHashSet(newServer.getAddress()), groupName);
236    // wait and check if table regions are online
237    TEST_UTIL.waitTableAvailable(tableName, 30000);
238  }
239
240  @Test
241  public void testLowerMetaGroupVersion() throws Exception {
242    // create a rsgroup and move one regionserver to it
243    String groupName = "meta_group";
244    int groupRSCount = 1;
245    addGroup(groupName, groupRSCount);
246
247    // move hbase:meta to meta_group
248    Set<TableName> toAddTables = new HashSet<>();
249    toAddTables.add(TableName.META_TABLE_NAME);
250    ADMIN.setRSGroup(toAddTables, groupName);
251    assertTrue(ADMIN.getConfiguredNamespacesAndTablesInRSGroup(groupName).getSecond()
252      .contains(TableName.META_TABLE_NAME));
253
254    // restart the regionserver in meta_group, and lower its version
255    String originVersion = "";
256    Set<Address> servers = new HashSet<>();
257    for (Address addr : ADMIN.getRSGroup(groupName).getServers()) {
258      servers.add(addr);
259      TEST_UTIL.getMiniHBaseCluster().stopRegionServer(getServerName(addr));
260      originVersion = MASTER.getRegionServerVersion(getServerName(addr));
261    }
262    // better wait for a while for region reassign
263    sleep(10000);
264    assertEquals(NUM_SLAVES_BASE - groupRSCount,
265      TEST_UTIL.getMiniHBaseCluster().getLiveRegionServerThreads().size());
266    Address address = servers.iterator().next();
267    int majorVersion = VersionInfo.getMajorVersion(originVersion);
268    assertTrue(majorVersion >= 1);
269    String lowerVersion =
270      String.valueOf(majorVersion - 1) + originVersion.substring(originVersion.indexOf("."));
271    try {
272      setVersionInfoVersion(lowerVersion);
273      TEST_UTIL.getMiniHBaseCluster().startRegionServer(address.getHostName(), address.getPort());
274      assertEquals(NUM_SLAVES_BASE,
275        TEST_UTIL.getMiniHBaseCluster().getLiveRegionServerThreads().size());
276      assertTrue(VersionInfo.compareVersion(originVersion,
277        MASTER.getRegionServerVersion(getServerName(servers.iterator().next()))) > 0);
278      LOG.debug("wait for META assigned...");
279      // SCP finished, which means all regions assigned too.
280      TEST_UTIL.waitFor(60000, () -> !TEST_UTIL.getHBaseCluster().getMaster().getProcedures()
281        .stream().filter(p -> (p instanceof ServerCrashProcedure)).findAny().isPresent());
282    } finally {
283      setVersionInfoVersion(Version.version);
284    }
285  }
286
287  private static void setVersionInfoVersion(String newValue) throws Exception {
288    Field f = VersionInfo.class.getDeclaredField("version");
289    f.setAccessible(true);
290    f.set(null, newValue);
291  }
292}