001/*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018package org.apache.hadoop.hbase.rsgroup;
019
020import static org.apache.hadoop.hbase.util.Threads.sleep;
021import static org.junit.jupiter.api.Assertions.assertEquals;
022import static org.junit.jupiter.api.Assertions.assertTrue;
023
024import java.lang.reflect.Field;
025import java.util.ArrayList;
026import java.util.HashSet;
027import java.util.List;
028import java.util.Map;
029import java.util.Set;
030import org.apache.hadoop.hbase.HConstants;
031import org.apache.hadoop.hbase.NamespaceDescriptor;
032import org.apache.hadoop.hbase.ServerName;
033import org.apache.hadoop.hbase.TableName;
034import org.apache.hadoop.hbase.Version;
035import org.apache.hadoop.hbase.Waiter;
036import org.apache.hadoop.hbase.client.ColumnFamilyDescriptorBuilder;
037import org.apache.hadoop.hbase.client.RegionInfo;
038import org.apache.hadoop.hbase.client.Table;
039import org.apache.hadoop.hbase.client.TableDescriptor;
040import org.apache.hadoop.hbase.client.TableDescriptorBuilder;
041import org.apache.hadoop.hbase.ipc.MetaRWQueueRpcExecutor;
042import org.apache.hadoop.hbase.master.procedure.ServerCrashProcedure;
043import org.apache.hadoop.hbase.net.Address;
044import org.apache.hadoop.hbase.testclassification.LargeTests;
045import org.apache.hadoop.hbase.testclassification.RSGroupTests;
046import org.apache.hadoop.hbase.util.Bytes;
047import org.apache.hadoop.hbase.util.JVMClusterUtil;
048import org.apache.hadoop.hbase.util.VersionInfo;
049import org.junit.jupiter.api.AfterAll;
050import org.junit.jupiter.api.AfterEach;
051import org.junit.jupiter.api.BeforeAll;
052import org.junit.jupiter.api.BeforeEach;
053import org.junit.jupiter.api.Tag;
054import org.junit.jupiter.api.Test;
055import org.junit.jupiter.api.TestInfo;
056import org.slf4j.Logger;
057import org.slf4j.LoggerFactory;
058
059import org.apache.hbase.thirdparty.com.google.common.collect.Sets;
060
061@Tag(RSGroupTests.TAG)
062@Tag(LargeTests.TAG)
063public class TestRSGroupsKillRS extends TestRSGroupsBase {
064
065  private static final Logger LOG = LoggerFactory.getLogger(TestRSGroupsKillRS.class);
066
067  @BeforeAll
068  public static void setUp() throws Exception {
069    // avoid all the handlers blocked when meta is offline, and regionServerReport can not be
070    // processed which causes dead lock.
071    TEST_UTIL.getConfiguration().setInt(HConstants.REGION_SERVER_HIGH_PRIORITY_HANDLER_COUNT, 10);
072    TEST_UTIL.getConfiguration()
073      .setFloat(MetaRWQueueRpcExecutor.META_CALL_QUEUE_READ_SHARE_CONF_KEY, 0.5f);
074    setUpTestBeforeClass();
075  }
076
077  @AfterAll
078  public static void tearDown() throws Exception {
079    tearDownAfterClass();
080  }
081
082  @BeforeEach
083  public void beforeMethod(TestInfo testInfo) throws Exception {
084    setUpBeforeMethod(testInfo);
085  }
086
087  @AfterEach
088  public void afterMethod() throws Exception {
089    tearDownAfterMethod();
090  }
091
092  @Test
093  public void testKillRS() throws Exception {
094    RSGroupInfo appInfo = addGroup("appInfo", 1);
095    final TableName tableName =
096      TableName.valueOf(TABLE_PREFIX + "_ns", getNameWithoutIndex(name.getMethodName()));
097    ADMIN.createNamespace(NamespaceDescriptor.create(tableName.getNamespaceAsString())
098      .addConfiguration(RSGroupInfo.NAMESPACE_DESC_PROP_GROUP, appInfo.getName()).build());
099    final TableDescriptor desc = TableDescriptorBuilder.newBuilder(tableName)
100      .setColumnFamily(ColumnFamilyDescriptorBuilder.of("f")).build();
101    ADMIN.createTable(desc);
102    // wait for created table to be assigned
103    TEST_UTIL.waitFor(WAIT_TIMEOUT, new Waiter.Predicate<Exception>() {
104      @Override
105      public boolean evaluate() throws Exception {
106        return getTableRegionMap().get(desc.getTableName()) != null;
107      }
108    });
109
110    ServerName targetServer = getServerName(appInfo.getServers().iterator().next());
111    assertEquals(1, ADMIN.getRegions(targetServer).size());
112
113    try {
114      // stopping may cause an exception
115      // due to the connection loss
116      ADMIN.stopRegionServer(targetServer.getAddress().toString());
117    } catch (Exception e) {
118    }
119    // wait until the server is actually down
120    TEST_UTIL.waitFor(WAIT_TIMEOUT, new Waiter.Predicate<Exception>() {
121      @Override
122      public boolean evaluate() throws Exception {
123        return !CLUSTER.getClusterMetrics().getLiveServerMetrics().containsKey(targetServer);
124      }
125    });
126    // there is only one rs in the group and we killed it, so the region can not be online, until
127    // later we add new servers to it.
128    TEST_UTIL.waitFor(WAIT_TIMEOUT, new Waiter.Predicate<Exception>() {
129      @Override
130      public boolean evaluate() throws Exception {
131        return !CLUSTER.getClusterMetrics().getRegionStatesInTransition().isEmpty();
132      }
133    });
134    Set<Address> newServers = Sets.newHashSet();
135    newServers.add(ADMIN.getRSGroup(RSGroupInfo.DEFAULT_GROUP).getServers().iterator().next());
136    ADMIN.moveServersToRSGroup(newServers, appInfo.getName());
137
138    // Make sure all the table's regions get reassigned
139    // disabling the table guarantees no conflicting assign/unassign (ie SSH) happens
140    ADMIN.disableTable(tableName);
141    ADMIN.enableTable(tableName);
142
143    // wait for region to be assigned
144    TEST_UTIL.waitFor(WAIT_TIMEOUT, new Waiter.Predicate<Exception>() {
145      @Override
146      public boolean evaluate() throws Exception {
147        return CLUSTER.getClusterMetrics().getRegionStatesInTransition().isEmpty();
148      }
149    });
150
151    ServerName targetServer1 = getServerName(newServers.iterator().next());
152    assertEquals(1, ADMIN.getRegions(targetServer1).size());
153    assertEquals(tableName, ADMIN.getRegions(targetServer1).get(0).getTable());
154  }
155
156  @Test
157  public void testKillAllRSInGroup() throws Exception {
158    // create a rsgroup and move two regionservers to it
159    String groupName = "my_group";
160    int groupRSCount = 2;
161    addGroup(groupName, groupRSCount);
162
163    // create a table, and move it to my_group
164    Table t = TEST_UTIL.createMultiRegionTable(tableName, Bytes.toBytes("f"), 5);
165    TEST_UTIL.loadTable(t, Bytes.toBytes("f"));
166    Set<TableName> toAddTables = new HashSet<>();
167    toAddTables.add(tableName);
168    ADMIN.setRSGroup(toAddTables, groupName);
169    assertTrue(
170      ADMIN.getConfiguredNamespacesAndTablesInRSGroup(groupName).getSecond().contains(tableName));
171    TEST_UTIL.waitTableAvailable(tableName, 30000);
172
173    // check my_group servers and table regions
174    Set<Address> servers = ADMIN.getRSGroup(groupName).getServers();
175    assertEquals(2, servers.size());
176    LOG.debug("group servers {}", servers);
177    for (RegionInfo tr : MASTER.getAssignmentManager().getRegionStates()
178      .getRegionsOfTable(tableName)) {
179      assertTrue(servers.contains(MASTER.getAssignmentManager().getRegionStates()
180        .getRegionAssignments().get(tr).getAddress()));
181    }
182
183    // Move a region, to ensure there exists a region whose 'lastHost' is in my_group
184    // ('lastHost' of other regions are in 'default' group)
185    // and check if all table regions are online
186    List<ServerName> gsn = new ArrayList<>();
187    for (Address addr : servers) {
188      gsn.add(getServerName(addr));
189    }
190    assertEquals(2, gsn.size());
191    for (Map.Entry<RegionInfo, ServerName> entry : MASTER.getAssignmentManager().getRegionStates()
192      .getRegionAssignments().entrySet()) {
193      if (entry.getKey().getTable().equals(tableName)) {
194        LOG.debug("move region {} from {} to {}", entry.getKey().getRegionNameAsString(),
195          entry.getValue(), gsn.get(1 - gsn.indexOf(entry.getValue())));
196        TEST_UTIL.moveRegionAndWait(entry.getKey(), gsn.get(1 - gsn.indexOf(entry.getValue())));
197        break;
198      }
199    }
200    TEST_UTIL.waitTableAvailable(tableName, 30000);
201
202    // case 1: stop all the regionservers in my_group, and restart a regionserver in my_group,
203    // and then check if all table regions are online
204    for (Address addr : ADMIN.getRSGroup(groupName).getServers()) {
205      TEST_UTIL.getMiniHBaseCluster().stopRegionServer(getServerName(addr));
206    }
207    // better wait for a while for region reassign
208    sleep(10000);
209    assertEquals(NUM_SLAVES_BASE - gsn.size(),
210      TEST_UTIL.getMiniHBaseCluster().getLiveRegionServerThreads().size());
211    TEST_UTIL.getMiniHBaseCluster().startRegionServer(gsn.get(0).getHostname(),
212      gsn.get(0).getPort());
213    assertEquals(NUM_SLAVES_BASE - gsn.size() + 1,
214      TEST_UTIL.getMiniHBaseCluster().getLiveRegionServerThreads().size());
215    TEST_UTIL.waitTableAvailable(tableName, 30000);
216
217    // case 2: stop all the regionservers in my_group, and move another
218    // regionserver(from the 'default' group) to my_group,
219    // and then check if all table regions are online
220    for (JVMClusterUtil.RegionServerThread rst : TEST_UTIL.getMiniHBaseCluster()
221      .getLiveRegionServerThreads()) {
222      if (rst.getRegionServer().getServerName().getAddress().equals(gsn.get(0).getAddress())) {
223        TEST_UTIL.getMiniHBaseCluster().stopRegionServer(rst.getRegionServer().getServerName());
224        break;
225      }
226    }
227    sleep(10000);
228    assertEquals(NUM_SLAVES_BASE - gsn.size(),
229      TEST_UTIL.getMiniHBaseCluster().getLiveRegionServerThreads().size());
230    ServerName newServer = MASTER.getServerManager().getOnlineServersList().get(0);
231    ADMIN.moveServersToRSGroup(Sets.newHashSet(newServer.getAddress()), groupName);
232    // wait and check if table regions are online
233    TEST_UTIL.waitTableAvailable(tableName, 30000);
234  }
235
236  @Test
237  public void testLowerMetaGroupVersion() throws Exception {
238    // create a rsgroup and move one regionserver to it
239    String groupName = "meta_group";
240    int groupRSCount = 1;
241    addGroup(groupName, groupRSCount);
242
243    // move hbase:meta to meta_group
244    Set<TableName> toAddTables = new HashSet<>();
245    toAddTables.add(TableName.META_TABLE_NAME);
246    ADMIN.setRSGroup(toAddTables, groupName);
247    assertTrue(ADMIN.getConfiguredNamespacesAndTablesInRSGroup(groupName).getSecond()
248      .contains(TableName.META_TABLE_NAME));
249
250    // restart the regionserver in meta_group, and lower its version
251    String originVersion = "";
252    Set<Address> servers = new HashSet<>();
253    for (Address addr : ADMIN.getRSGroup(groupName).getServers()) {
254      servers.add(addr);
255      TEST_UTIL.getMiniHBaseCluster().stopRegionServer(getServerName(addr));
256      originVersion = MASTER.getRegionServerVersion(getServerName(addr));
257    }
258    // better wait for a while for region reassign
259    sleep(10000);
260    assertEquals(NUM_SLAVES_BASE - groupRSCount,
261      TEST_UTIL.getMiniHBaseCluster().getLiveRegionServerThreads().size());
262    Address address = servers.iterator().next();
263    int majorVersion = VersionInfo.getMajorVersion(originVersion);
264    assertTrue(majorVersion >= 1);
265    String lowerVersion =
266      String.valueOf(majorVersion - 1) + originVersion.substring(originVersion.indexOf("."));
267    try {
268      setVersionInfoVersion(lowerVersion);
269      TEST_UTIL.getMiniHBaseCluster().startRegionServer(address.getHostName(), address.getPort());
270      assertEquals(NUM_SLAVES_BASE,
271        TEST_UTIL.getMiniHBaseCluster().getLiveRegionServerThreads().size());
272      assertTrue(VersionInfo.compareVersion(originVersion,
273        MASTER.getRegionServerVersion(getServerName(servers.iterator().next()))) > 0);
274      LOG.debug("wait for META assigned...");
275      // SCP finished, which means all regions assigned too.
276      TEST_UTIL.waitFor(60000, () -> !TEST_UTIL.getHBaseCluster().getMaster().getProcedures()
277        .stream().filter(p -> (p instanceof ServerCrashProcedure)).findAny().isPresent());
278    } finally {
279      setVersionInfoVersion(Version.version);
280    }
281  }
282
283  private static void setVersionInfoVersion(String newValue) throws Exception {
284    Field f = VersionInfo.class.getDeclaredField("version");
285    f.setAccessible(true);
286    f.set(null, newValue);
287  }
288}