001/*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018package org.apache.hadoop.hbase.rsgroup;
019
020import static org.apache.hadoop.hbase.util.Threads.sleep;
021import static org.junit.Assert.assertEquals;
022import static org.junit.Assert.assertTrue;
023
024import java.lang.reflect.Field;
025import java.lang.reflect.Modifier;
026import java.util.ArrayList;
027import java.util.HashSet;
028import java.util.List;
029import java.util.Map;
030import java.util.Set;
031import org.apache.hadoop.hbase.HBaseClassTestRule;
032import org.apache.hadoop.hbase.HConstants;
033import org.apache.hadoop.hbase.NamespaceDescriptor;
034import org.apache.hadoop.hbase.ServerName;
035import org.apache.hadoop.hbase.TableName;
036import org.apache.hadoop.hbase.Version;
037import org.apache.hadoop.hbase.Waiter;
038import org.apache.hadoop.hbase.client.ColumnFamilyDescriptorBuilder;
039import org.apache.hadoop.hbase.client.RegionInfo;
040import org.apache.hadoop.hbase.client.Table;
041import org.apache.hadoop.hbase.client.TableDescriptor;
042import org.apache.hadoop.hbase.client.TableDescriptorBuilder;
043import org.apache.hadoop.hbase.ipc.MetaRWQueueRpcExecutor;
044import org.apache.hadoop.hbase.master.procedure.ServerCrashProcedure;
045import org.apache.hadoop.hbase.net.Address;
046import org.apache.hadoop.hbase.testclassification.LargeTests;
047import org.apache.hadoop.hbase.util.Bytes;
048import org.apache.hadoop.hbase.util.JVMClusterUtil;
049import org.apache.hadoop.hbase.util.VersionInfo;
050import org.junit.After;
051import org.junit.AfterClass;
052import org.junit.Before;
053import org.junit.BeforeClass;
054import org.junit.ClassRule;
055import org.junit.Test;
056import org.junit.experimental.categories.Category;
057import org.slf4j.Logger;
058import org.slf4j.LoggerFactory;
059
060import org.apache.hbase.thirdparty.com.google.common.collect.Sets;
061
062@Category({ LargeTests.class })
063public class TestRSGroupsKillRS extends TestRSGroupsBase {
064
065  @ClassRule
066  public static final HBaseClassTestRule CLASS_RULE =
067    HBaseClassTestRule.forClass(TestRSGroupsKillRS.class);
068
069  protected static final Logger LOG = LoggerFactory.getLogger(TestRSGroupsKillRS.class);
070
071  @BeforeClass
072  public static void setUp() throws Exception {
073    // avoid all the handlers blocked when meta is offline, and regionServerReport can not be
074    // processed which causes dead lock.
075    TEST_UTIL.getConfiguration().setInt(HConstants.REGION_SERVER_HIGH_PRIORITY_HANDLER_COUNT, 10);
076    TEST_UTIL.getConfiguration()
077      .setFloat(MetaRWQueueRpcExecutor.META_CALL_QUEUE_READ_SHARE_CONF_KEY, 0.5f);
078    setUpTestBeforeClass();
079  }
080
081  @AfterClass
082  public static void tearDown() throws Exception {
083    tearDownAfterClass();
084  }
085
086  @Before
087  public void beforeMethod() throws Exception {
088    setUpBeforeMethod();
089  }
090
091  @After
092  public void afterMethod() throws Exception {
093    tearDownAfterMethod();
094  }
095
096  @Test
097  public void testKillRS() throws Exception {
098    RSGroupInfo appInfo = addGroup("appInfo", 1);
099    final TableName tableName = TableName.valueOf(tablePrefix + "_ns", name.getMethodName());
100    admin.createNamespace(NamespaceDescriptor.create(tableName.getNamespaceAsString())
101      .addConfiguration(RSGroupInfo.NAMESPACE_DESC_PROP_GROUP, appInfo.getName()).build());
102    final TableDescriptor desc = TableDescriptorBuilder.newBuilder(tableName)
103      .setColumnFamily(ColumnFamilyDescriptorBuilder.of("f")).build();
104    admin.createTable(desc);
105    // wait for created table to be assigned
106    TEST_UTIL.waitFor(WAIT_TIMEOUT, new Waiter.Predicate<Exception>() {
107      @Override
108      public boolean evaluate() throws Exception {
109        return getTableRegionMap().get(desc.getTableName()) != null;
110      }
111    });
112
113    ServerName targetServer = getServerName(appInfo.getServers().iterator().next());
114    assertEquals(1, admin.getRegions(targetServer).size());
115
116    try {
117      // stopping may cause an exception
118      // due to the connection loss
119      admin.stopRegionServer(targetServer.getAddress().toString());
120    } catch (Exception e) {
121    }
122    // wait until the server is actually down
123    TEST_UTIL.waitFor(WAIT_TIMEOUT, new Waiter.Predicate<Exception>() {
124      @Override
125      public boolean evaluate() throws Exception {
126        return !cluster.getClusterMetrics().getLiveServerMetrics().containsKey(targetServer);
127      }
128    });
129    // there is only one rs in the group and we killed it, so the region can not be online, until
130    // later we add new servers to it.
131    TEST_UTIL.waitFor(WAIT_TIMEOUT, new Waiter.Predicate<Exception>() {
132      @Override
133      public boolean evaluate() throws Exception {
134        return !cluster.getClusterMetrics().getRegionStatesInTransition().isEmpty();
135      }
136    });
137    Set<Address> newServers = Sets.newHashSet();
138    newServers
139      .add(rsGroupAdmin.getRSGroupInfo(RSGroupInfo.DEFAULT_GROUP).getServers().iterator().next());
140    rsGroupAdmin.moveServers(newServers, appInfo.getName());
141
142    // Make sure all the table's regions get reassigned
143    // disabling the table guarantees no conflicting assign/unassign (ie SSH) happens
144    admin.disableTable(tableName);
145    admin.enableTable(tableName);
146
147    // wait for region to be assigned
148    TEST_UTIL.waitFor(WAIT_TIMEOUT, new Waiter.Predicate<Exception>() {
149      @Override
150      public boolean evaluate() throws Exception {
151        return cluster.getClusterMetrics().getRegionStatesInTransition().isEmpty();
152      }
153    });
154
155    ServerName targetServer1 = getServerName(newServers.iterator().next());
156    assertEquals(1, admin.getRegions(targetServer1).size());
157    assertEquals(tableName, admin.getRegions(targetServer1).get(0).getTable());
158  }
159
160  @Test
161  public void testKillAllRSInGroup() throws Exception {
162    // create a rsgroup and move two regionservers to it
163    String groupName = "my_group";
164    int groupRSCount = 2;
165    addGroup(groupName, groupRSCount);
166
167    // create a table, and move it to my_group
168    Table t = TEST_UTIL.createMultiRegionTable(tableName, Bytes.toBytes("f"), 5);
169    TEST_UTIL.loadTable(t, Bytes.toBytes("f"));
170    Set<TableName> toAddTables = new HashSet<>();
171    toAddTables.add(tableName);
172    rsGroupAdmin.moveTables(toAddTables, groupName);
173    assertTrue(rsGroupAdmin.getRSGroupInfo(groupName).getTables().contains(tableName));
174    TEST_UTIL.waitTableAvailable(tableName, 30000);
175
176    // check my_group servers and table regions
177    Set<Address> servers = rsGroupAdmin.getRSGroupInfo(groupName).getServers();
178    assertEquals(2, servers.size());
179    LOG.debug("group servers {}", servers);
180    for (RegionInfo tr : master.getAssignmentManager().getRegionStates()
181      .getRegionsOfTable(tableName)) {
182      assertTrue(servers.contains(master.getAssignmentManager().getRegionStates()
183        .getRegionAssignments().get(tr).getAddress()));
184    }
185
186    // Move a region, to ensure there exists a region whose 'lastHost' is in my_group
187    // ('lastHost' of other regions are in 'default' group)
188    // and check if all table regions are online
189    List<ServerName> gsn = new ArrayList<>();
190    for (Address addr : servers) {
191      gsn.add(getServerName(addr));
192    }
193    assertEquals(2, gsn.size());
194    for (Map.Entry<RegionInfo, ServerName> entry : master.getAssignmentManager().getRegionStates()
195      .getRegionAssignments().entrySet()) {
196      if (entry.getKey().getTable().equals(tableName)) {
197        LOG.debug("move region {} from {} to {}", entry.getKey().getRegionNameAsString(),
198          entry.getValue(), gsn.get(1 - gsn.indexOf(entry.getValue())));
199        TEST_UTIL.moveRegionAndWait(entry.getKey(), gsn.get(1 - gsn.indexOf(entry.getValue())));
200        break;
201      }
202    }
203    TEST_UTIL.waitTableAvailable(tableName, 30000);
204
205    // case 1: stop all the regionservers in my_group, and restart a regionserver in my_group,
206    // and then check if all table regions are online
207    for (Address addr : rsGroupAdmin.getRSGroupInfo(groupName).getServers()) {
208      TEST_UTIL.getMiniHBaseCluster().stopRegionServer(getServerName(addr));
209    }
210    // better wait for a while for region reassign
211    sleep(10000);
212    assertEquals(NUM_SLAVES_BASE - gsn.size(),
213      TEST_UTIL.getMiniHBaseCluster().getLiveRegionServerThreads().size());
214    TEST_UTIL.getMiniHBaseCluster().startRegionServer(gsn.get(0).getHostname(),
215      gsn.get(0).getPort());
216    assertEquals(NUM_SLAVES_BASE - gsn.size() + 1,
217      TEST_UTIL.getMiniHBaseCluster().getLiveRegionServerThreads().size());
218    TEST_UTIL.waitTableAvailable(tableName, 30000);
219
220    // case 2: stop all the regionservers in my_group, and move another
221    // regionserver(from the 'default' group) to my_group,
222    // and then check if all table regions are online
223    for (JVMClusterUtil.RegionServerThread rst : TEST_UTIL.getMiniHBaseCluster()
224      .getLiveRegionServerThreads()) {
225      if (rst.getRegionServer().getServerName().getAddress().equals(gsn.get(0).getAddress())) {
226        TEST_UTIL.getMiniHBaseCluster().stopRegionServer(rst.getRegionServer().getServerName());
227        break;
228      }
229    }
230    sleep(10000);
231    assertEquals(NUM_SLAVES_BASE - gsn.size(),
232      TEST_UTIL.getMiniHBaseCluster().getLiveRegionServerThreads().size());
233    ServerName newServer = master.getServerManager().getOnlineServersList().get(0);
234    rsGroupAdmin.moveServers(Sets.newHashSet(newServer.getAddress()), groupName);
235    // wait and check if table regions are online
236    TEST_UTIL.waitTableAvailable(tableName, 30000);
237  }
238
239  @Test
240  public void testLowerMetaGroupVersion() throws Exception {
241    // create a rsgroup and move one regionserver to it
242    String groupName = "meta_group";
243    int groupRSCount = 1;
244    addGroup(groupName, groupRSCount);
245
246    // move hbase:meta to meta_group
247    Set<TableName> toAddTables = new HashSet<>();
248    toAddTables.add(TableName.META_TABLE_NAME);
249    rsGroupAdmin.moveTables(toAddTables, groupName);
250    assertTrue(
251      rsGroupAdmin.getRSGroupInfo(groupName).getTables().contains(TableName.META_TABLE_NAME));
252
253    // restart the regionserver in meta_group, and lower its version
254    String originVersion = "";
255    Set<Address> servers = new HashSet<>();
256    for (Address addr : rsGroupAdmin.getRSGroupInfo(groupName).getServers()) {
257      servers.add(addr);
258      TEST_UTIL.getMiniHBaseCluster().stopRegionServer(getServerName(addr));
259      originVersion = master.getRegionServerVersion(getServerName(addr));
260    }
261    // better wait for a while for region reassign
262    sleep(10000);
263    assertEquals(NUM_SLAVES_BASE - groupRSCount,
264      TEST_UTIL.getMiniHBaseCluster().getLiveRegionServerThreads().size());
265    Address address = servers.iterator().next();
266    int majorVersion = VersionInfo.getMajorVersion(originVersion);
267    assertTrue(majorVersion >= 1);
268    String lowerVersion = String.valueOf(majorVersion - 1) + originVersion.split("\\.")[1];
269    setFinalStatic(Version.class.getField("version"), lowerVersion);
270    TEST_UTIL.getMiniHBaseCluster().startRegionServer(address.getHostName(), address.getPort());
271    assertEquals(NUM_SLAVES_BASE,
272      TEST_UTIL.getMiniHBaseCluster().getLiveRegionServerThreads().size());
273    assertTrue(VersionInfo.compareVersion(originVersion,
274      master.getRegionServerVersion(getServerName(servers.iterator().next()))) > 0);
275    LOG.debug("wait for META assigned...");
276    // SCP finished, which means all regions assigned too.
277    TEST_UTIL.waitFor(60000, () -> !TEST_UTIL.getHBaseCluster().getMaster().getProcedures().stream()
278      .filter(p -> (p instanceof ServerCrashProcedure)).findAny().isPresent());
279  }
280
281  private static void setFinalStatic(Field field, Object newValue) throws Exception {
282    field.setAccessible(true);
283    Field modifiersField = Field.class.getDeclaredField("modifiers");
284    modifiersField.setAccessible(true);
285    modifiersField.setInt(field, field.getModifiers() & ~Modifier.FINAL);
286    field.set(null, newValue);
287  }
288}