001/*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018package org.apache.hadoop.hbase.rsgroup;
019
020import static org.apache.hadoop.hbase.util.Threads.sleep;
021import static org.junit.Assert.assertEquals;
022import static org.junit.Assert.assertTrue;
023
024import java.lang.reflect.Field;
025import java.lang.reflect.Modifier;
026import java.util.ArrayList;
027import java.util.HashSet;
028import java.util.List;
029import java.util.Map;
030import java.util.Set;
031import org.apache.hadoop.hbase.HBaseClassTestRule;
032import org.apache.hadoop.hbase.HConstants;
033import org.apache.hadoop.hbase.NamespaceDescriptor;
034import org.apache.hadoop.hbase.ServerName;
035import org.apache.hadoop.hbase.TableName;
036import org.apache.hadoop.hbase.Version;
037import org.apache.hadoop.hbase.Waiter;
038import org.apache.hadoop.hbase.client.ColumnFamilyDescriptorBuilder;
039import org.apache.hadoop.hbase.client.RegionInfo;
040import org.apache.hadoop.hbase.client.Table;
041import org.apache.hadoop.hbase.client.TableDescriptor;
042import org.apache.hadoop.hbase.client.TableDescriptorBuilder;
043import org.apache.hadoop.hbase.ipc.MetaRWQueueRpcExecutor;
044import org.apache.hadoop.hbase.master.procedure.ServerCrashProcedure;
045import org.apache.hadoop.hbase.net.Address;
046import org.apache.hadoop.hbase.testclassification.MediumTests;
047import org.apache.hadoop.hbase.testclassification.RSGroupTests;
048import org.apache.hadoop.hbase.util.Bytes;
049import org.apache.hadoop.hbase.util.JVMClusterUtil;
050import org.apache.hadoop.hbase.util.ReflectionUtils;
051import org.apache.hadoop.hbase.util.VersionInfo;
052import org.junit.After;
053import org.junit.AfterClass;
054import org.junit.Before;
055import org.junit.BeforeClass;
056import org.junit.ClassRule;
057import org.junit.Test;
058import org.junit.experimental.categories.Category;
059import org.slf4j.Logger;
060import org.slf4j.LoggerFactory;
061
062import org.apache.hbase.thirdparty.com.google.common.collect.Sets;
063
064@Category({ RSGroupTests.class, MediumTests.class })
065public class TestRSGroupsKillRS extends TestRSGroupsBase {
066
067  @ClassRule
068  public static final HBaseClassTestRule CLASS_RULE =
069    HBaseClassTestRule.forClass(TestRSGroupsKillRS.class);
070
071  private static final Logger LOG = LoggerFactory.getLogger(TestRSGroupsKillRS.class);
072
073  @BeforeClass
074  public static void setUp() throws Exception {
075    // avoid all the handlers blocked when meta is offline, and regionServerReport can not be
076    // processed which causes dead lock.
077    TEST_UTIL.getConfiguration().setInt(HConstants.REGION_SERVER_HIGH_PRIORITY_HANDLER_COUNT, 10);
078    TEST_UTIL.getConfiguration()
079      .setFloat(MetaRWQueueRpcExecutor.META_CALL_QUEUE_READ_SHARE_CONF_KEY, 0.5f);
080    setUpTestBeforeClass();
081  }
082
083  @AfterClass
084  public static void tearDown() throws Exception {
085    tearDownAfterClass();
086  }
087
088  @Before
089  public void beforeMethod() throws Exception {
090    setUpBeforeMethod();
091  }
092
093  @After
094  public void afterMethod() throws Exception {
095    tearDownAfterMethod();
096  }
097
098  @Test
099  public void testKillRS() throws Exception {
100    RSGroupInfo appInfo = addGroup("appInfo", 1);
101    final TableName tableName =
102      TableName.valueOf(TABLE_PREFIX + "_ns", getNameWithoutIndex(name.getMethodName()));
103    ADMIN.createNamespace(NamespaceDescriptor.create(tableName.getNamespaceAsString())
104      .addConfiguration(RSGroupInfo.NAMESPACE_DESC_PROP_GROUP, appInfo.getName()).build());
105    final TableDescriptor desc = TableDescriptorBuilder.newBuilder(tableName)
106      .setColumnFamily(ColumnFamilyDescriptorBuilder.of("f")).build();
107    ADMIN.createTable(desc);
108    // wait for created table to be assigned
109    TEST_UTIL.waitFor(WAIT_TIMEOUT, new Waiter.Predicate<Exception>() {
110      @Override
111      public boolean evaluate() throws Exception {
112        return getTableRegionMap().get(desc.getTableName()) != null;
113      }
114    });
115
116    ServerName targetServer = getServerName(appInfo.getServers().iterator().next());
117    assertEquals(1, ADMIN.getRegions(targetServer).size());
118
119    try {
120      // stopping may cause an exception
121      // due to the connection loss
122      ADMIN.stopRegionServer(targetServer.getAddress().toString());
123    } catch (Exception e) {
124    }
125    // wait until the server is actually down
126    TEST_UTIL.waitFor(WAIT_TIMEOUT, new Waiter.Predicate<Exception>() {
127      @Override
128      public boolean evaluate() throws Exception {
129        return !CLUSTER.getClusterMetrics().getLiveServerMetrics().containsKey(targetServer);
130      }
131    });
132    // there is only one rs in the group and we killed it, so the region can not be online, until
133    // later we add new servers to it.
134    TEST_UTIL.waitFor(WAIT_TIMEOUT, new Waiter.Predicate<Exception>() {
135      @Override
136      public boolean evaluate() throws Exception {
137        return !CLUSTER.getClusterMetrics().getRegionStatesInTransition().isEmpty();
138      }
139    });
140    Set<Address> newServers = Sets.newHashSet();
141    newServers.add(ADMIN.getRSGroup(RSGroupInfo.DEFAULT_GROUP).getServers().iterator().next());
142    ADMIN.moveServersToRSGroup(newServers, appInfo.getName());
143
144    // Make sure all the table's regions get reassigned
145    // disabling the table guarantees no conflicting assign/unassign (ie SSH) happens
146    ADMIN.disableTable(tableName);
147    ADMIN.enableTable(tableName);
148
149    // wait for region to be assigned
150    TEST_UTIL.waitFor(WAIT_TIMEOUT, new Waiter.Predicate<Exception>() {
151      @Override
152      public boolean evaluate() throws Exception {
153        return CLUSTER.getClusterMetrics().getRegionStatesInTransition().isEmpty();
154      }
155    });
156
157    ServerName targetServer1 = getServerName(newServers.iterator().next());
158    assertEquals(1, ADMIN.getRegions(targetServer1).size());
159    assertEquals(tableName, ADMIN.getRegions(targetServer1).get(0).getTable());
160  }
161
162  @Test
163  public void testKillAllRSInGroup() throws Exception {
164    // create a rsgroup and move two regionservers to it
165    String groupName = "my_group";
166    int groupRSCount = 2;
167    addGroup(groupName, groupRSCount);
168
169    // create a table, and move it to my_group
170    Table t = TEST_UTIL.createMultiRegionTable(tableName, Bytes.toBytes("f"), 5);
171    TEST_UTIL.loadTable(t, Bytes.toBytes("f"));
172    Set<TableName> toAddTables = new HashSet<>();
173    toAddTables.add(tableName);
174    ADMIN.setRSGroup(toAddTables, groupName);
175    assertTrue(
176      ADMIN.getConfiguredNamespacesAndTablesInRSGroup(groupName).getSecond().contains(tableName));
177    TEST_UTIL.waitTableAvailable(tableName, 30000);
178
179    // check my_group servers and table regions
180    Set<Address> servers = ADMIN.getRSGroup(groupName).getServers();
181    assertEquals(2, servers.size());
182    LOG.debug("group servers {}", servers);
183    for (RegionInfo tr : MASTER.getAssignmentManager().getRegionStates()
184      .getRegionsOfTable(tableName)) {
185      assertTrue(servers.contains(MASTER.getAssignmentManager().getRegionStates()
186        .getRegionAssignments().get(tr).getAddress()));
187    }
188
189    // Move a region, to ensure there exists a region whose 'lastHost' is in my_group
190    // ('lastHost' of other regions are in 'default' group)
191    // and check if all table regions are online
192    List<ServerName> gsn = new ArrayList<>();
193    for (Address addr : servers) {
194      gsn.add(getServerName(addr));
195    }
196    assertEquals(2, gsn.size());
197    for (Map.Entry<RegionInfo, ServerName> entry : MASTER.getAssignmentManager().getRegionStates()
198      .getRegionAssignments().entrySet()) {
199      if (entry.getKey().getTable().equals(tableName)) {
200        LOG.debug("move region {} from {} to {}", entry.getKey().getRegionNameAsString(),
201          entry.getValue(), gsn.get(1 - gsn.indexOf(entry.getValue())));
202        TEST_UTIL.moveRegionAndWait(entry.getKey(), gsn.get(1 - gsn.indexOf(entry.getValue())));
203        break;
204      }
205    }
206    TEST_UTIL.waitTableAvailable(tableName, 30000);
207
208    // case 1: stop all the regionservers in my_group, and restart a regionserver in my_group,
209    // and then check if all table regions are online
210    for (Address addr : ADMIN.getRSGroup(groupName).getServers()) {
211      TEST_UTIL.getMiniHBaseCluster().stopRegionServer(getServerName(addr));
212    }
213    // better wait for a while for region reassign
214    sleep(10000);
215    assertEquals(NUM_SLAVES_BASE - gsn.size(),
216      TEST_UTIL.getMiniHBaseCluster().getLiveRegionServerThreads().size());
217    TEST_UTIL.getMiniHBaseCluster().startRegionServer(gsn.get(0).getHostname(),
218      gsn.get(0).getPort());
219    assertEquals(NUM_SLAVES_BASE - gsn.size() + 1,
220      TEST_UTIL.getMiniHBaseCluster().getLiveRegionServerThreads().size());
221    TEST_UTIL.waitTableAvailable(tableName, 30000);
222
223    // case 2: stop all the regionservers in my_group, and move another
224    // regionserver(from the 'default' group) to my_group,
225    // and then check if all table regions are online
226    for (JVMClusterUtil.RegionServerThread rst : TEST_UTIL.getMiniHBaseCluster()
227      .getLiveRegionServerThreads()) {
228      if (rst.getRegionServer().getServerName().getAddress().equals(gsn.get(0).getAddress())) {
229        TEST_UTIL.getMiniHBaseCluster().stopRegionServer(rst.getRegionServer().getServerName());
230        break;
231      }
232    }
233    sleep(10000);
234    assertEquals(NUM_SLAVES_BASE - gsn.size(),
235      TEST_UTIL.getMiniHBaseCluster().getLiveRegionServerThreads().size());
236    ServerName newServer = MASTER.getServerManager().getOnlineServersList().get(0);
237    ADMIN.moveServersToRSGroup(Sets.newHashSet(newServer.getAddress()), groupName);
238    // wait and check if table regions are online
239    TEST_UTIL.waitTableAvailable(tableName, 30000);
240  }
241
242  @Test
243  public void testLowerMetaGroupVersion() throws Exception {
244    // create a rsgroup and move one regionserver to it
245    String groupName = "meta_group";
246    int groupRSCount = 1;
247    addGroup(groupName, groupRSCount);
248
249    // move hbase:meta to meta_group
250    Set<TableName> toAddTables = new HashSet<>();
251    toAddTables.add(TableName.META_TABLE_NAME);
252    ADMIN.setRSGroup(toAddTables, groupName);
253    assertTrue(ADMIN.getConfiguredNamespacesAndTablesInRSGroup(groupName).getSecond()
254      .contains(TableName.META_TABLE_NAME));
255
256    // restart the regionserver in meta_group, and lower its version
257    String originVersion = "";
258    Set<Address> servers = new HashSet<>();
259    for (Address addr : ADMIN.getRSGroup(groupName).getServers()) {
260      servers.add(addr);
261      TEST_UTIL.getMiniHBaseCluster().stopRegionServer(getServerName(addr));
262      originVersion = MASTER.getRegionServerVersion(getServerName(addr));
263    }
264    // better wait for a while for region reassign
265    sleep(10000);
266    assertEquals(NUM_SLAVES_BASE - groupRSCount,
267      TEST_UTIL.getMiniHBaseCluster().getLiveRegionServerThreads().size());
268    Address address = servers.iterator().next();
269    int majorVersion = VersionInfo.getMajorVersion(originVersion);
270    assertTrue(majorVersion >= 1);
271    String lowerVersion = String.valueOf(majorVersion - 1) + originVersion.split("\\.")[1];
272    setFinalStatic(Version.class.getField("version"), lowerVersion);
273    TEST_UTIL.getMiniHBaseCluster().startRegionServer(address.getHostName(), address.getPort());
274    assertEquals(NUM_SLAVES_BASE,
275      TEST_UTIL.getMiniHBaseCluster().getLiveRegionServerThreads().size());
276    assertTrue(VersionInfo.compareVersion(originVersion,
277      MASTER.getRegionServerVersion(getServerName(servers.iterator().next()))) > 0);
278    LOG.debug("wait for META assigned...");
279    // SCP finished, which means all regions assigned too.
280    TEST_UTIL.waitFor(60000, () -> !TEST_UTIL.getHBaseCluster().getMaster().getProcedures().stream()
281      .filter(p -> (p instanceof ServerCrashProcedure)).findAny().isPresent());
282  }
283
284  private static void setFinalStatic(Field field, Object newValue) throws Exception {
285    field.setAccessible(true);
286    Field modifiersField = ReflectionUtils.getModifiersField();
287    modifiersField.setAccessible(true);
288    modifiersField.setInt(field, field.getModifiers() & ~Modifier.FINAL);
289    field.set(null, newValue);
290  }
291}