001/**
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018package org.apache.hadoop.hbase.rsgroup;
019
020import static org.apache.hadoop.hbase.util.Threads.sleep;
021import static org.junit.Assert.assertEquals;
022import static org.junit.Assert.assertTrue;
023
024import java.lang.reflect.Field;
025import java.lang.reflect.Modifier;
026import java.util.ArrayList;
027import java.util.HashSet;
028import java.util.List;
029import java.util.Map;
030import java.util.Set;
031import org.apache.hadoop.hbase.HBaseClassTestRule;
032import org.apache.hadoop.hbase.NamespaceDescriptor;
033import org.apache.hadoop.hbase.ServerName;
034import org.apache.hadoop.hbase.TableName;
035import org.apache.hadoop.hbase.Version;
036import org.apache.hadoop.hbase.Waiter;
037import org.apache.hadoop.hbase.client.ColumnFamilyDescriptorBuilder;
038import org.apache.hadoop.hbase.client.RegionInfo;
039import org.apache.hadoop.hbase.client.Table;
040import org.apache.hadoop.hbase.client.TableDescriptor;
041import org.apache.hadoop.hbase.client.TableDescriptorBuilder;
042import org.apache.hadoop.hbase.master.procedure.ServerCrashProcedure;
043import org.apache.hadoop.hbase.net.Address;
044import org.apache.hadoop.hbase.testclassification.LargeTests;
045import org.apache.hadoop.hbase.util.Bytes;
046import org.apache.hadoop.hbase.util.JVMClusterUtil;
047import org.apache.hadoop.hbase.util.VersionInfo;
048import org.junit.After;
049import org.junit.AfterClass;
050import org.junit.Before;
051import org.junit.BeforeClass;
052import org.junit.ClassRule;
053import org.junit.Test;
054import org.junit.experimental.categories.Category;
055import org.slf4j.Logger;
056import org.slf4j.LoggerFactory;
057
058import org.apache.hbase.thirdparty.com.google.common.collect.Sets;
059
060@Category({ LargeTests.class })
061public class TestRSGroupsKillRS extends TestRSGroupsBase {
062
063  @ClassRule
064  public static final HBaseClassTestRule CLASS_RULE =
065    HBaseClassTestRule.forClass(TestRSGroupsKillRS.class);
066
067  protected static final Logger LOG = LoggerFactory.getLogger(TestRSGroupsKillRS.class);
068
069  @BeforeClass
070  public static void setUp() throws Exception {
071    setUpTestBeforeClass();
072  }
073
074  @AfterClass
075  public static void tearDown() throws Exception {
076    tearDownAfterClass();
077  }
078
079  @Before
080  public void beforeMethod() throws Exception {
081    setUpBeforeMethod();
082  }
083
084  @After
085  public void afterMethod() throws Exception {
086    tearDownAfterMethod();
087  }
088
089  @Test
090  public void testKillRS() throws Exception {
091    RSGroupInfo appInfo = addGroup("appInfo", 1);
092    final TableName tableName = TableName.valueOf(tablePrefix + "_ns", name.getMethodName());
093    admin.createNamespace(NamespaceDescriptor.create(tableName.getNamespaceAsString())
094      .addConfiguration(RSGroupInfo.NAMESPACE_DESC_PROP_GROUP, appInfo.getName()).build());
095    final TableDescriptor desc = TableDescriptorBuilder.newBuilder(tableName)
096      .setColumnFamily(ColumnFamilyDescriptorBuilder.of("f")).build();
097    admin.createTable(desc);
098    // wait for created table to be assigned
099    TEST_UTIL.waitFor(WAIT_TIMEOUT, new Waiter.Predicate<Exception>() {
100      @Override
101      public boolean evaluate() throws Exception {
102        return getTableRegionMap().get(desc.getTableName()) != null;
103      }
104    });
105
106    ServerName targetServer = getServerName(appInfo.getServers().iterator().next());
107    assertEquals(1, admin.getRegions(targetServer).size());
108
109    try {
110      // stopping may cause an exception
111      // due to the connection loss
112      admin.stopRegionServer(targetServer.getAddress().toString());
113    } catch (Exception e) {
114    }
115    // wait until the server is actually down
116    TEST_UTIL.waitFor(WAIT_TIMEOUT, new Waiter.Predicate<Exception>() {
117      @Override
118      public boolean evaluate() throws Exception {
119        return !cluster.getClusterMetrics().getLiveServerMetrics().containsKey(targetServer);
120      }
121    });
122    // there is only one rs in the group and we killed it, so the region can not be online, until
123    // later we add new servers to it.
124    TEST_UTIL.waitFor(WAIT_TIMEOUT, new Waiter.Predicate<Exception>() {
125      @Override
126      public boolean evaluate() throws Exception {
127        return !cluster.getClusterMetrics().getRegionStatesInTransition().isEmpty();
128      }
129    });
130    Set<Address> newServers = Sets.newHashSet();
131    newServers
132      .add(rsGroupAdmin.getRSGroupInfo(RSGroupInfo.DEFAULT_GROUP).getServers().iterator().next());
133    rsGroupAdmin.moveServers(newServers, appInfo.getName());
134
135    // Make sure all the table's regions get reassigned
136    // disabling the table guarantees no conflicting assign/unassign (ie SSH) happens
137    admin.disableTable(tableName);
138    admin.enableTable(tableName);
139
140    // wait for region to be assigned
141    TEST_UTIL.waitFor(WAIT_TIMEOUT, new Waiter.Predicate<Exception>() {
142      @Override
143      public boolean evaluate() throws Exception {
144        return cluster.getClusterMetrics().getRegionStatesInTransition().isEmpty();
145      }
146    });
147
148    ServerName targetServer1 = getServerName(newServers.iterator().next());
149    assertEquals(1, admin.getRegions(targetServer1).size());
150    assertEquals(tableName, admin.getRegions(targetServer1).get(0).getTable());
151  }
152
153  @Test
154  public void testKillAllRSInGroup() throws Exception {
155    // create a rsgroup and move two regionservers to it
156    String groupName = "my_group";
157    int groupRSCount = 2;
158    addGroup(groupName, groupRSCount);
159
160    // create a table, and move it to my_group
161    Table t = TEST_UTIL.createMultiRegionTable(tableName, Bytes.toBytes("f"), 5);
162    TEST_UTIL.loadTable(t, Bytes.toBytes("f"));
163    Set<TableName> toAddTables = new HashSet<>();
164    toAddTables.add(tableName);
165    rsGroupAdmin.moveTables(toAddTables, groupName);
166    assertTrue(rsGroupAdmin.getRSGroupInfo(groupName).getTables().contains(tableName));
167    TEST_UTIL.waitTableAvailable(tableName, 30000);
168
169    // check my_group servers and table regions
170    Set<Address> servers = rsGroupAdmin.getRSGroupInfo(groupName).getServers();
171    assertEquals(2, servers.size());
172    LOG.debug("group servers {}", servers);
173    for (RegionInfo tr :
174        master.getAssignmentManager().getRegionStates().getRegionsOfTable(tableName)) {
175      assertTrue(servers.contains(
176          master.getAssignmentManager().getRegionStates().getRegionAssignments()
177              .get(tr).getAddress()));
178    }
179
180    // Move a region, to ensure there exists a region whose 'lastHost' is in my_group
181    // ('lastHost' of other regions are in 'default' group)
182    // and check if all table regions are online
183    List<ServerName> gsn = new ArrayList<>();
184    for(Address addr : servers){
185      gsn.add(getServerName(addr));
186    }
187    assertEquals(2, gsn.size());
188    for(Map.Entry<RegionInfo, ServerName> entry :
189        master.getAssignmentManager().getRegionStates().getRegionAssignments().entrySet()){
190      if(entry.getKey().getTable().equals(tableName)){
191        LOG.debug("move region {} from {} to {}", entry.getKey().getRegionNameAsString(),
192            entry.getValue(), gsn.get(1 - gsn.indexOf(entry.getValue())));
193        TEST_UTIL.moveRegionAndWait(entry.getKey(), gsn.get(1 - gsn.indexOf(entry.getValue())));
194        break;
195      }
196    }
197    TEST_UTIL.waitTableAvailable(tableName, 30000);
198
199    // case 1: stop all the regionservers in my_group, and restart a regionserver in my_group,
200    // and then check if all table regions are online
201    for(Address addr : rsGroupAdmin.getRSGroupInfo(groupName).getServers()) {
202      TEST_UTIL.getMiniHBaseCluster().stopRegionServer(getServerName(addr));
203    }
204    // better wait for a while for region reassign
205    sleep(10000);
206    assertEquals(NUM_SLAVES_BASE - gsn.size(),
207        TEST_UTIL.getMiniHBaseCluster().getLiveRegionServerThreads().size());
208    TEST_UTIL.getMiniHBaseCluster().startRegionServer(gsn.get(0).getHostname(),
209        gsn.get(0).getPort());
210    assertEquals(NUM_SLAVES_BASE - gsn.size() + 1,
211        TEST_UTIL.getMiniHBaseCluster().getLiveRegionServerThreads().size());
212    TEST_UTIL.waitTableAvailable(tableName, 30000);
213
214    // case 2: stop all the regionservers in my_group, and move another
215    // regionserver(from the 'default' group) to my_group,
216    // and then check if all table regions are online
217    for(JVMClusterUtil.RegionServerThread rst :
218        TEST_UTIL.getMiniHBaseCluster().getLiveRegionServerThreads()){
219      if(rst.getRegionServer().getServerName().getAddress().equals(gsn.get(0).getAddress())){
220        TEST_UTIL.getMiniHBaseCluster().stopRegionServer(rst.getRegionServer().getServerName());
221        break;
222      }
223    }
224    sleep(10000);
225    assertEquals(NUM_SLAVES_BASE - gsn.size(),
226        TEST_UTIL.getMiniHBaseCluster().getLiveRegionServerThreads().size());
227    ServerName newServer = master.getServerManager().getOnlineServersList().get(0);
228    rsGroupAdmin.moveServers(Sets.newHashSet(newServer.getAddress()), groupName);
229    // wait and check if table regions are online
230    TEST_UTIL.waitTableAvailable(tableName, 30000);
231  }
232
233  @Test
234  public void testLowerMetaGroupVersion() throws Exception{
235    // create a rsgroup and move one regionserver to it
236    String groupName = "meta_group";
237    int groupRSCount = 1;
238    addGroup(groupName, groupRSCount);
239
240    // move hbase:meta to meta_group
241    Set<TableName> toAddTables = new HashSet<>();
242    toAddTables.add(TableName.META_TABLE_NAME);
243    rsGroupAdmin.moveTables(toAddTables, groupName);
244    assertTrue(
245        rsGroupAdmin.getRSGroupInfo(groupName).getTables().contains(TableName.META_TABLE_NAME));
246
247    // restart the regionserver in meta_group, and lower its version
248    String originVersion = "";
249    Set<Address> servers = new HashSet<>();
250    for(Address addr : rsGroupAdmin.getRSGroupInfo(groupName).getServers()) {
251      servers.add(addr);
252      TEST_UTIL.getMiniHBaseCluster().stopRegionServer(getServerName(addr));
253      originVersion = master.getRegionServerVersion(getServerName(addr));
254    }
255    // better wait for a while for region reassign
256    sleep(10000);
257    assertEquals(NUM_SLAVES_BASE - groupRSCount,
258        TEST_UTIL.getMiniHBaseCluster().getLiveRegionServerThreads().size());
259    Address address = servers.iterator().next();
260    int majorVersion = VersionInfo.getMajorVersion(originVersion);
261    assertTrue(majorVersion >= 1);
262    String lowerVersion = String.valueOf(majorVersion - 1) + originVersion.split("\\.")[1];
263    setFinalStatic(Version.class.getField("version"), lowerVersion);
264    TEST_UTIL.getMiniHBaseCluster().startRegionServer(address.getHostname(),
265        address.getPort());
266    assertEquals(NUM_SLAVES_BASE,
267        TEST_UTIL.getMiniHBaseCluster().getLiveRegionServerThreads().size());
268    assertTrue(VersionInfo.compareVersion(originVersion,
269        master.getRegionServerVersion(getServerName(servers.iterator().next()))) > 0);
270    LOG.debug("wait for META assigned...");
271    // SCP finished, which means all regions assigned too.
272    TEST_UTIL.waitFor(60000, () -> !TEST_UTIL.getHBaseCluster().getMaster().getProcedures().stream()
273        .filter(p -> (p instanceof ServerCrashProcedure)).findAny().isPresent());
274  }
275
276  private static void setFinalStatic(Field field, Object newValue) throws Exception {
277    field.setAccessible(true);
278    Field modifiersField = Field.class.getDeclaredField("modifiers");
279    modifiersField.setAccessible(true);
280    modifiersField.setInt(field, field.getModifiers() & ~Modifier.FINAL);
281    field.set(null, newValue);
282  }
283}