001/* 002 * Licensed to the Apache Software Foundation (ASF) under one 003 * or more contributor license agreements. See the NOTICE file 004 * distributed with this work for additional information 005 * regarding copyright ownership. The ASF licenses this file 006 * to you under the Apache License, Version 2.0 (the 007 * "License"); you may not use this file except in compliance 008 * with the License. You may obtain a copy of the License at 009 * 010 * http://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, software 013 * distributed under the License is distributed on an "AS IS" BASIS, 014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 015 * See the License for the specific language governing permissions and 016 * limitations under the License. 017 */ 018package org.apache.hadoop.hbase.regionserver; 019 020import static org.apache.hadoop.hbase.client.TableDescriptorBuilder.SPLIT_POLICY; 021import static org.junit.Assert.assertEquals; 022import static org.junit.Assert.assertFalse; 023import static org.junit.Assert.assertNotEquals; 024import static org.junit.Assert.assertNotNull; 025import static org.junit.Assert.assertNotSame; 026import static org.junit.Assert.assertNull; 027import static org.junit.Assert.assertTrue; 028import static org.junit.Assert.fail; 029 030import java.io.IOException; 031import java.lang.reflect.Field; 032import java.util.ArrayList; 033import java.util.Collection; 034import java.util.List; 035import java.util.Map; 036import java.util.Optional; 037import java.util.concurrent.CountDownLatch; 038import java.util.concurrent.ExecutionException; 039import java.util.concurrent.TimeUnit; 040import java.util.concurrent.TimeoutException; 041import java.util.concurrent.atomic.AtomicBoolean; 042import org.apache.hadoop.conf.Configuration; 043import org.apache.hadoop.fs.FileSystem; 044import org.apache.hadoop.fs.Path; 045import org.apache.hadoop.hbase.CellComparator; 046import org.apache.hadoop.hbase.Coprocessor; 047import org.apache.hadoop.hbase.CoprocessorEnvironment; 048import org.apache.hadoop.hbase.DoNotRetryIOException; 049import org.apache.hadoop.hbase.HBaseClassTestRule; 050import org.apache.hadoop.hbase.HBaseTestingUtil; 051import org.apache.hadoop.hbase.HConstants; 052import org.apache.hadoop.hbase.MasterNotRunningException; 053import org.apache.hadoop.hbase.PrivateCellUtil; 054import org.apache.hadoop.hbase.ServerName; 055import org.apache.hadoop.hbase.SingleProcessHBaseCluster; 056import org.apache.hadoop.hbase.StartTestingClusterOption; 057import org.apache.hadoop.hbase.TableName; 058import org.apache.hadoop.hbase.ZooKeeperConnectionException; 059import org.apache.hadoop.hbase.client.Admin; 060import org.apache.hadoop.hbase.client.ColumnFamilyDescriptorBuilder; 061import org.apache.hadoop.hbase.client.Consistency; 062import org.apache.hadoop.hbase.client.Delete; 063import org.apache.hadoop.hbase.client.DoNotRetryRegionException; 064import org.apache.hadoop.hbase.client.Get; 065import org.apache.hadoop.hbase.client.Mutation; 066import org.apache.hadoop.hbase.client.Put; 067import org.apache.hadoop.hbase.client.RegionInfo; 068import org.apache.hadoop.hbase.client.Result; 069import org.apache.hadoop.hbase.client.ResultScanner; 070import org.apache.hadoop.hbase.client.Scan; 071import org.apache.hadoop.hbase.client.Table; 072import org.apache.hadoop.hbase.client.TableDescriptor; 073import org.apache.hadoop.hbase.client.TableDescriptorBuilder; 074import org.apache.hadoop.hbase.client.TestReplicasClient.SlowMeCopro; 075import org.apache.hadoop.hbase.coprocessor.MasterCoprocessor; 076import org.apache.hadoop.hbase.coprocessor.MasterCoprocessorEnvironment; 077import org.apache.hadoop.hbase.coprocessor.MasterObserver; 078import org.apache.hadoop.hbase.coprocessor.ObserverContext; 079import org.apache.hadoop.hbase.io.HFileLink; 080import org.apache.hadoop.hbase.io.Reference; 081import org.apache.hadoop.hbase.master.HMaster; 082import org.apache.hadoop.hbase.master.MasterRpcServices; 083import org.apache.hadoop.hbase.master.RegionState; 084import org.apache.hadoop.hbase.master.RegionState.State; 085import org.apache.hadoop.hbase.master.assignment.AssignmentManager; 086import org.apache.hadoop.hbase.master.assignment.AssignmentTestingUtil; 087import org.apache.hadoop.hbase.master.assignment.RegionStateNode; 088import org.apache.hadoop.hbase.master.assignment.RegionStates; 089import org.apache.hadoop.hbase.procedure2.ProcedureTestingUtility; 090import org.apache.hadoop.hbase.regionserver.compactions.CompactionContext; 091import org.apache.hadoop.hbase.regionserver.compactions.CompactionLifeCycleTracker; 092import org.apache.hadoop.hbase.regionserver.throttle.NoLimitThroughputController; 093import org.apache.hadoop.hbase.testclassification.LargeTests; 094import org.apache.hadoop.hbase.testclassification.RegionServerTests; 095import org.apache.hadoop.hbase.util.Bytes; 096import org.apache.hadoop.hbase.util.CommonFSUtils; 097import org.apache.hadoop.hbase.util.EnvironmentEdgeManager; 098import org.apache.hadoop.hbase.util.FSUtils; 099import org.apache.hadoop.hbase.util.FutureUtils; 100import org.apache.hadoop.hbase.util.HBaseFsck; 101import org.apache.hadoop.hbase.util.JVMClusterUtil.RegionServerThread; 102import org.apache.hadoop.hbase.util.Threads; 103import org.apache.zookeeper.KeeperException; 104import org.apache.zookeeper.KeeperException.NodeExistsException; 105import org.junit.After; 106import org.junit.AfterClass; 107import org.junit.Assert; 108import org.junit.Before; 109import org.junit.BeforeClass; 110import org.junit.ClassRule; 111import org.junit.Rule; 112import org.junit.Test; 113import org.junit.experimental.categories.Category; 114import org.junit.rules.TestName; 115import org.mockito.Mockito; 116import org.slf4j.Logger; 117import org.slf4j.LoggerFactory; 118 119import org.apache.hbase.thirdparty.com.google.common.io.Closeables; 120import org.apache.hbase.thirdparty.com.google.protobuf.RpcController; 121import org.apache.hbase.thirdparty.com.google.protobuf.ServiceException; 122 123import org.apache.hadoop.hbase.shaded.protobuf.ProtobufUtil; 124import org.apache.hadoop.hbase.shaded.protobuf.generated.RegionServerStatusProtos.RegionStateTransition.TransitionCode; 125import org.apache.hadoop.hbase.shaded.protobuf.generated.RegionServerStatusProtos.ReportRegionStateTransitionRequest; 126import org.apache.hadoop.hbase.shaded.protobuf.generated.RegionServerStatusProtos.ReportRegionStateTransitionResponse; 127 128/** 129 * The below tests are testing split region against a running cluster 130 */ 131@Category({ RegionServerTests.class, LargeTests.class }) 132public class TestSplitTransactionOnCluster { 133 134 @ClassRule 135 public static final HBaseClassTestRule CLASS_RULE = 136 HBaseClassTestRule.forClass(TestSplitTransactionOnCluster.class); 137 138 private static final Logger LOG = LoggerFactory.getLogger(TestSplitTransactionOnCluster.class); 139 private Admin admin = null; 140 private SingleProcessHBaseCluster cluster = null; 141 private static final int NB_SERVERS = 3; 142 143 static final HBaseTestingUtil TESTING_UTIL = new HBaseTestingUtil(); 144 145 @Rule 146 public TestName name = new TestName(); 147 148 @BeforeClass 149 public static void before() throws Exception { 150 TESTING_UTIL.getConfiguration().setInt(HConstants.HBASE_BALANCER_PERIOD, 60000); 151 StartTestingClusterOption option = StartTestingClusterOption.builder() 152 .masterClass(MyMaster.class).numRegionServers(NB_SERVERS).numDataNodes(NB_SERVERS).build(); 153 TESTING_UTIL.startMiniCluster(option); 154 } 155 156 @AfterClass 157 public static void after() throws Exception { 158 TESTING_UTIL.shutdownMiniCluster(); 159 } 160 161 @Before 162 public void setup() throws IOException { 163 TESTING_UTIL.ensureSomeNonStoppedRegionServersAvailable(NB_SERVERS); 164 this.admin = TESTING_UTIL.getAdmin(); 165 this.cluster = TESTING_UTIL.getMiniHBaseCluster(); 166 } 167 168 @After 169 public void tearDown() throws Exception { 170 this.admin.close(); 171 for (TableDescriptor htd : this.admin.listTableDescriptors()) { 172 LOG.info("Tear down, remove table=" + htd.getTableName()); 173 TESTING_UTIL.deleteTable(htd.getTableName()); 174 } 175 } 176 177 private RegionInfo getAndCheckSingleTableRegion(final List<HRegion> regions) 178 throws IOException, InterruptedException { 179 assertEquals(1, regions.size()); 180 RegionInfo hri = regions.get(0).getRegionInfo(); 181 AssignmentTestingUtil.waitForAssignment(cluster.getMaster().getAssignmentManager(), hri); 182 return hri; 183 } 184 185 private void requestSplitRegion(final HRegionServer rsServer, final Region region, 186 final byte[] midKey) throws IOException { 187 long procId = cluster.getMaster().splitRegion(region.getRegionInfo(), midKey, 0, 0); 188 // wait for the split to complete or get interrupted. If the split completes successfully, 189 // the procedure will return true; if the split fails, the procedure would throw exception. 190 ProcedureTestingUtility.waitProcedure(cluster.getMaster().getMasterProcedureExecutor(), procId); 191 } 192 193 @Test 194 public void testRITStateForRollback() throws Exception { 195 final TableName tableName = TableName.valueOf(name.getMethodName()); 196 final HMaster master = cluster.getMaster(); 197 try { 198 // Create table then get the single region for our new table. 199 Table t = createTableAndWait(tableName, Bytes.toBytes("cf")); 200 final List<HRegion> regions = cluster.getRegions(tableName); 201 final RegionInfo hri = getAndCheckSingleTableRegion(regions); 202 insertData(tableName, admin, t); 203 t.close(); 204 205 // Turn off balancer so it doesn't cut in and mess up our placements. 206 this.admin.balancerSwitch(false, true); 207 // Turn off the meta scanner so it don't remove parent on us. 208 master.setCatalogJanitorEnabled(false); 209 210 // find a splittable region 211 final HRegion region = findSplittableRegion(regions); 212 assertTrue("not able to find a splittable region", region != null); 213 214 // install master co-processor to fail splits 215 master.getMasterCoprocessorHost().load(FailingSplitMasterObserver.class, 216 Coprocessor.PRIORITY_USER, master.getConfiguration()); 217 218 // split async 219 this.admin.splitRegionAsync(region.getRegionInfo().getRegionName(), new byte[] { 42 }); 220 221 // we have to wait until the SPLITTING state is seen by the master 222 FailingSplitMasterObserver observer = 223 master.getMasterCoprocessorHost().findCoprocessor(FailingSplitMasterObserver.class); 224 assertNotNull(observer); 225 observer.latch.await(); 226 227 LOG.info("Waiting for region to come out of RIT"); 228 while (!cluster.getMaster().getAssignmentManager().getRegionStates().isRegionOnline(hri)) { 229 Threads.sleep(100); 230 } 231 assertTrue(cluster.getMaster().getAssignmentManager().getRegionStates().isRegionOnline(hri)); 232 } finally { 233 admin.balancerSwitch(true, false); 234 master.setCatalogJanitorEnabled(true); 235 abortAndWaitForMaster(); 236 TESTING_UTIL.deleteTable(tableName); 237 } 238 } 239 240 @Test 241 public void testSplitFailedCompactionAndSplit() throws Exception { 242 final TableName tableName = TableName.valueOf(name.getMethodName()); 243 // Create table then get the single region for our new table. 244 byte[] cf = Bytes.toBytes("cf"); 245 TableDescriptor htd = TableDescriptorBuilder.newBuilder(tableName) 246 .setColumnFamily(ColumnFamilyDescriptorBuilder.of(cf)).build(); 247 admin.createTable(htd); 248 249 for (int i = 0; cluster.getRegions(tableName).isEmpty() && i < 100; i++) { 250 Thread.sleep(100); 251 } 252 assertEquals(1, cluster.getRegions(tableName).size()); 253 254 HRegion region = cluster.getRegions(tableName).get(0); 255 HStore store = region.getStore(cf); 256 int regionServerIndex = cluster.getServerWith(region.getRegionInfo().getRegionName()); 257 HRegionServer regionServer = cluster.getRegionServer(regionServerIndex); 258 259 Table t = TESTING_UTIL.getConnection().getTable(tableName); 260 // insert data 261 insertData(tableName, admin, t); 262 insertData(tableName, admin, t); 263 264 int fileNum = store.getStorefiles().size(); 265 // 0, Compaction Request 266 store.triggerMajorCompaction(); 267 Optional<CompactionContext> cc = store.requestCompaction(); 268 assertTrue(cc.isPresent()); 269 // 1, A timeout split 270 // 1.1 close region 271 assertEquals(2, region.close(false).get(cf).size()); 272 // 1.2 rollback and Region initialize again 273 region.initialize(); 274 275 // 2, Run Compaction cc 276 assertFalse(region.compact(cc.get(), store, NoLimitThroughputController.INSTANCE)); 277 assertTrue(fileNum > store.getStorefiles().size()); 278 279 // 3, Split 280 requestSplitRegion(regionServer, region, Bytes.toBytes("row3")); 281 assertEquals(2, cluster.getRegions(tableName).size()); 282 } 283 284 @Test 285 public void testSplitCompactWithPriority() throws Exception { 286 final TableName tableName = TableName.valueOf(name.getMethodName()); 287 // Create table then get the single region for our new table. 288 byte[] cf = Bytes.toBytes("cf"); 289 TableDescriptor htd = TableDescriptorBuilder.newBuilder(tableName) 290 .setColumnFamily(ColumnFamilyDescriptorBuilder.of(cf)).build(); 291 admin.createTable(htd); 292 293 assertNotEquals("Unable to retrieve regions of the table", -1, 294 TESTING_UTIL.waitFor(10000, () -> cluster.getRegions(tableName).size() == 1)); 295 296 HRegion region = cluster.getRegions(tableName).get(0); 297 HStore store = region.getStore(cf); 298 int regionServerIndex = cluster.getServerWith(region.getRegionInfo().getRegionName()); 299 HRegionServer regionServer = cluster.getRegionServer(regionServerIndex); 300 301 Table table = TESTING_UTIL.getConnection().getTable(tableName); 302 // insert data 303 insertData(tableName, admin, table); 304 insertData(tableName, admin, table, 20); 305 insertData(tableName, admin, table, 40); 306 307 // Compaction Request 308 store.triggerMajorCompaction(); 309 Optional<CompactionContext> compactionContext = store.requestCompaction(); 310 assertTrue(compactionContext.isPresent()); 311 assertFalse(compactionContext.get().getRequest().isAfterSplit()); 312 assertEquals(compactionContext.get().getRequest().getPriority(), 13); 313 314 // Split 315 long procId = 316 cluster.getMaster().splitRegion(region.getRegionInfo(), Bytes.toBytes("row4"), 0, 0); 317 318 // wait for the split to complete or get interrupted. If the split completes successfully, 319 // the procedure will return true; if the split fails, the procedure would throw exception. 320 ProcedureTestingUtility.waitProcedure(cluster.getMaster().getMasterProcedureExecutor(), procId); 321 Thread.sleep(3000); 322 assertNotEquals("Table is not split properly?", -1, 323 TESTING_UTIL.waitFor(3000, () -> cluster.getRegions(tableName).size() == 2)); 324 // we have 2 daughter regions 325 HRegion hRegion1 = cluster.getRegions(tableName).get(0); 326 HRegion hRegion2 = cluster.getRegions(tableName).get(1); 327 HStore hStore1 = hRegion1.getStore(cf); 328 HStore hStore2 = hRegion2.getStore(cf); 329 330 // For hStore1 && hStore2, set mock reference to one of the storeFiles 331 StoreFileInfo storeFileInfo1 = new ArrayList<>(hStore1.getStorefiles()).get(0).getFileInfo(); 332 StoreFileInfo storeFileInfo2 = new ArrayList<>(hStore2.getStorefiles()).get(0).getFileInfo(); 333 Field field = StoreFileInfo.class.getDeclaredField("reference"); 334 field.setAccessible(true); 335 field.set(storeFileInfo1, Mockito.mock(Reference.class)); 336 field.set(storeFileInfo2, Mockito.mock(Reference.class)); 337 hStore1.triggerMajorCompaction(); 338 hStore2.triggerMajorCompaction(); 339 340 compactionContext = hStore1.requestCompaction(); 341 assertTrue(compactionContext.isPresent()); 342 // since we set mock reference to one of the storeFiles, we will get isAfterSplit=true && 343 // highest priority for hStore1's compactionContext 344 assertTrue(compactionContext.get().getRequest().isAfterSplit()); 345 assertEquals(compactionContext.get().getRequest().getPriority(), Integer.MIN_VALUE + 1000); 346 347 compactionContext = 348 hStore2.requestCompaction(Integer.MIN_VALUE + 10, CompactionLifeCycleTracker.DUMMY, null); 349 assertTrue(compactionContext.isPresent()); 350 // compaction request contains higher priority than default priority of daughter region 351 // compaction (Integer.MIN_VALUE + 1000), hence we are expecting request priority to 352 // be accepted. 353 assertTrue(compactionContext.get().getRequest().isAfterSplit()); 354 assertEquals(compactionContext.get().getRequest().getPriority(), Integer.MIN_VALUE + 10); 355 admin.disableTable(tableName); 356 admin.deleteTable(tableName); 357 } 358 359 @Test 360 public void testContinuousSplitUsingLinkFile() throws Exception { 361 final TableName tableName = TableName.valueOf(name.getMethodName()); 362 // Create table then get the single region for our new table. 363 byte[] cf = Bytes.toBytes("cf"); 364 TableDescriptorBuilder builder = TableDescriptorBuilder.newBuilder(tableName) 365 .setColumnFamily(ColumnFamilyDescriptorBuilder.of(cf)); 366 String splitPolicy = ConstantSizeRegionSplitPolicy.class.getName(); 367 builder.setValue(SPLIT_POLICY, splitPolicy); 368 369 admin.createTable(builder.build()); 370 admin.compactionSwitch(false, new ArrayList<>()); 371 372 assertNotEquals("Unable to retrieve regions of the table", -1, 373 TESTING_UTIL.waitFor(10000, () -> cluster.getRegions(tableName).size() == 1)); 374 Table table = TESTING_UTIL.getConnection().getTable(tableName); 375 // insert data 376 insertData(tableName, admin, table, 10); 377 insertData(tableName, admin, table, 20); 378 insertData(tableName, admin, table, 40); 379 int rowCount = 3 * 4; 380 Scan scan = new Scan(); 381 scanValidate(scan, rowCount, table); 382 383 // Split 384 admin.splitRegionAsync(cluster.getRegions(tableName).get(0).getRegionInfo().getRegionName(), 385 Bytes.toBytes("row14")); 386 // wait for the split to complete or get interrupted. If the split completes successfully, 387 // the procedure will return true; if the split fails, the procedure would throw exception. 388 Thread.sleep(3000); 389 assertNotEquals("Table is not split properly?", -1, 390 TESTING_UTIL.waitFor(3000, () -> cluster.getRegions(tableName).size() == 2)); 391 // we have 2 daughter regions 392 HRegion hRegion1 = cluster.getRegions(tableName).get(0); 393 HRegion hRegion2 = cluster.getRegions(tableName).get(1); 394 HStore hStore1 = hRegion1.getStore(cf); 395 HStore hStore2 = hRegion2.getStore(cf); 396 // the sum of store files of the two children should be equal to their parent 397 assertEquals(3, hStore1.getStorefilesCount() + hStore2.getStorefilesCount()); 398 // both the two children should have link files 399 for (StoreFile sf : hStore1.getStorefiles()) { 400 assertTrue(HFileLink.isHFileLink(sf.getPath())); 401 } 402 for (StoreFile sf : hStore2.getStorefiles()) { 403 assertTrue(HFileLink.isHFileLink(sf.getPath())); 404 } 405 // validate children data 406 scan = new Scan(); 407 scanValidate(scan, rowCount, table); 408 409 // Continuous Split 410 findRegionToSplit(tableName, "row24"); 411 Thread.sleep(3000); 412 assertNotEquals("Table is not split properly?", -1, 413 TESTING_UTIL.waitFor(3000, () -> cluster.getRegions(tableName).size() == 3)); 414 // now table has 3 region, each region should have one link file 415 for (HRegion newRegion : cluster.getRegions(tableName)) { 416 assertEquals(1, newRegion.getStore(cf).getStorefilesCount()); 417 assertTrue( 418 HFileLink.isHFileLink(newRegion.getStore(cf).getStorefiles().iterator().next().getPath())); 419 } 420 421 scan = new Scan(); 422 scanValidate(scan, rowCount, table); 423 424 // Continuous Split, random split HFileLink, generate Reference files. 425 // After this, can not continuous split, because there are reference files. 426 findRegionToSplit(tableName, "row11"); 427 Thread.sleep(3000); 428 assertNotEquals("Table is not split properly?", -1, 429 TESTING_UTIL.waitFor(3000, () -> cluster.getRegions(tableName).size() == 4)); 430 431 scan = new Scan(); 432 scanValidate(scan, rowCount, table); 433 } 434 435 private void findRegionToSplit(TableName tableName, String splitRowKey) throws Exception { 436 HRegion toSplit = null; 437 byte[] toSplitKey = Bytes.toBytes(splitRowKey); 438 for (HRegion rg : cluster.getRegions(tableName)) { 439 LOG.debug( 440 "startKey=" + Bytes.toStringBinary(rg.getRegionInfo().getStartKey()) + ", getEndKey()=" 441 + Bytes.toStringBinary(rg.getRegionInfo().getEndKey()) + ", row=" + splitRowKey); 442 if ( 443 (rg.getRegionInfo().getStartKey().length == 0 || CellComparator.getInstance().compare( 444 PrivateCellUtil.createFirstOnRow(rg.getRegionInfo().getStartKey()), 445 PrivateCellUtil.createFirstOnRow(toSplitKey)) <= 0) 446 && (rg.getRegionInfo().getEndKey().length == 0 || CellComparator.getInstance().compare( 447 PrivateCellUtil.createFirstOnRow(rg.getRegionInfo().getEndKey()), 448 PrivateCellUtil.createFirstOnRow(toSplitKey)) >= 0) 449 ) { 450 toSplit = rg; 451 } 452 } 453 assertNotNull(toSplit); 454 admin.splitRegionAsync(toSplit.getRegionInfo().getRegionName(), toSplitKey); 455 } 456 457 private static void scanValidate(Scan scan, int expectedRowCount, Table table) 458 throws IOException { 459 ResultScanner scanner = table.getScanner(scan); 460 int rows = 0; 461 for (Result result : scanner) { 462 rows++; 463 } 464 scanner.close(); 465 assertEquals(expectedRowCount, rows); 466 } 467 468 public static class FailingSplitMasterObserver implements MasterCoprocessor, MasterObserver { 469 volatile CountDownLatch latch; 470 471 @Override 472 public void start(CoprocessorEnvironment e) throws IOException { 473 latch = new CountDownLatch(1); 474 } 475 476 @Override 477 public Optional<MasterObserver> getMasterObserver() { 478 return Optional.of(this); 479 } 480 481 @Override 482 public void preSplitRegionBeforeMETAAction( 483 final ObserverContext<MasterCoprocessorEnvironment> ctx, final byte[] splitKey, 484 final List<Mutation> metaEntries) throws IOException { 485 latch.countDown(); 486 throw new IOException("Causing rollback of region split"); 487 } 488 } 489 490 @Test 491 public void testSplitRollbackOnRegionClosing() throws Exception { 492 final TableName tableName = TableName.valueOf(name.getMethodName()); 493 494 // Create table then get the single region for our new table. 495 Table t = createTableAndWait(tableName, HConstants.CATALOG_FAMILY); 496 List<HRegion> regions = cluster.getRegions(tableName); 497 RegionInfo hri = getAndCheckSingleTableRegion(regions); 498 499 int tableRegionIndex = ensureTableRegionNotOnSameServerAsMeta(admin, hri); 500 501 RegionStates regionStates = cluster.getMaster().getAssignmentManager().getRegionStates(); 502 503 // Turn off balancer so it doesn't cut in and mess up our placements. 504 this.admin.balancerSwitch(false, true); 505 // Turn off the meta scanner so it don't remove parent on us. 506 cluster.getMaster().setCatalogJanitorEnabled(false); 507 try { 508 // Add a bit of load up into the table so splittable. 509 TESTING_UTIL.loadTable(t, HConstants.CATALOG_FAMILY, false); 510 // Get region pre-split. 511 HRegionServer server = cluster.getRegionServer(tableRegionIndex); 512 printOutRegions(server, "Initial regions: "); 513 int regionCount = cluster.getRegions(hri.getTable()).size(); 514 regionStates.updateRegionState(hri, RegionState.State.CLOSING); 515 516 // Now try splitting.... should fail. And each should successfully 517 // rollback. 518 // We don't roll back here anymore. Instead we fail-fast on construction of the 519 // split transaction. Catch the exception instead. 520 try { 521 FutureUtils.get(this.admin.splitRegionAsync(hri.getRegionName())); 522 fail(); 523 } catch (DoNotRetryRegionException e) { 524 // Expected 525 } 526 // Wait around a while and assert count of regions remains constant. 527 for (int i = 0; i < 10; i++) { 528 Thread.sleep(100); 529 assertEquals(regionCount, cluster.getRegions(hri.getTable()).size()); 530 } 531 regionStates.updateRegionState(hri, State.OPEN); 532 // Now try splitting and it should work. 533 admin.splitRegionAsync(hri.getRegionName()).get(2, TimeUnit.MINUTES); 534 // Get daughters 535 checkAndGetDaughters(tableName); 536 // OK, so split happened after we cleared the blocking node. 537 } finally { 538 admin.balancerSwitch(true, false); 539 cluster.getMaster().setCatalogJanitorEnabled(true); 540 t.close(); 541 } 542 } 543 544 /** 545 * Test that if daughter split on us, we won't do the shutdown handler fixup just because we can't 546 * find the immediate daughter of an offlined parent. 547 */ 548 @Test 549 public void testShutdownFixupWhenDaughterHasSplit() throws Exception { 550 final TableName tableName = TableName.valueOf(name.getMethodName()); 551 552 // Create table then get the single region for our new table. 553 Table t = createTableAndWait(tableName, HConstants.CATALOG_FAMILY); 554 List<HRegion> regions = cluster.getRegions(tableName); 555 RegionInfo hri = getAndCheckSingleTableRegion(regions); 556 int tableRegionIndex = ensureTableRegionNotOnSameServerAsMeta(admin, hri); 557 558 // Turn off balancer so it doesn't cut in and mess up our placements. 559 this.admin.balancerSwitch(false, true); 560 // Turn off the meta scanner so it don't remove parent on us. 561 cluster.getMaster().setCatalogJanitorEnabled(false); 562 try { 563 // Add a bit of load up into the table so splittable. 564 TESTING_UTIL.loadTable(t, HConstants.CATALOG_FAMILY); 565 // Get region pre-split. 566 HRegionServer server = cluster.getRegionServer(tableRegionIndex); 567 printOutRegions(server, "Initial regions: "); 568 // Now split. 569 admin.splitRegionAsync(hri.getRegionName()).get(2, TimeUnit.MINUTES); 570 // Get daughters 571 List<HRegion> daughters = checkAndGetDaughters(tableName); 572 // Now split one of the daughters. 573 HRegion daughterRegion = daughters.get(0); 574 RegionInfo daughter = daughterRegion.getRegionInfo(); 575 LOG.info("Daughter we are going to split: " + daughter); 576 clearReferences(daughterRegion); 577 LOG.info("Finished {} references={}", daughterRegion, daughterRegion.hasReferences()); 578 admin.splitRegionAsync(daughter.getRegionName()).get(2, TimeUnit.MINUTES); 579 // Get list of daughters 580 daughters = cluster.getRegions(tableName); 581 for (HRegion d : daughters) { 582 LOG.info("Regions before crash: " + d); 583 } 584 // Now crash the server 585 cluster.abortRegionServer(tableRegionIndex); 586 waitUntilRegionServerDead(); 587 awaitDaughters(tableName, daughters.size()); 588 // Assert daughters are online and ONLY the original daughters -- that 589 // fixup didn't insert one during server shutdown recover. 590 regions = cluster.getRegions(tableName); 591 for (HRegion d : daughters) { 592 LOG.info("Regions after crash: " + d); 593 } 594 if (daughters.size() != regions.size()) { 595 LOG.info("Daughters=" + daughters.size() + ", regions=" + regions.size()); 596 } 597 assertEquals(daughters.size(), regions.size()); 598 for (HRegion r : regions) { 599 LOG.info("Regions post crash " + r + ", contains=" + daughters.contains(r)); 600 assertTrue("Missing region post crash " + r, daughters.contains(r)); 601 } 602 } finally { 603 LOG.info("EXITING"); 604 admin.balancerSwitch(true, false); 605 cluster.getMaster().setCatalogJanitorEnabled(true); 606 t.close(); 607 } 608 } 609 610 private void clearReferences(HRegion region) throws IOException { 611 // Presumption. 612 assertEquals(1, region.getStores().size()); 613 HStore store = region.getStores().get(0); 614 while (store.hasReferences()) { 615 while (store.storeEngine.getCompactor().isCompacting()) { 616 Threads.sleep(100); 617 } 618 // Run new compaction. Shoudn't be any others running. 619 region.compact(true); 620 store.closeAndArchiveCompactedFiles(); 621 } 622 } 623 624 @Test 625 public void testSplitShouldNotThrowNPEEvenARegionHasEmptySplitFiles() throws Exception { 626 TableName userTableName = TableName.valueOf(name.getMethodName()); 627 TableDescriptor htd = TableDescriptorBuilder.newBuilder(userTableName) 628 .setColumnFamily(ColumnFamilyDescriptorBuilder.of("col")).build(); 629 admin.createTable(htd); 630 Table table = TESTING_UTIL.getConnection().getTable(userTableName); 631 try { 632 for (int i = 0; i <= 5; i++) { 633 String row = "row" + i; 634 Put p = new Put(Bytes.toBytes(row)); 635 String val = "Val" + i; 636 p.addColumn(Bytes.toBytes("col"), Bytes.toBytes("ql"), Bytes.toBytes(val)); 637 table.put(p); 638 admin.flush(userTableName); 639 Delete d = new Delete(Bytes.toBytes(row)); 640 // Do a normal delete 641 table.delete(d); 642 admin.flush(userTableName); 643 } 644 admin.majorCompact(userTableName); 645 List<RegionInfo> regionsOfTable = cluster.getMaster().getAssignmentManager().getRegionStates() 646 .getRegionsOfTable(userTableName); 647 assertEquals(1, regionsOfTable.size()); 648 RegionInfo hRegionInfo = regionsOfTable.get(0); 649 Put p = new Put(Bytes.toBytes("row6")); 650 p.addColumn(Bytes.toBytes("col"), Bytes.toBytes("ql"), Bytes.toBytes("val")); 651 table.put(p); 652 p = new Put(Bytes.toBytes("row7")); 653 p.addColumn(Bytes.toBytes("col"), Bytes.toBytes("ql"), Bytes.toBytes("val")); 654 table.put(p); 655 p = new Put(Bytes.toBytes("row8")); 656 p.addColumn(Bytes.toBytes("col"), Bytes.toBytes("ql"), Bytes.toBytes("val")); 657 table.put(p); 658 admin.flush(userTableName); 659 admin.splitRegionAsync(hRegionInfo.getRegionName(), Bytes.toBytes("row7")); 660 regionsOfTable = cluster.getMaster().getAssignmentManager().getRegionStates() 661 .getRegionsOfTable(userTableName); 662 663 while (regionsOfTable.size() != 2) { 664 Thread.sleep(1000); 665 regionsOfTable = cluster.getMaster().getAssignmentManager().getRegionStates() 666 .getRegionsOfTable(userTableName); 667 LOG.debug("waiting 2 regions to be available, got " + regionsOfTable.size() + ": " 668 + regionsOfTable); 669 670 } 671 Assert.assertEquals(2, regionsOfTable.size()); 672 673 Scan s = new Scan(); 674 ResultScanner scanner = table.getScanner(s); 675 int mainTableCount = 0; 676 for (Result rr = scanner.next(); rr != null; rr = scanner.next()) { 677 mainTableCount++; 678 } 679 Assert.assertEquals(3, mainTableCount); 680 } finally { 681 table.close(); 682 } 683 } 684 685 /** 686 * Verifies HBASE-5806. Here the case is that splitting is completed but before the CJ could 687 * remove the parent region the master is killed and restarted. 688 */ 689 @Test 690 public void testMasterRestartAtRegionSplitPendingCatalogJanitor() 691 throws IOException, InterruptedException, NodeExistsException, KeeperException, 692 ServiceException, ExecutionException, TimeoutException { 693 final TableName tableName = TableName.valueOf(name.getMethodName()); 694 // Create table then get the single region for our new table. 695 try (Table t = createTableAndWait(tableName, HConstants.CATALOG_FAMILY)) { 696 List<HRegion> regions = cluster.getRegions(tableName); 697 RegionInfo hri = getAndCheckSingleTableRegion(regions); 698 699 int tableRegionIndex = ensureTableRegionNotOnSameServerAsMeta(admin, hri); 700 701 // Turn off balancer so it doesn't cut in and mess up our placements. 702 this.admin.balancerSwitch(false, true); 703 // Turn off the meta scanner so it don't remove parent on us. 704 cluster.getMaster().setCatalogJanitorEnabled(false); 705 // Add a bit of load up into the table so splittable. 706 TESTING_UTIL.loadTable(t, HConstants.CATALOG_FAMILY, false); 707 // Get region pre-split. 708 HRegionServer server = cluster.getRegionServer(tableRegionIndex); 709 printOutRegions(server, "Initial regions: "); 710 // Call split. 711 this.admin.splitRegionAsync(hri.getRegionName()).get(2, TimeUnit.MINUTES); 712 List<HRegion> daughters = checkAndGetDaughters(tableName); 713 714 // Before cleanup, get a new master. 715 HMaster master = abortAndWaitForMaster(); 716 // Now call compact on the daughters and clean up any references. 717 for (HRegion daughter : daughters) { 718 clearReferences(daughter); 719 assertFalse(daughter.hasReferences()); 720 } 721 // BUT calling compact on the daughters is not enough. The CatalogJanitor looks 722 // in the filesystem, and the filesystem content is not same as what the Region 723 // is reading from. Compacted-away files are picked up later by the compacted 724 // file discharger process. It runs infrequently. Make it run so CatalogJanitor 725 // doens't find any references. 726 for (RegionServerThread rst : cluster.getRegionServerThreads()) { 727 boolean oldSetting = rst.getRegionServer().compactedFileDischarger.setUseExecutor(false); 728 rst.getRegionServer().compactedFileDischarger.run(); 729 rst.getRegionServer().compactedFileDischarger.setUseExecutor(oldSetting); 730 } 731 cluster.getMaster().setCatalogJanitorEnabled(true); 732 ProcedureTestingUtility.waitAllProcedures(cluster.getMaster().getMasterProcedureExecutor()); 733 LOG.info("Starting run of CatalogJanitor"); 734 cluster.getMaster().getCatalogJanitor().run(); 735 ProcedureTestingUtility.waitAllProcedures(cluster.getMaster().getMasterProcedureExecutor()); 736 RegionStates regionStates = master.getAssignmentManager().getRegionStates(); 737 ServerName regionServerOfRegion = regionStates.getRegionServerOfRegion(hri); 738 assertEquals(null, regionServerOfRegion); 739 } finally { 740 TESTING_UTIL.getAdmin().balancerSwitch(true, false); 741 cluster.getMaster().setCatalogJanitorEnabled(true); 742 } 743 } 744 745 @Test 746 public void testSplitWithRegionReplicas() throws Exception { 747 final TableName tableName = TableName.valueOf(name.getMethodName()); 748 TableDescriptor htd = TESTING_UTIL 749 .createModifyableTableDescriptor(TableName.valueOf(name.getMethodName()), 750 ColumnFamilyDescriptorBuilder.DEFAULT_MIN_VERSIONS, 3, HConstants.FOREVER, 751 ColumnFamilyDescriptorBuilder.DEFAULT_KEEP_DELETED) 752 .setRegionReplication(2).setCoprocessor(SlowMeCopro.class.getName()).build(); 753 // Create table then get the single region for our new table. 754 Table t = TESTING_UTIL.createTable(htd, new byte[][] { Bytes.toBytes("cf") }, null); 755 List<HRegion> oldRegions; 756 do { 757 oldRegions = cluster.getRegions(tableName); 758 Thread.sleep(10); 759 } while (oldRegions.size() != 2); 760 for (HRegion h : oldRegions) 761 LOG.debug("OLDREGION " + h.getRegionInfo()); 762 try { 763 int regionServerIndex = 764 cluster.getServerWith(oldRegions.get(0).getRegionInfo().getRegionName()); 765 HRegionServer regionServer = cluster.getRegionServer(regionServerIndex); 766 insertData(tableName, admin, t); 767 // Turn off balancer so it doesn't cut in and mess up our placements. 768 admin.balancerSwitch(false, true); 769 // Turn off the meta scanner so it don't remove parent on us. 770 cluster.getMaster().setCatalogJanitorEnabled(false); 771 boolean tableExists = TESTING_UTIL.getAdmin().tableExists(tableName); 772 assertEquals("The specified table should be present.", true, tableExists); 773 final HRegion region = findSplittableRegion(oldRegions); 774 regionServerIndex = cluster.getServerWith(region.getRegionInfo().getRegionName()); 775 regionServer = cluster.getRegionServer(regionServerIndex); 776 assertTrue("not able to find a splittable region", region != null); 777 try { 778 requestSplitRegion(regionServer, region, Bytes.toBytes("row2")); 779 } catch (IOException e) { 780 e.printStackTrace(); 781 fail("Split execution should have succeeded with no exceptions thrown " + e); 782 } 783 // TESTING_UTIL.waitUntilAllRegionsAssigned(tableName); 784 List<HRegion> newRegions; 785 do { 786 newRegions = cluster.getRegions(tableName); 787 for (HRegion h : newRegions) 788 LOG.debug("NEWREGION " + h.getRegionInfo()); 789 Thread.sleep(1000); 790 } while ( 791 (newRegions.contains(oldRegions.get(0)) || newRegions.contains(oldRegions.get(1))) 792 || newRegions.size() != 4 793 ); 794 tableExists = TESTING_UTIL.getAdmin().tableExists(tableName); 795 assertEquals("The specified table should be present.", true, tableExists); 796 // exists works on stale and we see the put after the flush 797 byte[] b1 = Bytes.toBytes("row1"); 798 Get g = new Get(b1); 799 g.setConsistency(Consistency.STRONG); 800 // The following GET will make a trip to the meta to get the new location of the 1st daughter 801 // In the process it will also get the location of the replica of the daughter (initially 802 // pointing to the parent's replica) 803 Result r = t.get(g); 804 Assert.assertFalse(r.isStale()); 805 LOG.info("exists stale after flush done"); 806 807 SlowMeCopro.getPrimaryCdl().set(new CountDownLatch(1)); 808 g = new Get(b1); 809 g.setConsistency(Consistency.TIMELINE); 810 // This will succeed because in the previous GET we get the location of the replica 811 r = t.get(g); 812 Assert.assertTrue(r.isStale()); 813 SlowMeCopro.getPrimaryCdl().get().countDown(); 814 } finally { 815 SlowMeCopro.getPrimaryCdl().get().countDown(); 816 admin.balancerSwitch(true, false); 817 cluster.getMaster().setCatalogJanitorEnabled(true); 818 t.close(); 819 } 820 } 821 822 private void insertData(final TableName tableName, Admin admin, Table t) throws IOException { 823 insertData(tableName, admin, t, 1); 824 } 825 826 private void insertData(TableName tableName, Admin admin, Table t, int i) throws IOException { 827 Put p = new Put(Bytes.toBytes("row" + i)); 828 p.addColumn(Bytes.toBytes("cf"), Bytes.toBytes("q1"), Bytes.toBytes("1")); 829 t.put(p); 830 p = new Put(Bytes.toBytes("row" + (i + 1))); 831 p.addColumn(Bytes.toBytes("cf"), Bytes.toBytes("q1"), Bytes.toBytes("2")); 832 t.put(p); 833 p = new Put(Bytes.toBytes("row" + (i + 2))); 834 p.addColumn(Bytes.toBytes("cf"), Bytes.toBytes("q1"), Bytes.toBytes("3")); 835 t.put(p); 836 p = new Put(Bytes.toBytes("row" + (i + 3))); 837 p.addColumn(Bytes.toBytes("cf"), Bytes.toBytes("q1"), Bytes.toBytes("4")); 838 t.put(p); 839 admin.flush(tableName); 840 } 841 842 /** 843 * If a table has regions that have no store files in a region, they should split successfully 844 * into two regions with no store files. 845 */ 846 @Test 847 public void testSplitRegionWithNoStoreFiles() throws Exception { 848 final TableName tableName = TableName.valueOf(name.getMethodName()); 849 // Create table then get the single region for our new table. 850 createTableAndWait(tableName, HConstants.CATALOG_FAMILY); 851 List<HRegion> regions = cluster.getRegions(tableName); 852 RegionInfo hri = getAndCheckSingleTableRegion(regions); 853 ensureTableRegionNotOnSameServerAsMeta(admin, hri); 854 int regionServerIndex = cluster.getServerWith(regions.get(0).getRegionInfo().getRegionName()); 855 HRegionServer regionServer = cluster.getRegionServer(regionServerIndex); 856 // Turn off balancer so it doesn't cut in and mess up our placements. 857 this.admin.balancerSwitch(false, true); 858 // Turn off the meta scanner so it don't remove parent on us. 859 cluster.getMaster().setCatalogJanitorEnabled(false); 860 try { 861 // Precondition: we created a table with no data, no store files. 862 printOutRegions(regionServer, "Initial regions: "); 863 Configuration conf = cluster.getConfiguration(); 864 HBaseFsck.debugLsr(conf, new Path("/")); 865 Path rootDir = CommonFSUtils.getRootDir(conf); 866 FileSystem fs = TESTING_UTIL.getDFSCluster().getFileSystem(); 867 Map<String, Path> storefiles = FSUtils.getTableStoreFilePathMap(null, fs, rootDir, tableName); 868 assertEquals("Expected nothing but found " + storefiles.toString(), 0, storefiles.size()); 869 870 // find a splittable region. Refresh the regions list 871 regions = cluster.getRegions(tableName); 872 final HRegion region = findSplittableRegion(regions); 873 assertTrue("not able to find a splittable region", region != null); 874 875 // Now split. 876 try { 877 requestSplitRegion(regionServer, region, Bytes.toBytes("row2")); 878 } catch (IOException e) { 879 fail("Split execution should have succeeded with no exceptions thrown"); 880 } 881 882 // Postcondition: split the table with no store files into two regions, but still have no 883 // store files 884 List<HRegion> daughters = cluster.getRegions(tableName); 885 assertEquals(2, daughters.size()); 886 887 // check dirs 888 HBaseFsck.debugLsr(conf, new Path("/")); 889 Map<String, Path> storefilesAfter = 890 FSUtils.getTableStoreFilePathMap(null, fs, rootDir, tableName); 891 assertEquals("Expected nothing but found " + storefilesAfter.toString(), 0, 892 storefilesAfter.size()); 893 894 hri = region.getRegionInfo(); // split parent 895 AssignmentManager am = cluster.getMaster().getAssignmentManager(); 896 RegionStates regionStates = am.getRegionStates(); 897 long start = EnvironmentEdgeManager.currentTime(); 898 while (!regionStates.isRegionInState(hri, State.SPLIT)) { 899 LOG.debug("Waiting for SPLIT state on: " + hri); 900 assertFalse("Timed out in waiting split parent to be in state SPLIT", 901 EnvironmentEdgeManager.currentTime() - start > 60000); 902 Thread.sleep(500); 903 } 904 assertTrue(regionStates.isRegionInState(daughters.get(0).getRegionInfo(), State.OPEN)); 905 assertTrue(regionStates.isRegionInState(daughters.get(1).getRegionInfo(), State.OPEN)); 906 907 // We should not be able to assign it again 908 try { 909 am.assign(hri); 910 } catch (DoNotRetryIOException e) { 911 // Expected 912 } 913 assertFalse("Split region can't be assigned", regionStates.isRegionInTransition(hri)); 914 assertTrue(regionStates.isRegionInState(hri, State.SPLIT)); 915 916 // We should not be able to unassign it either 917 try { 918 am.unassign(hri); 919 fail("Should have thrown exception"); 920 } catch (DoNotRetryIOException e) { 921 // Expected 922 } 923 assertFalse("Split region can't be unassigned", regionStates.isRegionInTransition(hri)); 924 assertTrue(regionStates.isRegionInState(hri, State.SPLIT)); 925 } finally { 926 admin.balancerSwitch(true, false); 927 cluster.getMaster().setCatalogJanitorEnabled(true); 928 } 929 } 930 931 @Test 932 public void testStoreFileReferenceCreationWhenSplitPolicySaysToSkipRangeCheck() throws Exception { 933 final TableName tableName = TableName.valueOf(name.getMethodName()); 934 try { 935 byte[] cf = Bytes.toBytes("f"); 936 byte[] cf1 = Bytes.toBytes("i_f"); 937 TableDescriptor htd = TableDescriptorBuilder.newBuilder(tableName) 938 .setColumnFamily(ColumnFamilyDescriptorBuilder.of(cf)) 939 .setColumnFamily(ColumnFamilyDescriptorBuilder.of(cf1)) 940 .setRegionSplitPolicyClassName(CustomSplitPolicy.class.getName()).build(); 941 admin.createTable(htd); 942 List<HRegion> regions = awaitTableRegions(tableName); 943 HRegion region = regions.get(0); 944 for (int i = 3; i < 9; i++) { 945 Put p = new Put(Bytes.toBytes("row" + i)); 946 p.addColumn(cf, Bytes.toBytes("q"), Bytes.toBytes("value" + i)); 947 p.addColumn(cf1, Bytes.toBytes("q"), Bytes.toBytes("value" + i)); 948 region.put(p); 949 } 950 region.flush(true); 951 HStore store = region.getStore(cf); 952 Collection<HStoreFile> storefiles = store.getStorefiles(); 953 assertEquals(1, storefiles.size()); 954 assertFalse(region.hasReferences()); 955 Path referencePath = region.getRegionFileSystem().splitStoreFile(region.getRegionInfo(), "f", 956 storefiles.iterator().next(), Bytes.toBytes("row1"), false, region.getSplitPolicy()); 957 assertNull(referencePath); 958 referencePath = region.getRegionFileSystem().splitStoreFile(region.getRegionInfo(), "i_f", 959 storefiles.iterator().next(), Bytes.toBytes("row1"), false, region.getSplitPolicy()); 960 assertNotNull(referencePath); 961 } finally { 962 TESTING_UTIL.deleteTable(tableName); 963 } 964 } 965 966 private HRegion findSplittableRegion(final List<HRegion> regions) throws InterruptedException { 967 for (int i = 0; i < 5; ++i) { 968 for (HRegion r : regions) { 969 if (r.isSplittable() && r.getRegionInfo().getReplicaId() == 0) { 970 return (r); 971 } 972 } 973 Thread.sleep(100); 974 } 975 return null; 976 } 977 978 private List<HRegion> checkAndGetDaughters(TableName tableName) throws InterruptedException { 979 List<HRegion> daughters = null; 980 // try up to 10s 981 for (int i = 0; i < 100; i++) { 982 daughters = cluster.getRegions(tableName); 983 if (daughters.size() >= 2) { 984 break; 985 } 986 Thread.sleep(100); 987 } 988 assertTrue(daughters.size() >= 2); 989 return daughters; 990 } 991 992 private HMaster abortAndWaitForMaster() throws IOException, InterruptedException { 993 cluster.abortMaster(0); 994 cluster.waitOnMaster(0); 995 HMaster master = cluster.startMaster().getMaster(); 996 cluster.waitForActiveAndReadyMaster(); 997 // reset the connections 998 Closeables.close(admin, true); 999 TESTING_UTIL.invalidateConnection(); 1000 admin = TESTING_UTIL.getAdmin(); 1001 return master; 1002 } 1003 1004 /** 1005 * Ensure single table region is not on same server as the single hbase:meta table region. 1006 * @return Index of the server hosting the single table region 1007 */ 1008 private int ensureTableRegionNotOnSameServerAsMeta(final Admin admin, final RegionInfo hri) 1009 throws IOException, MasterNotRunningException, ZooKeeperConnectionException, 1010 InterruptedException { 1011 // Now make sure that the table region is not on same server as that hosting 1012 // hbase:meta We don't want hbase:meta replay polluting our test when we later crash 1013 // the table region serving server. 1014 int metaServerIndex = cluster.getServerWithMeta(); 1015 HRegionServer metaRegionServer = cluster.getRegionServer(metaServerIndex); 1016 int tableRegionIndex = cluster.getServerWith(hri.getRegionName()); 1017 assertTrue(tableRegionIndex != -1); 1018 HRegionServer tableRegionServer = cluster.getRegionServer(tableRegionIndex); 1019 LOG.info("MetaRegionServer=" + metaRegionServer.getServerName() + ", other=" 1020 + tableRegionServer.getServerName()); 1021 if (metaRegionServer.getServerName().equals(tableRegionServer.getServerName())) { 1022 HRegionServer hrs = getOtherRegionServer(cluster, metaRegionServer); 1023 assertNotNull(hrs); 1024 assertNotNull(hri); 1025 LOG.info("Moving " + hri.getRegionNameAsString() + " from " + metaRegionServer.getServerName() 1026 + " to " + hrs.getServerName() + "; metaServerIndex=" + metaServerIndex); 1027 admin.move(hri.getEncodedNameAsBytes(), hrs.getServerName()); 1028 } 1029 // Wait till table region is up on the server that is NOT carrying hbase:meta. 1030 for (int i = 0; i < 100; i++) { 1031 tableRegionIndex = cluster.getServerWith(hri.getRegionName()); 1032 if (tableRegionIndex != -1 && tableRegionIndex != metaServerIndex) break; 1033 LOG.debug("Waiting on region move off the hbase:meta server; current index " 1034 + tableRegionIndex + " and metaServerIndex=" + metaServerIndex); 1035 Thread.sleep(100); 1036 } 1037 assertTrue("Region not moved off hbase:meta server, tableRegionIndex=" + tableRegionIndex, 1038 tableRegionIndex != -1 && tableRegionIndex != metaServerIndex); 1039 // Verify for sure table region is not on same server as hbase:meta 1040 tableRegionIndex = cluster.getServerWith(hri.getRegionName()); 1041 assertTrue(tableRegionIndex != -1); 1042 assertNotSame(metaServerIndex, tableRegionIndex); 1043 return tableRegionIndex; 1044 } 1045 1046 /** 1047 * Find regionserver other than the one passed. Can't rely on indexes into list of regionservers 1048 * since crashed servers occupy an index. 1049 * @return A regionserver that is not <code>notThisOne</code> or null if none found 1050 */ 1051 private HRegionServer getOtherRegionServer(final SingleProcessHBaseCluster cluster, 1052 final HRegionServer notThisOne) { 1053 for (RegionServerThread rst : cluster.getRegionServerThreads()) { 1054 HRegionServer hrs = rst.getRegionServer(); 1055 if (hrs.getServerName().equals(notThisOne.getServerName())) continue; 1056 if (hrs.isStopping() || hrs.isStopped()) continue; 1057 return hrs; 1058 } 1059 return null; 1060 } 1061 1062 private void printOutRegions(final HRegionServer hrs, final String prefix) throws IOException { 1063 List<RegionInfo> regions = ProtobufUtil.getOnlineRegions(hrs.getRSRpcServices()); 1064 for (RegionInfo region : regions) { 1065 LOG.info(prefix + region.getRegionNameAsString()); 1066 } 1067 } 1068 1069 private void waitUntilRegionServerDead() throws InterruptedException, IOException { 1070 // Wait until the master processes the RS shutdown 1071 for (int i = 1072 0; (cluster.getMaster().getClusterMetrics().getLiveServerMetrics().size() > NB_SERVERS 1073 || cluster.getLiveRegionServerThreads().size() > NB_SERVERS) && i < 100; i++) { 1074 LOG.info("Waiting on server to go down"); 1075 Thread.sleep(100); 1076 } 1077 assertFalse("Waited too long for RS to die", 1078 cluster.getMaster().getClusterMetrics().getLiveServerMetrics().size() > NB_SERVERS 1079 || cluster.getLiveRegionServerThreads().size() > NB_SERVERS); 1080 } 1081 1082 private void awaitDaughters(TableName tableName, int numDaughters) throws InterruptedException { 1083 // Wait till regions are back on line again. 1084 for (int i = 0; cluster.getRegions(tableName).size() < numDaughters && i < 60; i++) { 1085 LOG.info("Waiting for repair to happen"); 1086 Thread.sleep(1000); 1087 } 1088 if (cluster.getRegions(tableName).size() < numDaughters) { 1089 fail("Waiting too long for daughter regions"); 1090 } 1091 } 1092 1093 private List<HRegion> awaitTableRegions(final TableName tableName) throws InterruptedException { 1094 List<HRegion> regions = null; 1095 for (int i = 0; i < 100; i++) { 1096 regions = cluster.getRegions(tableName); 1097 if (regions.size() > 0) break; 1098 Thread.sleep(100); 1099 } 1100 return regions; 1101 } 1102 1103 private Table createTableAndWait(TableName tableName, byte[] cf) 1104 throws IOException, InterruptedException { 1105 Table t = TESTING_UTIL.createTable(tableName, cf); 1106 awaitTableRegions(tableName); 1107 assertTrue("Table not online: " + tableName, cluster.getRegions(tableName).size() != 0); 1108 return t; 1109 } 1110 1111 // Make it public so that JVMClusterUtil can access it. 1112 public static class MyMaster extends HMaster { 1113 public MyMaster(Configuration conf) throws IOException, KeeperException, InterruptedException { 1114 super(conf); 1115 } 1116 1117 @Override 1118 protected MasterRpcServices createRpcServices() throws IOException { 1119 return new MyMasterRpcServices(this); 1120 } 1121 } 1122 1123 static class MyMasterRpcServices extends MasterRpcServices { 1124 static AtomicBoolean enabled = new AtomicBoolean(false); 1125 1126 private HMaster myMaster; 1127 1128 public MyMasterRpcServices(HMaster master) throws IOException { 1129 super(master); 1130 myMaster = master; 1131 } 1132 1133 @Override 1134 public ReportRegionStateTransitionResponse reportRegionStateTransition(RpcController c, 1135 ReportRegionStateTransitionRequest req) throws ServiceException { 1136 ReportRegionStateTransitionResponse resp = super.reportRegionStateTransition(c, req); 1137 if ( 1138 enabled.get() 1139 && req.getTransition(0).getTransitionCode().equals(TransitionCode.READY_TO_SPLIT) 1140 && !resp.hasErrorMessage() 1141 ) { 1142 RegionStates regionStates = myMaster.getAssignmentManager().getRegionStates(); 1143 for (RegionStateNode regionState : regionStates.getRegionsInTransition()) { 1144 /* 1145 * TODO!!!! // Find the merging_new region and remove it if (regionState.isSplittingNew()) 1146 * { regionStates.deleteRegion(regionState.getRegion()); } 1147 */ 1148 } 1149 } 1150 return resp; 1151 } 1152 } 1153 1154 static class CustomSplitPolicy extends IncreasingToUpperBoundRegionSplitPolicy { 1155 1156 @Override 1157 protected boolean shouldSplit() { 1158 return true; 1159 } 1160 1161 @Override 1162 public boolean skipStoreFileRangeCheck(String familyName) { 1163 if (familyName.startsWith("i_")) { 1164 return true; 1165 } else { 1166 return false; 1167 } 1168 } 1169 } 1170}