001/* 002 * Licensed to the Apache Software Foundation (ASF) under one 003 * or more contributor license agreements. See the NOTICE file 004 * distributed with this work for additional information 005 * regarding copyright ownership. The ASF licenses this file 006 * to you under the Apache License, Version 2.0 (the 007 * "License"); you may not use this file except in compliance 008 * with the License. You may obtain a copy of the License at 009 * 010 * http://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, software 013 * distributed under the License is distributed on an "AS IS" BASIS, 014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 015 * See the License for the specific language governing permissions and 016 * limitations under the License. 017 */ 018package org.apache.hadoop.hbase.regionserver; 019 020import static org.apache.hadoop.hbase.client.TableDescriptorBuilder.SPLIT_POLICY; 021import static org.junit.jupiter.api.Assertions.assertEquals; 022import static org.junit.jupiter.api.Assertions.assertFalse; 023import static org.junit.jupiter.api.Assertions.assertNotEquals; 024import static org.junit.jupiter.api.Assertions.assertNotNull; 025import static org.junit.jupiter.api.Assertions.assertNotSame; 026import static org.junit.jupiter.api.Assertions.assertNull; 027import static org.junit.jupiter.api.Assertions.assertTrue; 028import static org.junit.jupiter.api.Assertions.fail; 029 030import java.io.IOException; 031import java.lang.reflect.Field; 032import java.util.ArrayList; 033import java.util.Collection; 034import java.util.List; 035import java.util.Map; 036import java.util.Optional; 037import java.util.concurrent.CountDownLatch; 038import java.util.concurrent.ExecutionException; 039import java.util.concurrent.TimeUnit; 040import java.util.concurrent.TimeoutException; 041import java.util.concurrent.atomic.AtomicBoolean; 042import org.apache.hadoop.conf.Configuration; 043import org.apache.hadoop.fs.FileSystem; 044import org.apache.hadoop.fs.Path; 045import org.apache.hadoop.hbase.CellComparator; 046import org.apache.hadoop.hbase.Coprocessor; 047import org.apache.hadoop.hbase.CoprocessorEnvironment; 048import org.apache.hadoop.hbase.DoNotRetryIOException; 049import org.apache.hadoop.hbase.HBaseTestingUtil; 050import org.apache.hadoop.hbase.HConstants; 051import org.apache.hadoop.hbase.MasterNotRunningException; 052import org.apache.hadoop.hbase.PrivateCellUtil; 053import org.apache.hadoop.hbase.ServerName; 054import org.apache.hadoop.hbase.SingleProcessHBaseCluster; 055import org.apache.hadoop.hbase.StartTestingClusterOption; 056import org.apache.hadoop.hbase.TableName; 057import org.apache.hadoop.hbase.ZooKeeperConnectionException; 058import org.apache.hadoop.hbase.client.Admin; 059import org.apache.hadoop.hbase.client.ColumnFamilyDescriptorBuilder; 060import org.apache.hadoop.hbase.client.Consistency; 061import org.apache.hadoop.hbase.client.Delete; 062import org.apache.hadoop.hbase.client.DoNotRetryRegionException; 063import org.apache.hadoop.hbase.client.Get; 064import org.apache.hadoop.hbase.client.Mutation; 065import org.apache.hadoop.hbase.client.Put; 066import org.apache.hadoop.hbase.client.RegionInfo; 067import org.apache.hadoop.hbase.client.Result; 068import org.apache.hadoop.hbase.client.ResultScanner; 069import org.apache.hadoop.hbase.client.Scan; 070import org.apache.hadoop.hbase.client.Table; 071import org.apache.hadoop.hbase.client.TableDescriptor; 072import org.apache.hadoop.hbase.client.TableDescriptorBuilder; 073import org.apache.hadoop.hbase.client.TestReplicasClient.SlowMeCopro; 074import org.apache.hadoop.hbase.coprocessor.MasterCoprocessor; 075import org.apache.hadoop.hbase.coprocessor.MasterCoprocessorEnvironment; 076import org.apache.hadoop.hbase.coprocessor.MasterObserver; 077import org.apache.hadoop.hbase.coprocessor.ObserverContext; 078import org.apache.hadoop.hbase.io.HFileLink; 079import org.apache.hadoop.hbase.io.Reference; 080import org.apache.hadoop.hbase.master.HMaster; 081import org.apache.hadoop.hbase.master.MasterRpcServices; 082import org.apache.hadoop.hbase.master.RegionState; 083import org.apache.hadoop.hbase.master.RegionState.State; 084import org.apache.hadoop.hbase.master.assignment.AssignmentManager; 085import org.apache.hadoop.hbase.master.assignment.AssignmentTestingUtil; 086import org.apache.hadoop.hbase.master.assignment.RegionStateNode; 087import org.apache.hadoop.hbase.master.assignment.RegionStates; 088import org.apache.hadoop.hbase.procedure2.ProcedureTestingUtility; 089import org.apache.hadoop.hbase.regionserver.compactions.CompactionContext; 090import org.apache.hadoop.hbase.regionserver.compactions.CompactionLifeCycleTracker; 091import org.apache.hadoop.hbase.regionserver.storefiletracker.StoreFileTracker; 092import org.apache.hadoop.hbase.regionserver.storefiletracker.StoreFileTrackerFactory; 093import org.apache.hadoop.hbase.regionserver.throttle.NoLimitThroughputController; 094import org.apache.hadoop.hbase.testclassification.LargeTests; 095import org.apache.hadoop.hbase.testclassification.RegionServerTests; 096import org.apache.hadoop.hbase.util.Bytes; 097import org.apache.hadoop.hbase.util.CommonFSUtils; 098import org.apache.hadoop.hbase.util.EnvironmentEdgeManager; 099import org.apache.hadoop.hbase.util.FSUtils; 100import org.apache.hadoop.hbase.util.FutureUtils; 101import org.apache.hadoop.hbase.util.HBaseFsck; 102import org.apache.hadoop.hbase.util.JVMClusterUtil.RegionServerThread; 103import org.apache.hadoop.hbase.util.Threads; 104import org.apache.zookeeper.KeeperException; 105import org.apache.zookeeper.KeeperException.NodeExistsException; 106import org.junit.jupiter.api.AfterAll; 107import org.junit.jupiter.api.AfterEach; 108import org.junit.jupiter.api.BeforeAll; 109import org.junit.jupiter.api.BeforeEach; 110import org.junit.jupiter.api.Tag; 111import org.junit.jupiter.api.Test; 112import org.junit.jupiter.api.TestInfo; 113import org.mockito.Mockito; 114import org.slf4j.Logger; 115import org.slf4j.LoggerFactory; 116 117import org.apache.hbase.thirdparty.com.google.common.io.Closeables; 118import org.apache.hbase.thirdparty.com.google.protobuf.RpcController; 119import org.apache.hbase.thirdparty.com.google.protobuf.ServiceException; 120 121import org.apache.hadoop.hbase.shaded.protobuf.ProtobufUtil; 122import org.apache.hadoop.hbase.shaded.protobuf.generated.RegionServerStatusProtos.RegionStateTransition.TransitionCode; 123import org.apache.hadoop.hbase.shaded.protobuf.generated.RegionServerStatusProtos.ReportRegionStateTransitionRequest; 124import org.apache.hadoop.hbase.shaded.protobuf.generated.RegionServerStatusProtos.ReportRegionStateTransitionResponse; 125 126/** 127 * The below tests are testing split region against a running cluster 128 */ 129@Tag(RegionServerTests.TAG) 130@Tag(LargeTests.TAG) 131public class TestSplitTransactionOnCluster { 132 133 private static final Logger LOG = LoggerFactory.getLogger(TestSplitTransactionOnCluster.class); 134 private Admin admin = null; 135 private SingleProcessHBaseCluster cluster = null; 136 private static final int NB_SERVERS = 3; 137 138 static final HBaseTestingUtil TESTING_UTIL = new HBaseTestingUtil(); 139 140 private String methodName; 141 142 @BeforeAll 143 public static void before() throws Exception { 144 TESTING_UTIL.getConfiguration().setInt(HConstants.HBASE_BALANCER_PERIOD, 60000); 145 StartTestingClusterOption option = StartTestingClusterOption.builder() 146 .masterClass(MyMaster.class).numRegionServers(NB_SERVERS).numDataNodes(NB_SERVERS).build(); 147 TESTING_UTIL.startMiniCluster(option); 148 } 149 150 @AfterAll 151 public static void after() throws Exception { 152 TESTING_UTIL.shutdownMiniCluster(); 153 } 154 155 @BeforeEach 156 public void setup(TestInfo testInfo) throws IOException { 157 this.methodName = testInfo.getTestMethod().get().getName(); 158 TESTING_UTIL.ensureSomeNonStoppedRegionServersAvailable(NB_SERVERS); 159 this.admin = TESTING_UTIL.getAdmin(); 160 this.cluster = TESTING_UTIL.getMiniHBaseCluster(); 161 } 162 163 @AfterEach 164 public void tearDown() throws Exception { 165 this.admin.close(); 166 for (TableDescriptor htd : this.admin.listTableDescriptors()) { 167 LOG.info("Tear down, remove table=" + htd.getTableName()); 168 TESTING_UTIL.deleteTable(htd.getTableName()); 169 } 170 } 171 172 private RegionInfo getAndCheckSingleTableRegion(final List<HRegion> regions) 173 throws IOException, InterruptedException { 174 assertEquals(1, regions.size()); 175 RegionInfo hri = regions.get(0).getRegionInfo(); 176 AssignmentTestingUtil.waitForAssignment(cluster.getMaster().getAssignmentManager(), hri); 177 return hri; 178 } 179 180 private void requestSplitRegion(final HRegionServer rsServer, final Region region, 181 final byte[] midKey) throws IOException { 182 long procId = cluster.getMaster().splitRegion(region.getRegionInfo(), midKey, 0, 0); 183 // wait for the split to complete or get interrupted. If the split completes successfully, 184 // the procedure will return true; if the split fails, the procedure would throw exception. 185 ProcedureTestingUtility.waitProcedure(cluster.getMaster().getMasterProcedureExecutor(), procId); 186 } 187 188 @Test 189 public void testRITStateForRollback() throws Exception { 190 final TableName tableName = TableName.valueOf(methodName); 191 final HMaster master = cluster.getMaster(); 192 try { 193 // Create table then get the single region for our new table. 194 Table t = createTableAndWait(tableName, Bytes.toBytes("cf")); 195 final List<HRegion> regions = cluster.getRegions(tableName); 196 final RegionInfo hri = getAndCheckSingleTableRegion(regions); 197 insertData(tableName, admin, t); 198 t.close(); 199 200 // Turn off balancer so it doesn't cut in and mess up our placements. 201 this.admin.balancerSwitch(false, true); 202 // Turn off the meta scanner so it don't remove parent on us. 203 master.setCatalogJanitorEnabled(false); 204 205 // find a splittable region 206 final HRegion region = findSplittableRegion(regions); 207 assertNotNull(region, "not able to find a splittable region"); 208 209 // install master co-processor to fail splits 210 master.getMasterCoprocessorHost().load(FailingSplitMasterObserver.class, 211 Coprocessor.PRIORITY_USER, master.getConfiguration()); 212 213 // split async 214 this.admin.splitRegionAsync(region.getRegionInfo().getRegionName(), new byte[] { 42 }); 215 216 // we have to wait until the SPLITTING state is seen by the master 217 FailingSplitMasterObserver observer = 218 master.getMasterCoprocessorHost().findCoprocessor(FailingSplitMasterObserver.class); 219 assertNotNull(observer); 220 observer.latch.await(); 221 222 LOG.info("Waiting for region to come out of RIT"); 223 while (!cluster.getMaster().getAssignmentManager().getRegionStates().isRegionOnline(hri)) { 224 Threads.sleep(100); 225 } 226 assertTrue(cluster.getMaster().getAssignmentManager().getRegionStates().isRegionOnline(hri)); 227 } finally { 228 admin.balancerSwitch(true, false); 229 master.setCatalogJanitorEnabled(true); 230 abortAndWaitForMaster(); 231 TESTING_UTIL.deleteTable(tableName); 232 } 233 } 234 235 @Test 236 public void testSplitFailedCompactionAndSplit() throws Exception { 237 final TableName tableName = TableName.valueOf(methodName); 238 // Create table then get the single region for our new table. 239 byte[] cf = Bytes.toBytes("cf"); 240 TableDescriptor htd = TableDescriptorBuilder.newBuilder(tableName) 241 .setColumnFamily(ColumnFamilyDescriptorBuilder.of(cf)).build(); 242 admin.createTable(htd); 243 244 for (int i = 0; cluster.getRegions(tableName).isEmpty() && i < 100; i++) { 245 Thread.sleep(100); 246 } 247 assertEquals(1, cluster.getRegions(tableName).size()); 248 249 HRegion region = cluster.getRegions(tableName).get(0); 250 HStore store = region.getStore(cf); 251 int regionServerIndex = cluster.getServerWith(region.getRegionInfo().getRegionName()); 252 HRegionServer regionServer = cluster.getRegionServer(regionServerIndex); 253 254 Table t = TESTING_UTIL.getConnection().getTable(tableName); 255 // insert data 256 insertData(tableName, admin, t); 257 insertData(tableName, admin, t); 258 259 int fileNum = store.getStorefiles().size(); 260 // 0, Compaction Request 261 store.triggerMajorCompaction(); 262 Optional<CompactionContext> cc = store.requestCompaction(); 263 assertTrue(cc.isPresent()); 264 // 1, A timeout split 265 // 1.1 close region 266 assertEquals(2, region.close(false).get(cf).size()); 267 // 1.2 rollback and Region initialize again 268 region.initialize(); 269 270 // 2, Run Compaction cc 271 assertFalse(region.compact(cc.get(), store, NoLimitThroughputController.INSTANCE)); 272 assertTrue(fileNum > store.getStorefiles().size()); 273 274 // 3, Split 275 requestSplitRegion(regionServer, region, Bytes.toBytes("row3")); 276 assertEquals(2, cluster.getRegions(tableName).size()); 277 } 278 279 @Test 280 public void testSplitCompactWithPriority() throws Exception { 281 final TableName tableName = TableName.valueOf(methodName); 282 // Create table then get the single region for our new table. 283 byte[] cf = Bytes.toBytes("cf"); 284 TableDescriptor htd = TableDescriptorBuilder.newBuilder(tableName) 285 .setColumnFamily(ColumnFamilyDescriptorBuilder.of(cf)).build(); 286 admin.createTable(htd); 287 288 assertNotEquals(-1, 289 TESTING_UTIL.waitFor(10000, () -> cluster.getRegions(tableName).size() == 1), 290 "Unable to retrieve regions of the table"); 291 292 HRegion region = cluster.getRegions(tableName).get(0); 293 HStore store = region.getStore(cf); 294 int regionServerIndex = cluster.getServerWith(region.getRegionInfo().getRegionName()); 295 HRegionServer regionServer = cluster.getRegionServer(regionServerIndex); 296 297 Table table = TESTING_UTIL.getConnection().getTable(tableName); 298 // insert data 299 insertData(tableName, admin, table); 300 insertData(tableName, admin, table, 20); 301 insertData(tableName, admin, table, 40); 302 303 // Compaction Request 304 store.triggerMajorCompaction(); 305 Optional<CompactionContext> compactionContext = store.requestCompaction(); 306 assertTrue(compactionContext.isPresent()); 307 assertFalse(compactionContext.get().getRequest().isAfterSplit()); 308 assertEquals(13, compactionContext.get().getRequest().getPriority()); 309 310 // Split 311 long procId = 312 cluster.getMaster().splitRegion(region.getRegionInfo(), Bytes.toBytes("row4"), 0, 0); 313 314 // wait for the split to complete or get interrupted. If the split completes successfully, 315 // the procedure will return true; if the split fails, the procedure would throw exception. 316 ProcedureTestingUtility.waitProcedure(cluster.getMaster().getMasterProcedureExecutor(), procId); 317 Thread.sleep(3000); 318 assertNotEquals(-1, TESTING_UTIL.waitFor(3000, () -> cluster.getRegions(tableName).size() == 2), 319 "Table is not split properly?"); 320 // we have 2 daughter regions 321 HRegion hRegion1 = cluster.getRegions(tableName).get(0); 322 HRegion hRegion2 = cluster.getRegions(tableName).get(1); 323 HStore hStore1 = hRegion1.getStore(cf); 324 HStore hStore2 = hRegion2.getStore(cf); 325 326 // For hStore1 && hStore2, set mock reference to one of the storeFiles 327 StoreFileInfo storeFileInfo1 = new ArrayList<>(hStore1.getStorefiles()).get(0).getFileInfo(); 328 StoreFileInfo storeFileInfo2 = new ArrayList<>(hStore2.getStorefiles()).get(0).getFileInfo(); 329 Field field = StoreFileInfo.class.getDeclaredField("reference"); 330 field.setAccessible(true); 331 field.set(storeFileInfo1, Mockito.mock(Reference.class)); 332 field.set(storeFileInfo2, Mockito.mock(Reference.class)); 333 hStore1.triggerMajorCompaction(); 334 hStore2.triggerMajorCompaction(); 335 336 compactionContext = hStore1.requestCompaction(); 337 assertTrue(compactionContext.isPresent()); 338 // since we set mock reference to one of the storeFiles, we will get isAfterSplit=true && 339 // highest priority for hStore1's compactionContext 340 assertTrue(compactionContext.get().getRequest().isAfterSplit()); 341 assertEquals(Integer.MIN_VALUE + 1000, compactionContext.get().getRequest().getPriority()); 342 343 compactionContext = 344 hStore2.requestCompaction(Integer.MIN_VALUE + 10, CompactionLifeCycleTracker.DUMMY, null); 345 assertTrue(compactionContext.isPresent()); 346 // compaction request contains higher priority than default priority of daughter region 347 // compaction (Integer.MIN_VALUE + 1000), hence we are expecting request priority to 348 // be accepted. 349 assertTrue(compactionContext.get().getRequest().isAfterSplit()); 350 assertEquals(Integer.MIN_VALUE + 10, compactionContext.get().getRequest().getPriority()); 351 admin.disableTable(tableName); 352 admin.deleteTable(tableName); 353 } 354 355 @Test 356 public void testContinuousSplitUsingLinkFile() throws Exception { 357 final TableName tableName = TableName.valueOf(methodName); 358 // Create table then get the single region for our new table. 359 byte[] cf = Bytes.toBytes("cf"); 360 TableDescriptorBuilder builder = TableDescriptorBuilder.newBuilder(tableName) 361 .setColumnFamily(ColumnFamilyDescriptorBuilder.of(cf)); 362 String splitPolicy = ConstantSizeRegionSplitPolicy.class.getName(); 363 builder.setValue(SPLIT_POLICY, splitPolicy); 364 365 admin.createTable(builder.build()); 366 admin.compactionSwitch(false, new ArrayList<>()); 367 368 assertNotEquals(-1, 369 TESTING_UTIL.waitFor(10000, () -> cluster.getRegions(tableName).size() == 1), 370 "Unable to retrieve regions of the table"); 371 Table table = TESTING_UTIL.getConnection().getTable(tableName); 372 // insert data 373 insertData(tableName, admin, table, 10); 374 insertData(tableName, admin, table, 20); 375 insertData(tableName, admin, table, 40); 376 int rowCount = 3 * 4; 377 Scan scan = new Scan(); 378 scanValidate(scan, rowCount, table); 379 380 // Split 381 admin.splitRegionAsync(cluster.getRegions(tableName).get(0).getRegionInfo().getRegionName(), 382 Bytes.toBytes("row14")); 383 // wait for the split to complete or get interrupted. If the split completes successfully, 384 // the procedure will return true; if the split fails, the procedure would throw exception. 385 Thread.sleep(3000); 386 assertNotEquals(-1, TESTING_UTIL.waitFor(3000, () -> cluster.getRegions(tableName).size() == 2), 387 "Table is not split properly?"); 388 // we have 2 daughter regions 389 HRegion hRegion1 = cluster.getRegions(tableName).get(0); 390 HRegion hRegion2 = cluster.getRegions(tableName).get(1); 391 HStore hStore1 = hRegion1.getStore(cf); 392 HStore hStore2 = hRegion2.getStore(cf); 393 // the sum of store files of the two children should be equal to their parent 394 assertEquals(3, hStore1.getStorefilesCount() + hStore2.getStorefilesCount()); 395 // both the two children should have link files 396 for (StoreFile sf : hStore1.getStorefiles()) { 397 assertTrue(HFileLink.isHFileLink(sf.getPath())); 398 } 399 for (StoreFile sf : hStore2.getStorefiles()) { 400 assertTrue(HFileLink.isHFileLink(sf.getPath())); 401 } 402 // validate children data 403 scan = new Scan(); 404 scanValidate(scan, rowCount, table); 405 406 // Continuous Split 407 findRegionToSplit(tableName, "row24"); 408 Thread.sleep(3000); 409 assertNotEquals(-1, TESTING_UTIL.waitFor(3000, () -> cluster.getRegions(tableName).size() == 3), 410 "Table is not split properly?"); 411 // now table has 3 region, each region should have one link file 412 for (HRegion newRegion : cluster.getRegions(tableName)) { 413 assertEquals(1, newRegion.getStore(cf).getStorefilesCount()); 414 assertTrue( 415 HFileLink.isHFileLink(newRegion.getStore(cf).getStorefiles().iterator().next().getPath())); 416 } 417 418 scan = new Scan(); 419 scanValidate(scan, rowCount, table); 420 421 // Continuous Split, random split HFileLink, generate Reference files. 422 // After this, can not continuous split, because there are reference files. 423 findRegionToSplit(tableName, "row11"); 424 Thread.sleep(3000); 425 assertNotEquals(-1, TESTING_UTIL.waitFor(3000, () -> cluster.getRegions(tableName).size() == 4), 426 "Table is not split properly?"); 427 428 scan = new Scan(); 429 scanValidate(scan, rowCount, table); 430 } 431 432 private void findRegionToSplit(TableName tableName, String splitRowKey) throws Exception { 433 HRegion toSplit = null; 434 byte[] toSplitKey = Bytes.toBytes(splitRowKey); 435 for (HRegion rg : cluster.getRegions(tableName)) { 436 LOG.debug( 437 "startKey=" + Bytes.toStringBinary(rg.getRegionInfo().getStartKey()) + ", getEndKey()=" 438 + Bytes.toStringBinary(rg.getRegionInfo().getEndKey()) + ", row=" + splitRowKey); 439 if ( 440 (rg.getRegionInfo().getStartKey().length == 0 || CellComparator.getInstance().compare( 441 PrivateCellUtil.createFirstOnRow(rg.getRegionInfo().getStartKey()), 442 PrivateCellUtil.createFirstOnRow(toSplitKey)) <= 0) 443 && (rg.getRegionInfo().getEndKey().length == 0 || CellComparator.getInstance().compare( 444 PrivateCellUtil.createFirstOnRow(rg.getRegionInfo().getEndKey()), 445 PrivateCellUtil.createFirstOnRow(toSplitKey)) >= 0) 446 ) { 447 toSplit = rg; 448 } 449 } 450 assertNotNull(toSplit); 451 admin.splitRegionAsync(toSplit.getRegionInfo().getRegionName(), toSplitKey); 452 } 453 454 private static void scanValidate(Scan scan, int expectedRowCount, Table table) 455 throws IOException { 456 ResultScanner scanner = table.getScanner(scan); 457 int rows = 0; 458 for (Result result : scanner) { 459 rows++; 460 } 461 scanner.close(); 462 assertEquals(expectedRowCount, rows); 463 } 464 465 public static class FailingSplitMasterObserver implements MasterCoprocessor, MasterObserver { 466 volatile CountDownLatch latch; 467 468 @Override 469 public void start(CoprocessorEnvironment e) throws IOException { 470 latch = new CountDownLatch(1); 471 } 472 473 @Override 474 public Optional<MasterObserver> getMasterObserver() { 475 return Optional.of(this); 476 } 477 478 @Override 479 public void preSplitRegionBeforeMETAAction( 480 final ObserverContext<MasterCoprocessorEnvironment> ctx, final byte[] splitKey, 481 final List<Mutation> metaEntries) throws IOException { 482 latch.countDown(); 483 throw new IOException("Causing rollback of region split"); 484 } 485 } 486 487 @Test 488 public void testSplitRollbackOnRegionClosing() throws Exception { 489 final TableName tableName = TableName.valueOf(methodName); 490 491 // Create table then get the single region for our new table. 492 Table t = createTableAndWait(tableName, HConstants.CATALOG_FAMILY); 493 List<HRegion> regions = cluster.getRegions(tableName); 494 RegionInfo hri = getAndCheckSingleTableRegion(regions); 495 496 int tableRegionIndex = ensureTableRegionNotOnSameServerAsMeta(admin, hri); 497 498 RegionStates regionStates = cluster.getMaster().getAssignmentManager().getRegionStates(); 499 500 // Turn off balancer so it doesn't cut in and mess up our placements. 501 this.admin.balancerSwitch(false, true); 502 // Turn off the meta scanner so it don't remove parent on us. 503 cluster.getMaster().setCatalogJanitorEnabled(false); 504 try { 505 // Add a bit of load up into the table so splittable. 506 TESTING_UTIL.loadTable(t, HConstants.CATALOG_FAMILY, false); 507 // Get region pre-split. 508 HRegionServer server = cluster.getRegionServer(tableRegionIndex); 509 printOutRegions(server, "Initial regions: "); 510 int regionCount = cluster.getRegions(hri.getTable()).size(); 511 regionStates.updateRegionState(hri, RegionState.State.CLOSING); 512 513 // Now try splitting.... should fail. And each should successfully 514 // rollback. 515 // We don't roll back here anymore. Instead we fail-fast on construction of the 516 // split transaction. Catch the exception instead. 517 try { 518 FutureUtils.get(this.admin.splitRegionAsync(hri.getRegionName())); 519 fail(); 520 } catch (DoNotRetryRegionException e) { 521 // Expected 522 } 523 // Wait around a while and assert count of regions remains constant. 524 for (int i = 0; i < 10; i++) { 525 Thread.sleep(100); 526 assertEquals(regionCount, cluster.getRegions(hri.getTable()).size()); 527 } 528 regionStates.updateRegionState(hri, State.OPEN); 529 // Now try splitting and it should work. 530 admin.splitRegionAsync(hri.getRegionName()).get(2, TimeUnit.MINUTES); 531 // Get daughters 532 checkAndGetDaughters(tableName); 533 // OK, so split happened after we cleared the blocking node. 534 } finally { 535 admin.balancerSwitch(true, false); 536 cluster.getMaster().setCatalogJanitorEnabled(true); 537 t.close(); 538 } 539 } 540 541 /** 542 * Test that if daughter split on us, we won't do the shutdown handler fixup just because we can't 543 * find the immediate daughter of an offlined parent. 544 */ 545 @Test 546 public void testShutdownFixupWhenDaughterHasSplit() throws Exception { 547 final TableName tableName = TableName.valueOf(methodName); 548 549 // Create table then get the single region for our new table. 550 Table t = createTableAndWait(tableName, HConstants.CATALOG_FAMILY); 551 List<HRegion> regions = cluster.getRegions(tableName); 552 RegionInfo hri = getAndCheckSingleTableRegion(regions); 553 int tableRegionIndex = ensureTableRegionNotOnSameServerAsMeta(admin, hri); 554 555 // Turn off balancer so it doesn't cut in and mess up our placements. 556 this.admin.balancerSwitch(false, true); 557 // Turn off the meta scanner so it don't remove parent on us. 558 cluster.getMaster().setCatalogJanitorEnabled(false); 559 try { 560 // Add a bit of load up into the table so splittable. 561 TESTING_UTIL.loadTable(t, HConstants.CATALOG_FAMILY); 562 // Get region pre-split. 563 HRegionServer server = cluster.getRegionServer(tableRegionIndex); 564 printOutRegions(server, "Initial regions: "); 565 // Now split. 566 admin.splitRegionAsync(hri.getRegionName()).get(2, TimeUnit.MINUTES); 567 // Get daughters 568 List<HRegion> daughters = checkAndGetDaughters(tableName); 569 // Now split one of the daughters. 570 HRegion daughterRegion = daughters.get(0); 571 RegionInfo daughter = daughterRegion.getRegionInfo(); 572 LOG.info("Daughter we are going to split: " + daughter); 573 clearReferences(daughterRegion); 574 LOG.info("Finished {} references={}", daughterRegion, daughterRegion.hasReferences()); 575 admin.splitRegionAsync(daughter.getRegionName()).get(2, TimeUnit.MINUTES); 576 // Get list of daughters 577 daughters = cluster.getRegions(tableName); 578 for (HRegion d : daughters) { 579 LOG.info("Regions before crash: " + d); 580 } 581 // Now crash the server 582 cluster.abortRegionServer(tableRegionIndex); 583 waitUntilRegionServerDead(); 584 awaitDaughters(tableName, daughters.size()); 585 // Assert daughters are online and ONLY the original daughters -- that 586 // fixup didn't insert one during server shutdown recover. 587 regions = cluster.getRegions(tableName); 588 for (HRegion d : daughters) { 589 LOG.info("Regions after crash: " + d); 590 } 591 if (daughters.size() != regions.size()) { 592 LOG.info("Daughters=" + daughters.size() + ", regions=" + regions.size()); 593 } 594 assertEquals(daughters.size(), regions.size()); 595 for (HRegion r : regions) { 596 LOG.info("Regions post crash " + r + ", contains=" + daughters.contains(r)); 597 assertTrue(daughters.contains(r), "Missing region post crash " + r); 598 } 599 } finally { 600 LOG.info("EXITING"); 601 admin.balancerSwitch(true, false); 602 cluster.getMaster().setCatalogJanitorEnabled(true); 603 t.close(); 604 } 605 } 606 607 private void clearReferences(HRegion region) throws IOException { 608 // Presumption. 609 assertEquals(1, region.getStores().size()); 610 HStore store = region.getStores().get(0); 611 while (store.hasReferences()) { 612 while (store.storeEngine.getCompactor().isCompacting()) { 613 Threads.sleep(100); 614 } 615 // Run new compaction. Shoudn't be any others running. 616 region.compact(true); 617 store.closeAndArchiveCompactedFiles(); 618 } 619 } 620 621 @Test 622 public void testSplitShouldNotThrowNPEEvenARegionHasEmptySplitFiles() throws Exception { 623 TableName userTableName = TableName.valueOf(methodName); 624 TableDescriptor htd = TableDescriptorBuilder.newBuilder(userTableName) 625 .setColumnFamily(ColumnFamilyDescriptorBuilder.of("col")).build(); 626 admin.createTable(htd); 627 Table table = TESTING_UTIL.getConnection().getTable(userTableName); 628 try { 629 for (int i = 0; i <= 5; i++) { 630 String row = "row" + i; 631 Put p = new Put(Bytes.toBytes(row)); 632 String val = "Val" + i; 633 p.addColumn(Bytes.toBytes("col"), Bytes.toBytes("ql"), Bytes.toBytes(val)); 634 table.put(p); 635 admin.flush(userTableName); 636 Delete d = new Delete(Bytes.toBytes(row)); 637 // Do a normal delete 638 table.delete(d); 639 admin.flush(userTableName); 640 } 641 admin.majorCompact(userTableName); 642 List<RegionInfo> regionsOfTable = cluster.getMaster().getAssignmentManager().getRegionStates() 643 .getRegionsOfTable(userTableName); 644 assertEquals(1, regionsOfTable.size()); 645 RegionInfo hRegionInfo = regionsOfTable.get(0); 646 Put p = new Put(Bytes.toBytes("row6")); 647 p.addColumn(Bytes.toBytes("col"), Bytes.toBytes("ql"), Bytes.toBytes("val")); 648 table.put(p); 649 p = new Put(Bytes.toBytes("row7")); 650 p.addColumn(Bytes.toBytes("col"), Bytes.toBytes("ql"), Bytes.toBytes("val")); 651 table.put(p); 652 p = new Put(Bytes.toBytes("row8")); 653 p.addColumn(Bytes.toBytes("col"), Bytes.toBytes("ql"), Bytes.toBytes("val")); 654 table.put(p); 655 admin.flush(userTableName); 656 admin.splitRegionAsync(hRegionInfo.getRegionName(), Bytes.toBytes("row7")); 657 regionsOfTable = cluster.getMaster().getAssignmentManager().getRegionStates() 658 .getRegionsOfTable(userTableName); 659 660 while (regionsOfTable.size() != 2) { 661 Thread.sleep(1000); 662 regionsOfTable = cluster.getMaster().getAssignmentManager().getRegionStates() 663 .getRegionsOfTable(userTableName); 664 LOG.debug("waiting 2 regions to be available, got " + regionsOfTable.size() + ": " 665 + regionsOfTable); 666 667 } 668 assertEquals(2, regionsOfTable.size()); 669 670 Scan s = new Scan(); 671 ResultScanner scanner = table.getScanner(s); 672 int mainTableCount = 0; 673 for (Result rr = scanner.next(); rr != null; rr = scanner.next()) { 674 mainTableCount++; 675 } 676 assertEquals(3, mainTableCount); 677 } finally { 678 table.close(); 679 } 680 } 681 682 /** 683 * Verifies HBASE-5806. Here the case is that splitting is completed but before the CJ could 684 * remove the parent region the master is killed and restarted. 685 */ 686 @Test 687 public void testMasterRestartAtRegionSplitPendingCatalogJanitor() 688 throws IOException, InterruptedException, NodeExistsException, KeeperException, 689 ServiceException, ExecutionException, TimeoutException { 690 final TableName tableName = TableName.valueOf(methodName); 691 // Create table then get the single region for our new table. 692 try (Table t = createTableAndWait(tableName, HConstants.CATALOG_FAMILY)) { 693 List<HRegion> regions = cluster.getRegions(tableName); 694 RegionInfo hri = getAndCheckSingleTableRegion(regions); 695 696 int tableRegionIndex = ensureTableRegionNotOnSameServerAsMeta(admin, hri); 697 698 // Turn off balancer so it doesn't cut in and mess up our placements. 699 this.admin.balancerSwitch(false, true); 700 // Turn off the meta scanner so it don't remove parent on us. 701 cluster.getMaster().setCatalogJanitorEnabled(false); 702 // Add a bit of load up into the table so splittable. 703 TESTING_UTIL.loadTable(t, HConstants.CATALOG_FAMILY, false); 704 // Get region pre-split. 705 HRegionServer server = cluster.getRegionServer(tableRegionIndex); 706 printOutRegions(server, "Initial regions: "); 707 // Call split. 708 this.admin.splitRegionAsync(hri.getRegionName()).get(2, TimeUnit.MINUTES); 709 List<HRegion> daughters = checkAndGetDaughters(tableName); 710 711 // Before cleanup, get a new master. 712 HMaster master = abortAndWaitForMaster(); 713 // Now call compact on the daughters and clean up any references. 714 for (HRegion daughter : daughters) { 715 clearReferences(daughter); 716 assertFalse(daughter.hasReferences()); 717 } 718 // BUT calling compact on the daughters is not enough. The CatalogJanitor looks 719 // in the filesystem, and the filesystem content is not same as what the Region 720 // is reading from. Compacted-away files are picked up later by the compacted 721 // file discharger process. It runs infrequently. Make it run so CatalogJanitor 722 // doens't find any references. 723 for (RegionServerThread rst : cluster.getRegionServerThreads()) { 724 boolean oldSetting = rst.getRegionServer().compactedFileDischarger.setUseExecutor(false); 725 rst.getRegionServer().compactedFileDischarger.run(); 726 rst.getRegionServer().compactedFileDischarger.setUseExecutor(oldSetting); 727 } 728 cluster.getMaster().setCatalogJanitorEnabled(true); 729 ProcedureTestingUtility.waitAllProcedures(cluster.getMaster().getMasterProcedureExecutor()); 730 LOG.info("Starting run of CatalogJanitor"); 731 cluster.getMaster().getCatalogJanitor().run(); 732 ProcedureTestingUtility.waitAllProcedures(cluster.getMaster().getMasterProcedureExecutor()); 733 RegionStates regionStates = master.getAssignmentManager().getRegionStates(); 734 ServerName regionServerOfRegion = regionStates.getRegionServerOfRegion(hri); 735 assertEquals(null, regionServerOfRegion); 736 } finally { 737 TESTING_UTIL.getAdmin().balancerSwitch(true, false); 738 cluster.getMaster().setCatalogJanitorEnabled(true); 739 } 740 } 741 742 @Test 743 public void testSplitWithRegionReplicas() throws Exception { 744 final TableName tableName = TableName.valueOf(methodName); 745 TableDescriptor htd = TESTING_UTIL 746 .createModifyableTableDescriptor(TableName.valueOf(methodName), 747 ColumnFamilyDescriptorBuilder.DEFAULT_MIN_VERSIONS, 3, HConstants.FOREVER, 748 ColumnFamilyDescriptorBuilder.DEFAULT_KEEP_DELETED) 749 .setRegionReplication(2).setCoprocessor(SlowMeCopro.class.getName()).build(); 750 // Create table then get the single region for our new table. 751 Table t = TESTING_UTIL.createTable(htd, new byte[][] { Bytes.toBytes("cf") }, null); 752 List<HRegion> oldRegions; 753 do { 754 oldRegions = cluster.getRegions(tableName); 755 Thread.sleep(10); 756 } while (oldRegions.size() != 2); 757 for (HRegion h : oldRegions) 758 LOG.debug("OLDREGION " + h.getRegionInfo()); 759 try { 760 int regionServerIndex = 761 cluster.getServerWith(oldRegions.get(0).getRegionInfo().getRegionName()); 762 HRegionServer regionServer = cluster.getRegionServer(regionServerIndex); 763 insertData(tableName, admin, t); 764 // Turn off balancer so it doesn't cut in and mess up our placements. 765 admin.balancerSwitch(false, true); 766 // Turn off the meta scanner so it don't remove parent on us. 767 cluster.getMaster().setCatalogJanitorEnabled(false); 768 boolean tableExists = TESTING_UTIL.getAdmin().tableExists(tableName); 769 assertEquals(true, tableExists, "The specified table should be present."); 770 final HRegion region = findSplittableRegion(oldRegions); 771 regionServerIndex = cluster.getServerWith(region.getRegionInfo().getRegionName()); 772 regionServer = cluster.getRegionServer(regionServerIndex); 773 assertTrue(region != null, "not able to find a splittable region"); 774 try { 775 requestSplitRegion(regionServer, region, Bytes.toBytes("row2")); 776 } catch (IOException e) { 777 e.printStackTrace(); 778 fail("Split execution should have succeeded with no exceptions thrown " + e); 779 } 780 // TESTING_UTIL.waitUntilAllRegionsAssigned(tableName); 781 List<HRegion> newRegions; 782 do { 783 newRegions = cluster.getRegions(tableName); 784 for (HRegion h : newRegions) 785 LOG.debug("NEWREGION " + h.getRegionInfo()); 786 Thread.sleep(1000); 787 } while ( 788 (newRegions.contains(oldRegions.get(0)) || newRegions.contains(oldRegions.get(1))) 789 || newRegions.size() != 4 790 ); 791 tableExists = TESTING_UTIL.getAdmin().tableExists(tableName); 792 assertEquals(true, tableExists, "The specified table should be present."); 793 // exists works on stale and we see the put after the flush 794 byte[] b1 = Bytes.toBytes("row1"); 795 Get g = new Get(b1); 796 g.setConsistency(Consistency.STRONG); 797 // The following GET will make a trip to the meta to get the new location of the 1st daughter 798 // In the process it will also get the location of the replica of the daughter (initially 799 // pointing to the parent's replica) 800 Result r = t.get(g); 801 assertFalse(r.isStale()); 802 LOG.info("exists stale after flush done"); 803 804 SlowMeCopro.getPrimaryCdl().set(new CountDownLatch(1)); 805 g = new Get(b1); 806 g.setConsistency(Consistency.TIMELINE); 807 // This will succeed because in the previous GET we get the location of the replica 808 r = t.get(g); 809 assertTrue(r.isStale()); 810 SlowMeCopro.getPrimaryCdl().get().countDown(); 811 } finally { 812 SlowMeCopro.getPrimaryCdl().get().countDown(); 813 admin.balancerSwitch(true, false); 814 cluster.getMaster().setCatalogJanitorEnabled(true); 815 t.close(); 816 } 817 } 818 819 private void insertData(final TableName tableName, Admin admin, Table t) throws IOException { 820 insertData(tableName, admin, t, 1); 821 } 822 823 private void insertData(TableName tableName, Admin admin, Table t, int i) throws IOException { 824 Put p = new Put(Bytes.toBytes("row" + i)); 825 p.addColumn(Bytes.toBytes("cf"), Bytes.toBytes("q1"), Bytes.toBytes("1")); 826 t.put(p); 827 p = new Put(Bytes.toBytes("row" + (i + 1))); 828 p.addColumn(Bytes.toBytes("cf"), Bytes.toBytes("q1"), Bytes.toBytes("2")); 829 t.put(p); 830 p = new Put(Bytes.toBytes("row" + (i + 2))); 831 p.addColumn(Bytes.toBytes("cf"), Bytes.toBytes("q1"), Bytes.toBytes("3")); 832 t.put(p); 833 p = new Put(Bytes.toBytes("row" + (i + 3))); 834 p.addColumn(Bytes.toBytes("cf"), Bytes.toBytes("q1"), Bytes.toBytes("4")); 835 t.put(p); 836 admin.flush(tableName); 837 } 838 839 /** 840 * If a table has regions that have no store files in a region, they should split successfully 841 * into two regions with no store files. 842 */ 843 @Test 844 public void testSplitRegionWithNoStoreFiles() throws Exception { 845 final TableName tableName = TableName.valueOf(methodName); 846 // Create table then get the single region for our new table. 847 createTableAndWait(tableName, HConstants.CATALOG_FAMILY); 848 List<HRegion> regions = cluster.getRegions(tableName); 849 RegionInfo hri = getAndCheckSingleTableRegion(regions); 850 ensureTableRegionNotOnSameServerAsMeta(admin, hri); 851 int regionServerIndex = cluster.getServerWith(regions.get(0).getRegionInfo().getRegionName()); 852 HRegionServer regionServer = cluster.getRegionServer(regionServerIndex); 853 // Turn off balancer so it doesn't cut in and mess up our placements. 854 this.admin.balancerSwitch(false, true); 855 // Turn off the meta scanner so it don't remove parent on us. 856 cluster.getMaster().setCatalogJanitorEnabled(false); 857 try { 858 // Precondition: we created a table with no data, no store files. 859 printOutRegions(regionServer, "Initial regions: "); 860 Configuration conf = cluster.getConfiguration(); 861 HBaseFsck.debugLsr(conf, new Path("/")); 862 Path rootDir = CommonFSUtils.getRootDir(conf); 863 FileSystem fs = TESTING_UTIL.getDFSCluster().getFileSystem(); 864 Map<String, Path> storefiles = FSUtils.getTableStoreFilePathMap(null, fs, rootDir, tableName); 865 assertEquals(0, storefiles.size(), "Expected nothing but found " + storefiles.toString()); 866 867 // find a splittable region. Refresh the regions list 868 regions = cluster.getRegions(tableName); 869 final HRegion region = findSplittableRegion(regions); 870 assertTrue(region != null, "not able to find a splittable region"); 871 872 // Now split. 873 try { 874 requestSplitRegion(regionServer, region, Bytes.toBytes("row2")); 875 } catch (IOException e) { 876 fail("Split execution should have succeeded with no exceptions thrown"); 877 } 878 879 // Postcondition: split the table with no store files into two regions, but still have no 880 // store files 881 List<HRegion> daughters = cluster.getRegions(tableName); 882 assertEquals(2, daughters.size()); 883 884 // check dirs 885 HBaseFsck.debugLsr(conf, new Path("/")); 886 Map<String, Path> storefilesAfter = 887 FSUtils.getTableStoreFilePathMap(null, fs, rootDir, tableName); 888 assertEquals(0, storefilesAfter.size(), 889 "Expected nothing but found " + storefilesAfter.toString()); 890 891 hri = region.getRegionInfo(); // split parent 892 AssignmentManager am = cluster.getMaster().getAssignmentManager(); 893 RegionStates regionStates = am.getRegionStates(); 894 long start = EnvironmentEdgeManager.currentTime(); 895 while (!regionStates.isRegionInState(hri, State.SPLIT)) { 896 LOG.debug("Waiting for SPLIT state on: " + hri); 897 assertFalse(EnvironmentEdgeManager.currentTime() - start > 60000, 898 "Timed out in waiting split parent to be in state SPLIT"); 899 Thread.sleep(500); 900 } 901 assertTrue(regionStates.isRegionInState(daughters.get(0).getRegionInfo(), State.OPEN)); 902 assertTrue(regionStates.isRegionInState(daughters.get(1).getRegionInfo(), State.OPEN)); 903 904 // We should not be able to assign it again 905 try { 906 am.assign(hri); 907 } catch (DoNotRetryIOException e) { 908 // Expected 909 } 910 assertFalse(am.isRegionInTransition(hri), "Split region can't be assigned"); 911 assertTrue(regionStates.isRegionInState(hri, State.SPLIT)); 912 913 // We should not be able to unassign it either 914 try { 915 am.unassign(hri); 916 fail("Should have thrown exception"); 917 } catch (DoNotRetryIOException e) { 918 // Expected 919 } 920 assertFalse(am.isRegionInTransition(hri), "Split region can't be unassigned"); 921 assertTrue(regionStates.isRegionInState(hri, State.SPLIT)); 922 } finally { 923 admin.balancerSwitch(true, false); 924 cluster.getMaster().setCatalogJanitorEnabled(true); 925 } 926 } 927 928 @Test 929 public void testStoreFileReferenceCreationWhenSplitPolicySaysToSkipRangeCheck() throws Exception { 930 final TableName tableName = TableName.valueOf(methodName); 931 try { 932 byte[] cf = Bytes.toBytes("f"); 933 byte[] cf1 = Bytes.toBytes("i_f"); 934 TableDescriptor htd = TableDescriptorBuilder.newBuilder(tableName) 935 .setColumnFamily(ColumnFamilyDescriptorBuilder.of(cf)) 936 .setColumnFamily(ColumnFamilyDescriptorBuilder.of(cf1)) 937 .setRegionSplitPolicyClassName(CustomSplitPolicy.class.getName()).build(); 938 admin.createTable(htd); 939 List<HRegion> regions = awaitTableRegions(tableName); 940 HRegion region = regions.get(0); 941 for (int i = 3; i < 9; i++) { 942 Put p = new Put(Bytes.toBytes("row" + i)); 943 p.addColumn(cf, Bytes.toBytes("q"), Bytes.toBytes("value" + i)); 944 p.addColumn(cf1, Bytes.toBytes("q"), Bytes.toBytes("value" + i)); 945 region.put(p); 946 } 947 region.flush(true); 948 HStore store = region.getStore(cf); 949 Collection<HStoreFile> storefiles = store.getStorefiles(); 950 assertEquals(1, storefiles.size()); 951 assertFalse(region.hasReferences()); 952 HRegionFileSystem hfs = region.getRegionFileSystem(); 953 StoreFileTracker sft = StoreFileTrackerFactory.create(TESTING_UTIL.getConfiguration(), true, 954 store.getStoreContext()); 955 StoreFileInfo referenceStoreFileInfo = hfs.splitStoreFile(region.getRegionInfo(), "f", 956 storefiles.iterator().next(), Bytes.toBytes("row1"), false, region.getSplitPolicy(), sft); 957 assertNull(referenceStoreFileInfo); 958 Path referencePath = 959 hfs.splitStoreFile(region.getRegionInfo(), "i_f", storefiles.iterator().next(), 960 Bytes.toBytes("row1"), false, region.getSplitPolicy(), sft).getPath(); 961 assertNotNull(referencePath); 962 } finally { 963 TESTING_UTIL.deleteTable(tableName); 964 } 965 } 966 967 private HRegion findSplittableRegion(final List<HRegion> regions) throws InterruptedException { 968 for (int i = 0; i < 5; ++i) { 969 for (HRegion r : regions) { 970 if (r.isSplittable() && r.getRegionInfo().getReplicaId() == 0) { 971 return (r); 972 } 973 } 974 Thread.sleep(100); 975 } 976 return null; 977 } 978 979 private List<HRegion> checkAndGetDaughters(TableName tableName) throws InterruptedException { 980 List<HRegion> daughters = null; 981 // try up to 10s 982 for (int i = 0; i < 100; i++) { 983 daughters = cluster.getRegions(tableName); 984 if (daughters.size() >= 2) { 985 break; 986 } 987 Thread.sleep(100); 988 } 989 assertTrue(daughters.size() >= 2); 990 return daughters; 991 } 992 993 private HMaster abortAndWaitForMaster() throws IOException, InterruptedException { 994 cluster.abortMaster(0); 995 cluster.waitOnMaster(0); 996 HMaster master = cluster.startMaster().getMaster(); 997 cluster.waitForActiveAndReadyMaster(); 998 // reset the connections 999 Closeables.close(admin, true); 1000 TESTING_UTIL.invalidateConnection(); 1001 admin = TESTING_UTIL.getAdmin(); 1002 return master; 1003 } 1004 1005 /** 1006 * Ensure single table region is not on same server as the single hbase:meta table region. 1007 * @return Index of the server hosting the single table region 1008 */ 1009 private int ensureTableRegionNotOnSameServerAsMeta(final Admin admin, final RegionInfo hri) 1010 throws IOException, MasterNotRunningException, ZooKeeperConnectionException, 1011 InterruptedException { 1012 // Now make sure that the table region is not on same server as that hosting 1013 // hbase:meta We don't want hbase:meta replay polluting our test when we later crash 1014 // the table region serving server. 1015 int metaServerIndex = cluster.getServerWithMeta(); 1016 HRegionServer metaRegionServer = cluster.getRegionServer(metaServerIndex); 1017 int tableRegionIndex = cluster.getServerWith(hri.getRegionName()); 1018 assertTrue(tableRegionIndex != -1); 1019 HRegionServer tableRegionServer = cluster.getRegionServer(tableRegionIndex); 1020 LOG.info("MetaRegionServer=" + metaRegionServer.getServerName() + ", other=" 1021 + tableRegionServer.getServerName()); 1022 if (metaRegionServer.getServerName().equals(tableRegionServer.getServerName())) { 1023 HRegionServer hrs = getOtherRegionServer(cluster, metaRegionServer); 1024 assertNotNull(hrs); 1025 assertNotNull(hri); 1026 LOG.info("Moving " + hri.getRegionNameAsString() + " from " + metaRegionServer.getServerName() 1027 + " to " + hrs.getServerName() + "; metaServerIndex=" + metaServerIndex); 1028 admin.move(hri.getEncodedNameAsBytes(), hrs.getServerName()); 1029 } 1030 // Wait till table region is up on the server that is NOT carrying hbase:meta. 1031 for (int i = 0; i < 100; i++) { 1032 tableRegionIndex = cluster.getServerWith(hri.getRegionName()); 1033 if (tableRegionIndex != -1 && tableRegionIndex != metaServerIndex) break; 1034 LOG.debug("Waiting on region move off the hbase:meta server; current index " 1035 + tableRegionIndex + " and metaServerIndex=" + metaServerIndex); 1036 Thread.sleep(100); 1037 } 1038 assertTrue(tableRegionIndex != -1 && tableRegionIndex != metaServerIndex, 1039 "Region not moved off hbase:meta server, tableRegionIndex=" + tableRegionIndex); 1040 // Verify for sure table region is not on same server as hbase:meta 1041 tableRegionIndex = cluster.getServerWith(hri.getRegionName()); 1042 assertTrue(tableRegionIndex != -1); 1043 assertNotSame(metaServerIndex, tableRegionIndex); 1044 return tableRegionIndex; 1045 } 1046 1047 /** 1048 * Find regionserver other than the one passed. Can't rely on indexes into list of regionservers 1049 * since crashed servers occupy an index. 1050 * @return A regionserver that is not <code>notThisOne</code> or null if none found 1051 */ 1052 private HRegionServer getOtherRegionServer(final SingleProcessHBaseCluster cluster, 1053 final HRegionServer notThisOne) { 1054 for (RegionServerThread rst : cluster.getRegionServerThreads()) { 1055 HRegionServer hrs = rst.getRegionServer(); 1056 if (hrs.getServerName().equals(notThisOne.getServerName())) continue; 1057 if (hrs.isStopping() || hrs.isStopped()) continue; 1058 return hrs; 1059 } 1060 return null; 1061 } 1062 1063 private void printOutRegions(final HRegionServer hrs, final String prefix) throws IOException { 1064 List<RegionInfo> regions = ProtobufUtil.getOnlineRegions(hrs.getRSRpcServices()); 1065 for (RegionInfo region : regions) { 1066 LOG.info(prefix + region.getRegionNameAsString()); 1067 } 1068 } 1069 1070 private void waitUntilRegionServerDead() throws InterruptedException, IOException { 1071 // Wait until the master processes the RS shutdown 1072 for (int i = 1073 0; (cluster.getMaster().getClusterMetrics().getLiveServerMetrics().size() > NB_SERVERS 1074 || cluster.getLiveRegionServerThreads().size() > NB_SERVERS) && i < 100; i++) { 1075 LOG.info("Waiting on server to go down"); 1076 Thread.sleep(100); 1077 } 1078 assertFalse( 1079 cluster.getMaster().getClusterMetrics().getLiveServerMetrics().size() > NB_SERVERS 1080 || cluster.getLiveRegionServerThreads().size() > NB_SERVERS, 1081 "Waited too long for RS to die"); 1082 } 1083 1084 private void awaitDaughters(TableName tableName, int numDaughters) throws InterruptedException { 1085 // Wait till regions are back on line again. 1086 for (int i = 0; cluster.getRegions(tableName).size() < numDaughters && i < 60; i++) { 1087 LOG.info("Waiting for repair to happen"); 1088 Thread.sleep(1000); 1089 } 1090 if (cluster.getRegions(tableName).size() < numDaughters) { 1091 fail("Waiting too long for daughter regions"); 1092 } 1093 } 1094 1095 private List<HRegion> awaitTableRegions(final TableName tableName) throws InterruptedException { 1096 List<HRegion> regions = null; 1097 for (int i = 0; i < 100; i++) { 1098 regions = cluster.getRegions(tableName); 1099 if (regions.size() > 0) break; 1100 Thread.sleep(100); 1101 } 1102 return regions; 1103 } 1104 1105 private Table createTableAndWait(TableName tableName, byte[] cf) 1106 throws IOException, InterruptedException { 1107 Table t = TESTING_UTIL.createTable(tableName, cf); 1108 awaitTableRegions(tableName); 1109 assertTrue(cluster.getRegions(tableName).size() != 0, "Table not online: " + tableName); 1110 return t; 1111 } 1112 1113 // Make it public so that JVMClusterUtil can access it. 1114 public static class MyMaster extends HMaster { 1115 public MyMaster(Configuration conf) throws IOException, KeeperException, InterruptedException { 1116 super(conf); 1117 } 1118 1119 @Override 1120 protected MasterRpcServices createRpcServices() throws IOException { 1121 return new MyMasterRpcServices(this); 1122 } 1123 } 1124 1125 static class MyMasterRpcServices extends MasterRpcServices { 1126 static AtomicBoolean enabled = new AtomicBoolean(false); 1127 1128 private HMaster myMaster; 1129 1130 public MyMasterRpcServices(HMaster master) throws IOException { 1131 super(master); 1132 myMaster = master; 1133 } 1134 1135 @Override 1136 public ReportRegionStateTransitionResponse reportRegionStateTransition(RpcController c, 1137 ReportRegionStateTransitionRequest req) throws ServiceException { 1138 ReportRegionStateTransitionResponse resp = super.reportRegionStateTransition(c, req); 1139 if ( 1140 enabled.get() 1141 && req.getTransition(0).getTransitionCode().equals(TransitionCode.READY_TO_SPLIT) 1142 && !resp.hasErrorMessage() 1143 ) { 1144 AssignmentManager am = myMaster.getAssignmentManager(); 1145 for (RegionStateNode regionState : am.getRegionsInTransition()) { 1146 if (regionState.toRegionState().isSplittingNew()) { 1147 am.getRegionStates().deleteRegion(regionState.toRegionState().getRegion()); 1148 } 1149 } 1150 } 1151 return resp; 1152 } 1153 } 1154 1155 static class CustomSplitPolicy extends IncreasingToUpperBoundRegionSplitPolicy { 1156 1157 @Override 1158 protected boolean shouldSplit() { 1159 return true; 1160 } 1161 1162 @Override 1163 public boolean skipStoreFileRangeCheck(String familyName) { 1164 if (familyName.startsWith("i_")) { 1165 return true; 1166 } else { 1167 return false; 1168 } 1169 } 1170 } 1171}