001/* 002 * Licensed to the Apache Software Foundation (ASF) under one 003 * or more contributor license agreements. See the NOTICE file 004 * distributed with this work for additional information 005 * regarding copyright ownership. The ASF licenses this file 006 * to you under the Apache License, Version 2.0 (the 007 * "License"); you may not use this file except in compliance 008 * with the License. You may obtain a copy of the License at 009 * 010 * http://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, software 013 * distributed under the License is distributed on an "AS IS" BASIS, 014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 015 * See the License for the specific language governing permissions and 016 * limitations under the License. 017 */ 018package org.apache.hadoop.hbase.regionserver; 019 020import static org.apache.hadoop.hbase.client.TableDescriptorBuilder.SPLIT_POLICY; 021import static org.junit.Assert.assertEquals; 022import static org.junit.Assert.assertFalse; 023import static org.junit.Assert.assertNotEquals; 024import static org.junit.Assert.assertNotNull; 025import static org.junit.Assert.assertNotSame; 026import static org.junit.Assert.assertNull; 027import static org.junit.Assert.assertTrue; 028import static org.junit.Assert.fail; 029 030import java.io.IOException; 031import java.lang.reflect.Field; 032import java.util.ArrayList; 033import java.util.Collection; 034import java.util.List; 035import java.util.Map; 036import java.util.Optional; 037import java.util.concurrent.CountDownLatch; 038import java.util.concurrent.ExecutionException; 039import java.util.concurrent.TimeUnit; 040import java.util.concurrent.TimeoutException; 041import java.util.concurrent.atomic.AtomicBoolean; 042import org.apache.hadoop.conf.Configuration; 043import org.apache.hadoop.fs.FileSystem; 044import org.apache.hadoop.fs.Path; 045import org.apache.hadoop.hbase.CellComparator; 046import org.apache.hadoop.hbase.Coprocessor; 047import org.apache.hadoop.hbase.CoprocessorEnvironment; 048import org.apache.hadoop.hbase.DoNotRetryIOException; 049import org.apache.hadoop.hbase.HBaseClassTestRule; 050import org.apache.hadoop.hbase.HBaseTestingUtility; 051import org.apache.hadoop.hbase.HConstants; 052import org.apache.hadoop.hbase.HTableDescriptor; 053import org.apache.hadoop.hbase.MasterNotRunningException; 054import org.apache.hadoop.hbase.MiniHBaseCluster; 055import org.apache.hadoop.hbase.PrivateCellUtil; 056import org.apache.hadoop.hbase.ServerName; 057import org.apache.hadoop.hbase.StartMiniClusterOption; 058import org.apache.hadoop.hbase.TableName; 059import org.apache.hadoop.hbase.ZooKeeperConnectionException; 060import org.apache.hadoop.hbase.client.Admin; 061import org.apache.hadoop.hbase.client.ColumnFamilyDescriptorBuilder; 062import org.apache.hadoop.hbase.client.Consistency; 063import org.apache.hadoop.hbase.client.Delete; 064import org.apache.hadoop.hbase.client.DoNotRetryRegionException; 065import org.apache.hadoop.hbase.client.Get; 066import org.apache.hadoop.hbase.client.Mutation; 067import org.apache.hadoop.hbase.client.Put; 068import org.apache.hadoop.hbase.client.RegionInfo; 069import org.apache.hadoop.hbase.client.Result; 070import org.apache.hadoop.hbase.client.ResultScanner; 071import org.apache.hadoop.hbase.client.Scan; 072import org.apache.hadoop.hbase.client.Table; 073import org.apache.hadoop.hbase.client.TableDescriptor; 074import org.apache.hadoop.hbase.client.TableDescriptorBuilder; 075import org.apache.hadoop.hbase.client.TestReplicasClient.SlowMeCopro; 076import org.apache.hadoop.hbase.coprocessor.MasterCoprocessor; 077import org.apache.hadoop.hbase.coprocessor.MasterCoprocessorEnvironment; 078import org.apache.hadoop.hbase.coprocessor.MasterObserver; 079import org.apache.hadoop.hbase.coprocessor.ObserverContext; 080import org.apache.hadoop.hbase.io.HFileLink; 081import org.apache.hadoop.hbase.io.Reference; 082import org.apache.hadoop.hbase.master.HMaster; 083import org.apache.hadoop.hbase.master.LoadBalancer; 084import org.apache.hadoop.hbase.master.MasterRpcServices; 085import org.apache.hadoop.hbase.master.RegionState; 086import org.apache.hadoop.hbase.master.RegionState.State; 087import org.apache.hadoop.hbase.master.assignment.AssignmentManager; 088import org.apache.hadoop.hbase.master.assignment.AssignmentTestingUtil; 089import org.apache.hadoop.hbase.master.assignment.RegionStateNode; 090import org.apache.hadoop.hbase.master.assignment.RegionStates; 091import org.apache.hadoop.hbase.procedure2.ProcedureTestingUtility; 092import org.apache.hadoop.hbase.regionserver.compactions.CompactionContext; 093import org.apache.hadoop.hbase.regionserver.compactions.CompactionLifeCycleTracker; 094import org.apache.hadoop.hbase.regionserver.throttle.NoLimitThroughputController; 095import org.apache.hadoop.hbase.testclassification.LargeTests; 096import org.apache.hadoop.hbase.testclassification.RegionServerTests; 097import org.apache.hadoop.hbase.util.Bytes; 098import org.apache.hadoop.hbase.util.CommonFSUtils; 099import org.apache.hadoop.hbase.util.EnvironmentEdgeManager; 100import org.apache.hadoop.hbase.util.FSUtils; 101import org.apache.hadoop.hbase.util.HBaseFsck; 102import org.apache.hadoop.hbase.util.JVMClusterUtil.RegionServerThread; 103import org.apache.hadoop.hbase.util.Threads; 104import org.apache.zookeeper.KeeperException; 105import org.apache.zookeeper.KeeperException.NodeExistsException; 106import org.junit.After; 107import org.junit.AfterClass; 108import org.junit.Assert; 109import org.junit.Before; 110import org.junit.BeforeClass; 111import org.junit.ClassRule; 112import org.junit.Rule; 113import org.junit.Test; 114import org.junit.experimental.categories.Category; 115import org.junit.rules.TestName; 116import org.mockito.Mockito; 117import org.slf4j.Logger; 118import org.slf4j.LoggerFactory; 119 120import org.apache.hbase.thirdparty.com.google.common.io.Closeables; 121import org.apache.hbase.thirdparty.com.google.protobuf.RpcController; 122import org.apache.hbase.thirdparty.com.google.protobuf.ServiceException; 123 124import org.apache.hadoop.hbase.shaded.protobuf.ProtobufUtil; 125import org.apache.hadoop.hbase.shaded.protobuf.generated.RegionServerStatusProtos.RegionStateTransition.TransitionCode; 126import org.apache.hadoop.hbase.shaded.protobuf.generated.RegionServerStatusProtos.ReportRegionStateTransitionRequest; 127import org.apache.hadoop.hbase.shaded.protobuf.generated.RegionServerStatusProtos.ReportRegionStateTransitionResponse; 128 129/** 130 * The below tests are testing split region against a running cluster 131 */ 132@Category({ RegionServerTests.class, LargeTests.class }) 133public class TestSplitTransactionOnCluster { 134 135 @ClassRule 136 public static final HBaseClassTestRule CLASS_RULE = 137 HBaseClassTestRule.forClass(TestSplitTransactionOnCluster.class); 138 139 private static final Logger LOG = LoggerFactory.getLogger(TestSplitTransactionOnCluster.class); 140 private Admin admin = null; 141 private MiniHBaseCluster cluster = null; 142 private static final int NB_SERVERS = 3; 143 144 static final HBaseTestingUtility TESTING_UTIL = new HBaseTestingUtility(); 145 146 @Rule 147 public TestName name = new TestName(); 148 149 @BeforeClass 150 public static void before() throws Exception { 151 TESTING_UTIL.getConfiguration().setInt(HConstants.HBASE_BALANCER_PERIOD, 60000); 152 StartMiniClusterOption option = StartMiniClusterOption.builder().masterClass(MyMaster.class) 153 .numRegionServers(NB_SERVERS).numDataNodes(NB_SERVERS).build(); 154 TESTING_UTIL.startMiniCluster(option); 155 } 156 157 @AfterClass 158 public static void after() throws Exception { 159 TESTING_UTIL.shutdownMiniCluster(); 160 } 161 162 @Before 163 public void setup() throws IOException { 164 TESTING_UTIL.ensureSomeNonStoppedRegionServersAvailable(NB_SERVERS); 165 this.admin = TESTING_UTIL.getAdmin(); 166 this.cluster = TESTING_UTIL.getMiniHBaseCluster(); 167 } 168 169 @After 170 public void tearDown() throws Exception { 171 this.admin.close(); 172 for (TableDescriptor htd : this.admin.listTableDescriptors()) { 173 LOG.info("Tear down, remove table=" + htd.getTableName()); 174 TESTING_UTIL.deleteTable(htd.getTableName()); 175 } 176 } 177 178 private RegionInfo getAndCheckSingleTableRegion(final List<HRegion> regions) 179 throws IOException, InterruptedException { 180 assertEquals(1, regions.size()); 181 RegionInfo hri = regions.get(0).getRegionInfo(); 182 AssignmentTestingUtil.waitForAssignment(cluster.getMaster().getAssignmentManager(), hri); 183 return hri; 184 } 185 186 private void requestSplitRegion(final HRegionServer rsServer, final Region region, 187 final byte[] midKey) throws IOException { 188 long procId = cluster.getMaster().splitRegion(region.getRegionInfo(), midKey, 0, 0); 189 // wait for the split to complete or get interrupted. If the split completes successfully, 190 // the procedure will return true; if the split fails, the procedure would throw exception. 191 ProcedureTestingUtility.waitProcedure(cluster.getMaster().getMasterProcedureExecutor(), procId); 192 } 193 194 @Test 195 public void testRITStateForRollback() throws Exception { 196 final TableName tableName = TableName.valueOf(name.getMethodName()); 197 final HMaster master = cluster.getMaster(); 198 try { 199 // Create table then get the single region for our new table. 200 Table t = createTableAndWait(tableName, Bytes.toBytes("cf")); 201 final List<HRegion> regions = cluster.getRegions(tableName); 202 final RegionInfo hri = getAndCheckSingleTableRegion(regions); 203 insertData(tableName, admin, t); 204 t.close(); 205 206 // Turn off balancer so it doesn't cut in and mess up our placements. 207 this.admin.balancerSwitch(false, true); 208 // Turn off the meta scanner so it don't remove parent on us. 209 master.setCatalogJanitorEnabled(false); 210 211 // find a splittable region 212 final HRegion region = findSplittableRegion(regions); 213 assertTrue("not able to find a splittable region", region != null); 214 215 // install master co-processor to fail splits 216 master.getMasterCoprocessorHost().load(FailingSplitMasterObserver.class, 217 Coprocessor.PRIORITY_USER, master.getConfiguration()); 218 219 // split async 220 this.admin.splitRegionAsync(region.getRegionInfo().getRegionName(), new byte[] { 42 }); 221 222 // we have to wait until the SPLITTING state is seen by the master 223 FailingSplitMasterObserver observer = 224 master.getMasterCoprocessorHost().findCoprocessor(FailingSplitMasterObserver.class); 225 assertNotNull(observer); 226 observer.latch.await(); 227 228 LOG.info("Waiting for region to come out of RIT"); 229 while (!cluster.getMaster().getAssignmentManager().getRegionStates().isRegionOnline(hri)) { 230 Threads.sleep(100); 231 } 232 assertTrue(cluster.getMaster().getAssignmentManager().getRegionStates().isRegionOnline(hri)); 233 } finally { 234 admin.balancerSwitch(true, false); 235 master.setCatalogJanitorEnabled(true); 236 abortAndWaitForMaster(); 237 TESTING_UTIL.deleteTable(tableName); 238 } 239 } 240 241 @Test 242 public void testSplitFailedCompactionAndSplit() throws Exception { 243 final TableName tableName = TableName.valueOf(name.getMethodName()); 244 // Create table then get the single region for our new table. 245 byte[] cf = Bytes.toBytes("cf"); 246 TableDescriptor htd = TableDescriptorBuilder.newBuilder(tableName) 247 .setColumnFamily(ColumnFamilyDescriptorBuilder.of(cf)).build(); 248 admin.createTable(htd); 249 250 for (int i = 0; cluster.getRegions(tableName).isEmpty() && i < 100; i++) { 251 Thread.sleep(100); 252 } 253 assertEquals(1, cluster.getRegions(tableName).size()); 254 255 HRegion region = cluster.getRegions(tableName).get(0); 256 HStore store = region.getStore(cf); 257 int regionServerIndex = cluster.getServerWith(region.getRegionInfo().getRegionName()); 258 HRegionServer regionServer = cluster.getRegionServer(regionServerIndex); 259 260 Table t = TESTING_UTIL.getConnection().getTable(tableName); 261 // insert data 262 insertData(tableName, admin, t); 263 insertData(tableName, admin, t); 264 265 int fileNum = store.getStorefiles().size(); 266 // 0, Compaction Request 267 store.triggerMajorCompaction(); 268 Optional<CompactionContext> cc = store.requestCompaction(); 269 assertTrue(cc.isPresent()); 270 // 1, A timeout split 271 // 1.1 close region 272 assertEquals(2, region.close(false).get(cf).size()); 273 // 1.2 rollback and Region initialize again 274 region.initialize(); 275 276 // 2, Run Compaction cc 277 assertFalse(region.compact(cc.get(), store, NoLimitThroughputController.INSTANCE)); 278 assertTrue(fileNum > store.getStorefiles().size()); 279 280 // 3, Split 281 requestSplitRegion(regionServer, region, Bytes.toBytes("row3")); 282 assertEquals(2, cluster.getRegions(tableName).size()); 283 } 284 285 @Test 286 public void testSplitCompactWithPriority() throws Exception { 287 final TableName tableName = TableName.valueOf(name.getMethodName()); 288 // Create table then get the single region for our new table. 289 byte[] cf = Bytes.toBytes("cf"); 290 TableDescriptor htd = TableDescriptorBuilder.newBuilder(tableName) 291 .setColumnFamily(ColumnFamilyDescriptorBuilder.of(cf)).build(); 292 admin.createTable(htd); 293 294 assertNotEquals("Unable to retrieve regions of the table", -1, 295 TESTING_UTIL.waitFor(10000, () -> cluster.getRegions(tableName).size() == 1)); 296 297 HRegion region = cluster.getRegions(tableName).get(0); 298 HStore store = region.getStore(cf); 299 int regionServerIndex = cluster.getServerWith(region.getRegionInfo().getRegionName()); 300 HRegionServer regionServer = cluster.getRegionServer(regionServerIndex); 301 302 Table table = TESTING_UTIL.getConnection().getTable(tableName); 303 // insert data 304 insertData(tableName, admin, table); 305 insertData(tableName, admin, table, 20); 306 insertData(tableName, admin, table, 40); 307 308 // Compaction Request 309 store.triggerMajorCompaction(); 310 Optional<CompactionContext> compactionContext = store.requestCompaction(); 311 assertTrue(compactionContext.isPresent()); 312 assertFalse(compactionContext.get().getRequest().isAfterSplit()); 313 assertEquals(compactionContext.get().getRequest().getPriority(), 13); 314 315 // Split 316 long procId = 317 cluster.getMaster().splitRegion(region.getRegionInfo(), Bytes.toBytes("row4"), 0, 0); 318 319 // wait for the split to complete or get interrupted. If the split completes successfully, 320 // the procedure will return true; if the split fails, the procedure would throw exception. 321 ProcedureTestingUtility.waitProcedure(cluster.getMaster().getMasterProcedureExecutor(), procId); 322 Thread.sleep(3000); 323 assertNotEquals("Table is not split properly?", -1, 324 TESTING_UTIL.waitFor(3000, () -> cluster.getRegions(tableName).size() == 2)); 325 // we have 2 daughter regions 326 HRegion hRegion1 = cluster.getRegions(tableName).get(0); 327 HRegion hRegion2 = cluster.getRegions(tableName).get(1); 328 HStore hStore1 = hRegion1.getStore(cf); 329 HStore hStore2 = hRegion2.getStore(cf); 330 331 // For hStore1 && hStore2, set mock reference to one of the storeFiles 332 StoreFileInfo storeFileInfo1 = new ArrayList<>(hStore1.getStorefiles()).get(0).getFileInfo(); 333 StoreFileInfo storeFileInfo2 = new ArrayList<>(hStore2.getStorefiles()).get(0).getFileInfo(); 334 Field field = StoreFileInfo.class.getDeclaredField("reference"); 335 field.setAccessible(true); 336 field.set(storeFileInfo1, Mockito.mock(Reference.class)); 337 field.set(storeFileInfo2, Mockito.mock(Reference.class)); 338 hStore1.triggerMajorCompaction(); 339 hStore2.triggerMajorCompaction(); 340 341 compactionContext = hStore1.requestCompaction(); 342 assertTrue(compactionContext.isPresent()); 343 // since we set mock reference to one of the storeFiles, we will get isAfterSplit=true && 344 // highest priority for hStore1's compactionContext 345 assertTrue(compactionContext.get().getRequest().isAfterSplit()); 346 assertEquals(compactionContext.get().getRequest().getPriority(), Integer.MIN_VALUE + 1000); 347 348 compactionContext = 349 hStore2.requestCompaction(Integer.MIN_VALUE + 10, CompactionLifeCycleTracker.DUMMY, null); 350 assertTrue(compactionContext.isPresent()); 351 // compaction request contains higher priority than default priority of daughter region 352 // compaction (Integer.MIN_VALUE + 1000), hence we are expecting request priority to 353 // be accepted. 354 assertTrue(compactionContext.get().getRequest().isAfterSplit()); 355 assertEquals(compactionContext.get().getRequest().getPriority(), Integer.MIN_VALUE + 10); 356 admin.disableTable(tableName); 357 admin.deleteTable(tableName); 358 } 359 360 @Test 361 public void testContinuousSplitUsingLinkFile() throws Exception { 362 final TableName tableName = TableName.valueOf(name.getMethodName()); 363 // Create table then get the single region for our new table. 364 byte[] cf = Bytes.toBytes("cf"); 365 TableDescriptorBuilder builder = TableDescriptorBuilder.newBuilder(tableName) 366 .setColumnFamily(ColumnFamilyDescriptorBuilder.of(cf)); 367 String splitPolicy = ConstantSizeRegionSplitPolicy.class.getName(); 368 builder.setValue(SPLIT_POLICY, splitPolicy); 369 370 admin.createTable(builder.build()); 371 admin.compactionSwitch(false, new ArrayList<>()); 372 373 assertNotEquals("Unable to retrieve regions of the table", -1, 374 TESTING_UTIL.waitFor(10000, () -> cluster.getRegions(tableName).size() == 1)); 375 Table table = TESTING_UTIL.getConnection().getTable(tableName); 376 // insert data 377 insertData(tableName, admin, table, 10); 378 insertData(tableName, admin, table, 20); 379 insertData(tableName, admin, table, 40); 380 int rowCount = 3 * 4; 381 Scan scan = new Scan(); 382 scanValidate(scan, rowCount, table); 383 384 // Split 385 admin.splitRegionAsync(cluster.getRegions(tableName).get(0).getRegionInfo().getRegionName(), 386 Bytes.toBytes("row14")); 387 // wait for the split to complete or get interrupted. If the split completes successfully, 388 // the procedure will return true; if the split fails, the procedure would throw exception. 389 Thread.sleep(3000); 390 assertNotEquals("Table is not split properly?", -1, 391 TESTING_UTIL.waitFor(3000, () -> cluster.getRegions(tableName).size() == 2)); 392 // we have 2 daughter regions 393 HRegion hRegion1 = cluster.getRegions(tableName).get(0); 394 HRegion hRegion2 = cluster.getRegions(tableName).get(1); 395 HStore hStore1 = hRegion1.getStore(cf); 396 HStore hStore2 = hRegion2.getStore(cf); 397 // the sum of store files of the two children should be equal to their parent 398 assertEquals(3, hStore1.getStorefilesCount() + hStore2.getStorefilesCount()); 399 // both the two children should have link files 400 for (StoreFile sf : hStore1.getStorefiles()) { 401 assertTrue(HFileLink.isHFileLink(sf.getPath())); 402 } 403 for (StoreFile sf : hStore2.getStorefiles()) { 404 assertTrue(HFileLink.isHFileLink(sf.getPath())); 405 } 406 // validate children data 407 scan = new Scan(); 408 scanValidate(scan, rowCount, table); 409 410 // Continuous Split 411 findRegionToSplit(tableName, "row24"); 412 Thread.sleep(3000); 413 assertNotEquals("Table is not split properly?", -1, 414 TESTING_UTIL.waitFor(3000, () -> cluster.getRegions(tableName).size() == 3)); 415 // now table has 3 region, each region should have one link file 416 for (HRegion newRegion : cluster.getRegions(tableName)) { 417 assertEquals(1, newRegion.getStore(cf).getStorefilesCount()); 418 assertTrue( 419 HFileLink.isHFileLink(newRegion.getStore(cf).getStorefiles().iterator().next().getPath())); 420 } 421 422 scan = new Scan(); 423 scanValidate(scan, rowCount, table); 424 425 // Continuous Split, random split HFileLink, generate Reference files. 426 // After this, can not continuous split, because there are reference files. 427 findRegionToSplit(tableName, "row11"); 428 Thread.sleep(3000); 429 assertNotEquals("Table is not split properly?", -1, 430 TESTING_UTIL.waitFor(3000, () -> cluster.getRegions(tableName).size() == 4)); 431 432 scan = new Scan(); 433 scanValidate(scan, rowCount, table); 434 } 435 436 private void findRegionToSplit(TableName tableName, String splitRowKey) throws Exception { 437 HRegion toSplit = null; 438 byte[] toSplitKey = Bytes.toBytes(splitRowKey); 439 for (HRegion rg : cluster.getRegions(tableName)) { 440 LOG.debug( 441 "startKey=" + Bytes.toStringBinary(rg.getRegionInfo().getStartKey()) + ", getEndKey()=" 442 + Bytes.toStringBinary(rg.getRegionInfo().getEndKey()) + ", row=" + splitRowKey); 443 if ( 444 (rg.getRegionInfo().getStartKey().length == 0 || CellComparator.getInstance().compare( 445 PrivateCellUtil.createFirstOnRow(rg.getRegionInfo().getStartKey()), 446 PrivateCellUtil.createFirstOnRow(toSplitKey)) <= 0) 447 && (rg.getRegionInfo().getEndKey().length == 0 || CellComparator.getInstance().compare( 448 PrivateCellUtil.createFirstOnRow(rg.getRegionInfo().getEndKey()), 449 PrivateCellUtil.createFirstOnRow(toSplitKey)) >= 0) 450 ) { 451 toSplit = rg; 452 } 453 } 454 assertNotNull(toSplit); 455 admin.splitRegionAsync(toSplit.getRegionInfo().getRegionName(), toSplitKey); 456 } 457 458 private static void scanValidate(Scan scan, int expectedRowCount, Table table) 459 throws IOException { 460 ResultScanner scanner = table.getScanner(scan); 461 int rows = 0; 462 for (Result result : scanner) { 463 rows++; 464 } 465 scanner.close(); 466 assertEquals(expectedRowCount, rows); 467 } 468 469 public static class FailingSplitMasterObserver implements MasterCoprocessor, MasterObserver { 470 volatile CountDownLatch latch; 471 472 @Override 473 public void start(CoprocessorEnvironment e) throws IOException { 474 latch = new CountDownLatch(1); 475 } 476 477 @Override 478 public Optional<MasterObserver> getMasterObserver() { 479 return Optional.of(this); 480 } 481 482 @Override 483 public void preSplitRegionBeforeMETAAction( 484 final ObserverContext<MasterCoprocessorEnvironment> ctx, final byte[] splitKey, 485 final List<Mutation> metaEntries) throws IOException { 486 latch.countDown(); 487 throw new IOException("Causing rollback of region split"); 488 } 489 } 490 491 @Test 492 public void testSplitRollbackOnRegionClosing() throws Exception { 493 final TableName tableName = TableName.valueOf(name.getMethodName()); 494 495 // Create table then get the single region for our new table. 496 Table t = createTableAndWait(tableName, HConstants.CATALOG_FAMILY); 497 List<HRegion> regions = cluster.getRegions(tableName); 498 RegionInfo hri = getAndCheckSingleTableRegion(regions); 499 500 int tableRegionIndex = ensureTableRegionNotOnSameServerAsMeta(admin, hri); 501 502 RegionStates regionStates = cluster.getMaster().getAssignmentManager().getRegionStates(); 503 504 // Turn off balancer so it doesn't cut in and mess up our placements. 505 this.admin.balancerSwitch(false, true); 506 // Turn off the meta scanner so it don't remove parent on us. 507 cluster.getMaster().setCatalogJanitorEnabled(false); 508 try { 509 // Add a bit of load up into the table so splittable. 510 TESTING_UTIL.loadTable(t, HConstants.CATALOG_FAMILY, false); 511 // Get region pre-split. 512 HRegionServer server = cluster.getRegionServer(tableRegionIndex); 513 printOutRegions(server, "Initial regions: "); 514 int regionCount = cluster.getRegions(hri.getTable()).size(); 515 regionStates.updateRegionState(hri, RegionState.State.CLOSING); 516 517 // Now try splitting.... should fail. And each should successfully 518 // rollback. 519 // We don't roll back here anymore. Instead we fail-fast on construction of the 520 // split transaction. Catch the exception instead. 521 try { 522 this.admin.splitRegionAsync(hri.getRegionName()); 523 fail(); 524 } catch (DoNotRetryRegionException e) { 525 // Expected 526 } 527 // Wait around a while and assert count of regions remains constant. 528 for (int i = 0; i < 10; i++) { 529 Thread.sleep(100); 530 assertEquals(regionCount, cluster.getRegions(hri.getTable()).size()); 531 } 532 regionStates.updateRegionState(hri, State.OPEN); 533 // Now try splitting and it should work. 534 admin.splitRegionAsync(hri.getRegionName()).get(2, TimeUnit.MINUTES); 535 // Get daughters 536 checkAndGetDaughters(tableName); 537 // OK, so split happened after we cleared the blocking node. 538 } finally { 539 admin.balancerSwitch(true, false); 540 cluster.getMaster().setCatalogJanitorEnabled(true); 541 t.close(); 542 } 543 } 544 545 /** 546 * Test that if daughter split on us, we won't do the shutdown handler fixup just because we can't 547 * find the immediate daughter of an offlined parent. 548 */ 549 @Test 550 public void testShutdownFixupWhenDaughterHasSplit() throws Exception { 551 final TableName tableName = TableName.valueOf(name.getMethodName()); 552 553 // Create table then get the single region for our new table. 554 Table t = createTableAndWait(tableName, HConstants.CATALOG_FAMILY); 555 List<HRegion> regions = cluster.getRegions(tableName); 556 RegionInfo hri = getAndCheckSingleTableRegion(regions); 557 int tableRegionIndex = ensureTableRegionNotOnSameServerAsMeta(admin, hri); 558 559 // Turn off balancer so it doesn't cut in and mess up our placements. 560 this.admin.balancerSwitch(false, true); 561 // Turn off the meta scanner so it don't remove parent on us. 562 cluster.getMaster().setCatalogJanitorEnabled(false); 563 try { 564 // Add a bit of load up into the table so splittable. 565 TESTING_UTIL.loadTable(t, HConstants.CATALOG_FAMILY); 566 // Get region pre-split. 567 HRegionServer server = cluster.getRegionServer(tableRegionIndex); 568 printOutRegions(server, "Initial regions: "); 569 // Now split. 570 admin.splitRegionAsync(hri.getRegionName()).get(2, TimeUnit.MINUTES); 571 // Get daughters 572 List<HRegion> daughters = checkAndGetDaughters(tableName); 573 // Now split one of the daughters. 574 HRegion daughterRegion = daughters.get(0); 575 RegionInfo daughter = daughterRegion.getRegionInfo(); 576 LOG.info("Daughter we are going to split: " + daughter); 577 clearReferences(daughterRegion); 578 LOG.info("Finished {} references={}", daughterRegion, daughterRegion.hasReferences()); 579 admin.splitRegionAsync(daughter.getRegionName()).get(2, TimeUnit.MINUTES); 580 // Get list of daughters 581 daughters = cluster.getRegions(tableName); 582 for (HRegion d : daughters) { 583 LOG.info("Regions before crash: " + d); 584 } 585 // Now crash the server 586 cluster.abortRegionServer(tableRegionIndex); 587 waitUntilRegionServerDead(); 588 awaitDaughters(tableName, daughters.size()); 589 // Assert daughters are online and ONLY the original daughters -- that 590 // fixup didn't insert one during server shutdown recover. 591 regions = cluster.getRegions(tableName); 592 for (HRegion d : daughters) { 593 LOG.info("Regions after crash: " + d); 594 } 595 if (daughters.size() != regions.size()) { 596 LOG.info("Daughters=" + daughters.size() + ", regions=" + regions.size()); 597 } 598 assertEquals(daughters.size(), regions.size()); 599 for (HRegion r : regions) { 600 LOG.info("Regions post crash " + r + ", contains=" + daughters.contains(r)); 601 assertTrue("Missing region post crash " + r, daughters.contains(r)); 602 } 603 } finally { 604 LOG.info("EXITING"); 605 admin.balancerSwitch(true, false); 606 cluster.getMaster().setCatalogJanitorEnabled(true); 607 t.close(); 608 } 609 } 610 611 private void clearReferences(HRegion region) throws IOException { 612 // Presumption. 613 assertEquals(1, region.getStores().size()); 614 HStore store = region.getStores().get(0); 615 while (store.hasReferences()) { 616 while (store.storeEngine.getCompactor().isCompacting()) { 617 Threads.sleep(100); 618 } 619 // Run new compaction. Shoudn't be any others running. 620 region.compact(true); 621 store.closeAndArchiveCompactedFiles(); 622 } 623 } 624 625 @Test 626 public void testSplitShouldNotThrowNPEEvenARegionHasEmptySplitFiles() throws Exception { 627 TableName userTableName = TableName.valueOf(name.getMethodName()); 628 TableDescriptor htd = TableDescriptorBuilder.newBuilder(userTableName) 629 .setColumnFamily(ColumnFamilyDescriptorBuilder.of("col")).build(); 630 admin.createTable(htd); 631 Table table = TESTING_UTIL.getConnection().getTable(userTableName); 632 try { 633 for (int i = 0; i <= 5; i++) { 634 String row = "row" + i; 635 Put p = new Put(row.getBytes()); 636 String val = "Val" + i; 637 p.addColumn("col".getBytes(), "ql".getBytes(), val.getBytes()); 638 table.put(p); 639 admin.flush(userTableName); 640 Delete d = new Delete(row.getBytes()); 641 // Do a normal delete 642 table.delete(d); 643 admin.flush(userTableName); 644 } 645 admin.majorCompact(userTableName); 646 List<RegionInfo> regionsOfTable = cluster.getMaster().getAssignmentManager().getRegionStates() 647 .getRegionsOfTable(userTableName); 648 assertEquals(1, regionsOfTable.size()); 649 RegionInfo hRegionInfo = regionsOfTable.get(0); 650 Put p = new Put("row6".getBytes()); 651 p.addColumn("col".getBytes(), "ql".getBytes(), "val".getBytes()); 652 table.put(p); 653 p = new Put("row7".getBytes()); 654 p.addColumn("col".getBytes(), "ql".getBytes(), "val".getBytes()); 655 table.put(p); 656 p = new Put("row8".getBytes()); 657 p.addColumn("col".getBytes(), "ql".getBytes(), "val".getBytes()); 658 table.put(p); 659 admin.flush(userTableName); 660 admin.splitRegionAsync(hRegionInfo.getRegionName(), "row7".getBytes()); 661 regionsOfTable = cluster.getMaster().getAssignmentManager().getRegionStates() 662 .getRegionsOfTable(userTableName); 663 664 while (regionsOfTable.size() != 2) { 665 Thread.sleep(1000); 666 regionsOfTable = cluster.getMaster().getAssignmentManager().getRegionStates() 667 .getRegionsOfTable(userTableName); 668 LOG.debug("waiting 2 regions to be available, got " + regionsOfTable.size() + ": " 669 + regionsOfTable); 670 671 } 672 Assert.assertEquals(2, regionsOfTable.size()); 673 674 Scan s = new Scan(); 675 ResultScanner scanner = table.getScanner(s); 676 int mainTableCount = 0; 677 for (Result rr = scanner.next(); rr != null; rr = scanner.next()) { 678 mainTableCount++; 679 } 680 Assert.assertEquals(3, mainTableCount); 681 } finally { 682 table.close(); 683 } 684 } 685 686 /** 687 * Verifies HBASE-5806. Here the case is that splitting is completed but before the CJ could 688 * remove the parent region the master is killed and restarted. 689 */ 690 @Test 691 public void testMasterRestartAtRegionSplitPendingCatalogJanitor() 692 throws IOException, InterruptedException, NodeExistsException, KeeperException, 693 ServiceException, ExecutionException, TimeoutException { 694 final TableName tableName = TableName.valueOf(name.getMethodName()); 695 // Create table then get the single region for our new table. 696 try (Table t = createTableAndWait(tableName, HConstants.CATALOG_FAMILY)) { 697 List<HRegion> regions = cluster.getRegions(tableName); 698 RegionInfo hri = getAndCheckSingleTableRegion(regions); 699 700 int tableRegionIndex = ensureTableRegionNotOnSameServerAsMeta(admin, hri); 701 702 // Turn off balancer so it doesn't cut in and mess up our placements. 703 this.admin.balancerSwitch(false, true); 704 // Turn off the meta scanner so it don't remove parent on us. 705 cluster.getMaster().setCatalogJanitorEnabled(false); 706 // Add a bit of load up into the table so splittable. 707 TESTING_UTIL.loadTable(t, HConstants.CATALOG_FAMILY, false); 708 // Get region pre-split. 709 HRegionServer server = cluster.getRegionServer(tableRegionIndex); 710 printOutRegions(server, "Initial regions: "); 711 // Call split. 712 this.admin.splitRegionAsync(hri.getRegionName()).get(2, TimeUnit.MINUTES); 713 List<HRegion> daughters = checkAndGetDaughters(tableName); 714 715 // Before cleanup, get a new master. 716 HMaster master = abortAndWaitForMaster(); 717 // Now call compact on the daughters and clean up any references. 718 for (HRegion daughter : daughters) { 719 clearReferences(daughter); 720 assertFalse(daughter.hasReferences()); 721 } 722 // BUT calling compact on the daughters is not enough. The CatalogJanitor looks 723 // in the filesystem, and the filesystem content is not same as what the Region 724 // is reading from. Compacted-away files are picked up later by the compacted 725 // file discharger process. It runs infrequently. Make it run so CatalogJanitor 726 // doens't find any references. 727 for (RegionServerThread rst : cluster.getRegionServerThreads()) { 728 boolean oldSetting = rst.getRegionServer().compactedFileDischarger.setUseExecutor(false); 729 rst.getRegionServer().compactedFileDischarger.run(); 730 rst.getRegionServer().compactedFileDischarger.setUseExecutor(oldSetting); 731 } 732 cluster.getMaster().setCatalogJanitorEnabled(true); 733 ProcedureTestingUtility.waitAllProcedures(cluster.getMaster().getMasterProcedureExecutor()); 734 LOG.info("Starting run of CatalogJanitor"); 735 cluster.getMaster().getCatalogJanitor().run(); 736 ProcedureTestingUtility.waitAllProcedures(cluster.getMaster().getMasterProcedureExecutor()); 737 RegionStates regionStates = master.getAssignmentManager().getRegionStates(); 738 ServerName regionServerOfRegion = regionStates.getRegionServerOfRegion(hri); 739 assertEquals(null, regionServerOfRegion); 740 } finally { 741 TESTING_UTIL.getAdmin().balancerSwitch(true, false); 742 cluster.getMaster().setCatalogJanitorEnabled(true); 743 } 744 } 745 746 @Test 747 public void testSplitWithRegionReplicas() throws Exception { 748 final TableName tableName = TableName.valueOf(name.getMethodName()); 749 HTableDescriptor htd = TESTING_UTIL.createTableDescriptor(name.getMethodName()); 750 htd.setRegionReplication(2); 751 htd.addCoprocessor(SlowMeCopro.class.getName()); 752 // Create table then get the single region for our new table. 753 Table t = TESTING_UTIL.createTable(htd, new byte[][] { Bytes.toBytes("cf") }, null); 754 List<HRegion> oldRegions; 755 do { 756 oldRegions = cluster.getRegions(tableName); 757 Thread.sleep(10); 758 } while (oldRegions.size() != 2); 759 for (HRegion h : oldRegions) 760 LOG.debug("OLDREGION " + h.getRegionInfo()); 761 try { 762 int regionServerIndex = 763 cluster.getServerWith(oldRegions.get(0).getRegionInfo().getRegionName()); 764 HRegionServer regionServer = cluster.getRegionServer(regionServerIndex); 765 insertData(tableName, admin, t); 766 // Turn off balancer so it doesn't cut in and mess up our placements. 767 admin.balancerSwitch(false, true); 768 // Turn off the meta scanner so it don't remove parent on us. 769 cluster.getMaster().setCatalogJanitorEnabled(false); 770 boolean tableExists = TESTING_UTIL.getAdmin().tableExists(tableName); 771 assertEquals("The specified table should be present.", true, tableExists); 772 final HRegion region = findSplittableRegion(oldRegions); 773 regionServerIndex = cluster.getServerWith(region.getRegionInfo().getRegionName()); 774 regionServer = cluster.getRegionServer(regionServerIndex); 775 assertTrue("not able to find a splittable region", region != null); 776 try { 777 requestSplitRegion(regionServer, region, Bytes.toBytes("row2")); 778 } catch (IOException e) { 779 e.printStackTrace(); 780 fail("Split execution should have succeeded with no exceptions thrown " + e); 781 } 782 // TESTING_UTIL.waitUntilAllRegionsAssigned(tableName); 783 List<HRegion> newRegions; 784 do { 785 newRegions = cluster.getRegions(tableName); 786 for (HRegion h : newRegions) 787 LOG.debug("NEWREGION " + h.getRegionInfo()); 788 Thread.sleep(1000); 789 } while ( 790 (newRegions.contains(oldRegions.get(0)) || newRegions.contains(oldRegions.get(1))) 791 || newRegions.size() != 4 792 ); 793 tableExists = TESTING_UTIL.getAdmin().tableExists(tableName); 794 assertEquals("The specified table should be present.", true, tableExists); 795 // exists works on stale and we see the put after the flush 796 byte[] b1 = "row1".getBytes(); 797 Get g = new Get(b1); 798 g.setConsistency(Consistency.STRONG); 799 // The following GET will make a trip to the meta to get the new location of the 1st daughter 800 // In the process it will also get the location of the replica of the daughter (initially 801 // pointing to the parent's replica) 802 Result r = t.get(g); 803 Assert.assertFalse(r.isStale()); 804 LOG.info("exists stale after flush done"); 805 806 SlowMeCopro.getPrimaryCdl().set(new CountDownLatch(1)); 807 g = new Get(b1); 808 g.setConsistency(Consistency.TIMELINE); 809 // This will succeed because in the previous GET we get the location of the replica 810 r = t.get(g); 811 Assert.assertTrue(r.isStale()); 812 SlowMeCopro.getPrimaryCdl().get().countDown(); 813 } finally { 814 SlowMeCopro.getPrimaryCdl().get().countDown(); 815 admin.balancerSwitch(true, false); 816 cluster.getMaster().setCatalogJanitorEnabled(true); 817 t.close(); 818 } 819 } 820 821 private void insertData(final TableName tableName, Admin admin, Table t) throws IOException { 822 insertData(tableName, admin, t, 1); 823 } 824 825 private void insertData(TableName tableName, Admin admin, Table t, int i) throws IOException { 826 Put p = new Put(Bytes.toBytes("row" + i)); 827 p.addColumn(Bytes.toBytes("cf"), Bytes.toBytes("q1"), Bytes.toBytes("1")); 828 t.put(p); 829 p = new Put(Bytes.toBytes("row" + (i + 1))); 830 p.addColumn(Bytes.toBytes("cf"), Bytes.toBytes("q1"), Bytes.toBytes("2")); 831 t.put(p); 832 p = new Put(Bytes.toBytes("row" + (i + 2))); 833 p.addColumn(Bytes.toBytes("cf"), Bytes.toBytes("q1"), Bytes.toBytes("3")); 834 t.put(p); 835 p = new Put(Bytes.toBytes("row" + (i + 3))); 836 p.addColumn(Bytes.toBytes("cf"), Bytes.toBytes("q1"), Bytes.toBytes("4")); 837 t.put(p); 838 admin.flush(tableName); 839 } 840 841 /** 842 * If a table has regions that have no store files in a region, they should split successfully 843 * into two regions with no store files. 844 */ 845 @Test 846 public void testSplitRegionWithNoStoreFiles() throws Exception { 847 final TableName tableName = TableName.valueOf(name.getMethodName()); 848 // Create table then get the single region for our new table. 849 createTableAndWait(tableName, HConstants.CATALOG_FAMILY); 850 List<HRegion> regions = cluster.getRegions(tableName); 851 RegionInfo hri = getAndCheckSingleTableRegion(regions); 852 ensureTableRegionNotOnSameServerAsMeta(admin, hri); 853 int regionServerIndex = cluster.getServerWith(regions.get(0).getRegionInfo().getRegionName()); 854 HRegionServer regionServer = cluster.getRegionServer(regionServerIndex); 855 // Turn off balancer so it doesn't cut in and mess up our placements. 856 this.admin.balancerSwitch(false, true); 857 // Turn off the meta scanner so it don't remove parent on us. 858 cluster.getMaster().setCatalogJanitorEnabled(false); 859 try { 860 // Precondition: we created a table with no data, no store files. 861 printOutRegions(regionServer, "Initial regions: "); 862 Configuration conf = cluster.getConfiguration(); 863 HBaseFsck.debugLsr(conf, new Path("/")); 864 Path rootDir = CommonFSUtils.getRootDir(conf); 865 FileSystem fs = TESTING_UTIL.getDFSCluster().getFileSystem(); 866 Map<String, Path> storefiles = FSUtils.getTableStoreFilePathMap(null, fs, rootDir, tableName); 867 assertEquals("Expected nothing but found " + storefiles.toString(), 0, storefiles.size()); 868 869 // find a splittable region. Refresh the regions list 870 regions = cluster.getRegions(tableName); 871 final HRegion region = findSplittableRegion(regions); 872 assertTrue("not able to find a splittable region", region != null); 873 874 // Now split. 875 try { 876 requestSplitRegion(regionServer, region, Bytes.toBytes("row2")); 877 } catch (IOException e) { 878 fail("Split execution should have succeeded with no exceptions thrown"); 879 } 880 881 // Postcondition: split the table with no store files into two regions, but still have no 882 // store files 883 List<HRegion> daughters = cluster.getRegions(tableName); 884 assertEquals(2, daughters.size()); 885 886 // check dirs 887 HBaseFsck.debugLsr(conf, new Path("/")); 888 Map<String, Path> storefilesAfter = 889 FSUtils.getTableStoreFilePathMap(null, fs, rootDir, tableName); 890 assertEquals("Expected nothing but found " + storefilesAfter.toString(), 0, 891 storefilesAfter.size()); 892 893 hri = region.getRegionInfo(); // split parent 894 AssignmentManager am = cluster.getMaster().getAssignmentManager(); 895 RegionStates regionStates = am.getRegionStates(); 896 long start = EnvironmentEdgeManager.currentTime(); 897 while (!regionStates.isRegionInState(hri, State.SPLIT)) { 898 LOG.debug("Waiting for SPLIT state on: " + hri); 899 assertFalse("Timed out in waiting split parent to be in state SPLIT", 900 EnvironmentEdgeManager.currentTime() - start > 60000); 901 Thread.sleep(500); 902 } 903 assertTrue(regionStates.isRegionInState(daughters.get(0).getRegionInfo(), State.OPEN)); 904 assertTrue(regionStates.isRegionInState(daughters.get(1).getRegionInfo(), State.OPEN)); 905 906 // We should not be able to assign it again 907 try { 908 am.assign(hri); 909 } catch (DoNotRetryIOException e) { 910 // Expected 911 } 912 assertFalse("Split region can't be assigned", regionStates.isRegionInTransition(hri)); 913 assertTrue(regionStates.isRegionInState(hri, State.SPLIT)); 914 915 // We should not be able to unassign it either 916 try { 917 am.unassign(hri); 918 fail("Should have thrown exception"); 919 } catch (DoNotRetryIOException e) { 920 // Expected 921 } 922 assertFalse("Split region can't be unassigned", regionStates.isRegionInTransition(hri)); 923 assertTrue(regionStates.isRegionInState(hri, State.SPLIT)); 924 } finally { 925 admin.balancerSwitch(true, false); 926 cluster.getMaster().setCatalogJanitorEnabled(true); 927 } 928 } 929 930 @Test 931 public void testStoreFileReferenceCreationWhenSplitPolicySaysToSkipRangeCheck() throws Exception { 932 final TableName tableName = TableName.valueOf(name.getMethodName()); 933 try { 934 byte[] cf = Bytes.toBytes("f"); 935 byte[] cf1 = Bytes.toBytes("i_f"); 936 TableDescriptor htd = TableDescriptorBuilder.newBuilder(tableName) 937 .setColumnFamily(ColumnFamilyDescriptorBuilder.of(cf)) 938 .setColumnFamily(ColumnFamilyDescriptorBuilder.of(cf1)) 939 .setRegionSplitPolicyClassName(CustomSplitPolicy.class.getName()).build(); 940 admin.createTable(htd); 941 List<HRegion> regions = awaitTableRegions(tableName); 942 HRegion region = regions.get(0); 943 for (int i = 3; i < 9; i++) { 944 Put p = new Put(Bytes.toBytes("row" + i)); 945 p.addColumn(cf, Bytes.toBytes("q"), Bytes.toBytes("value" + i)); 946 p.addColumn(cf1, Bytes.toBytes("q"), Bytes.toBytes("value" + i)); 947 region.put(p); 948 } 949 region.flush(true); 950 HStore store = region.getStore(cf); 951 Collection<HStoreFile> storefiles = store.getStorefiles(); 952 assertEquals(1, storefiles.size()); 953 assertFalse(region.hasReferences()); 954 Path referencePath = region.getRegionFileSystem().splitStoreFile(region.getRegionInfo(), "f", 955 storefiles.iterator().next(), Bytes.toBytes("row1"), false, region.getSplitPolicy()); 956 assertNull(referencePath); 957 referencePath = region.getRegionFileSystem().splitStoreFile(region.getRegionInfo(), "i_f", 958 storefiles.iterator().next(), Bytes.toBytes("row1"), false, region.getSplitPolicy()); 959 assertNotNull(referencePath); 960 } finally { 961 TESTING_UTIL.deleteTable(tableName); 962 } 963 } 964 965 private HRegion findSplittableRegion(final List<HRegion> regions) throws InterruptedException { 966 for (int i = 0; i < 5; ++i) { 967 for (HRegion r : regions) { 968 if (r.isSplittable() && r.getRegionInfo().getReplicaId() == 0) { 969 return (r); 970 } 971 } 972 Thread.sleep(100); 973 } 974 return null; 975 } 976 977 private List<HRegion> checkAndGetDaughters(TableName tableName) throws InterruptedException { 978 List<HRegion> daughters = null; 979 // try up to 10s 980 for (int i = 0; i < 100; i++) { 981 daughters = cluster.getRegions(tableName); 982 if (daughters.size() >= 2) { 983 break; 984 } 985 Thread.sleep(100); 986 } 987 assertTrue(daughters.size() >= 2); 988 return daughters; 989 } 990 991 private HMaster abortAndWaitForMaster() throws IOException, InterruptedException { 992 cluster.abortMaster(0); 993 cluster.waitOnMaster(0); 994 HMaster master = cluster.startMaster().getMaster(); 995 cluster.waitForActiveAndReadyMaster(); 996 // reset the connections 997 Closeables.close(admin, true); 998 TESTING_UTIL.invalidateConnection(); 999 admin = TESTING_UTIL.getAdmin(); 1000 return master; 1001 } 1002 1003 /** 1004 * Ensure single table region is not on same server as the single hbase:meta table region. 1005 * @return Index of the server hosting the single table region nn * @throws 1006 * org.apache.hadoop.hbase.ZooKeeperConnectionException n 1007 */ 1008 private int ensureTableRegionNotOnSameServerAsMeta(final Admin admin, final RegionInfo hri) 1009 throws IOException, MasterNotRunningException, ZooKeeperConnectionException, 1010 InterruptedException { 1011 // Now make sure that the table region is not on same server as that hosting 1012 // hbase:meta We don't want hbase:meta replay polluting our test when we later crash 1013 // the table region serving server. 1014 int metaServerIndex = cluster.getServerWithMeta(); 1015 boolean tablesOnMaster = LoadBalancer.isTablesOnMaster(TESTING_UTIL.getConfiguration()); 1016 if (tablesOnMaster) { 1017 // Need to check master is supposed to host meta... perhaps it is not. 1018 throw new UnsupportedOperationException(); 1019 // TODO: assertTrue(metaServerIndex == -1); // meta is on master now 1020 } 1021 HRegionServer metaRegionServer = 1022 tablesOnMaster ? cluster.getMaster() : cluster.getRegionServer(metaServerIndex); 1023 int tableRegionIndex = cluster.getServerWith(hri.getRegionName()); 1024 assertTrue(tableRegionIndex != -1); 1025 HRegionServer tableRegionServer = cluster.getRegionServer(tableRegionIndex); 1026 LOG.info("MetaRegionServer=" + metaRegionServer.getServerName() + ", other=" 1027 + tableRegionServer.getServerName()); 1028 if (metaRegionServer.getServerName().equals(tableRegionServer.getServerName())) { 1029 HRegionServer hrs = getOtherRegionServer(cluster, metaRegionServer); 1030 assertNotNull(hrs); 1031 assertNotNull(hri); 1032 LOG.info("Moving " + hri.getRegionNameAsString() + " from " + metaRegionServer.getServerName() 1033 + " to " + hrs.getServerName() + "; metaServerIndex=" + metaServerIndex); 1034 admin.move(hri.getEncodedNameAsBytes(), hrs.getServerName()); 1035 } 1036 // Wait till table region is up on the server that is NOT carrying hbase:meta. 1037 for (int i = 0; i < 100; i++) { 1038 tableRegionIndex = cluster.getServerWith(hri.getRegionName()); 1039 if (tableRegionIndex != -1 && tableRegionIndex != metaServerIndex) break; 1040 LOG.debug("Waiting on region move off the hbase:meta server; current index " 1041 + tableRegionIndex + " and metaServerIndex=" + metaServerIndex); 1042 Thread.sleep(100); 1043 } 1044 assertTrue("Region not moved off hbase:meta server, tableRegionIndex=" + tableRegionIndex, 1045 tableRegionIndex != -1 && tableRegionIndex != metaServerIndex); 1046 // Verify for sure table region is not on same server as hbase:meta 1047 tableRegionIndex = cluster.getServerWith(hri.getRegionName()); 1048 assertTrue(tableRegionIndex != -1); 1049 assertNotSame(metaServerIndex, tableRegionIndex); 1050 return tableRegionIndex; 1051 } 1052 1053 /** 1054 * Find regionserver other than the one passed. Can't rely on indexes into list of regionservers 1055 * since crashed servers occupy an index. nn * @return A regionserver that is not 1056 * <code>notThisOne</code> or null if none found 1057 */ 1058 private HRegionServer getOtherRegionServer(final MiniHBaseCluster cluster, 1059 final HRegionServer notThisOne) { 1060 for (RegionServerThread rst : cluster.getRegionServerThreads()) { 1061 HRegionServer hrs = rst.getRegionServer(); 1062 if (hrs.getServerName().equals(notThisOne.getServerName())) continue; 1063 if (hrs.isStopping() || hrs.isStopped()) continue; 1064 return hrs; 1065 } 1066 return null; 1067 } 1068 1069 private void printOutRegions(final HRegionServer hrs, final String prefix) throws IOException { 1070 List<RegionInfo> regions = ProtobufUtil.getOnlineRegions(hrs.getRSRpcServices()); 1071 for (RegionInfo region : regions) { 1072 LOG.info(prefix + region.getRegionNameAsString()); 1073 } 1074 } 1075 1076 private void waitUntilRegionServerDead() throws InterruptedException, IOException { 1077 // Wait until the master processes the RS shutdown 1078 for (int i = 1079 0; (cluster.getMaster().getClusterMetrics().getLiveServerMetrics().size() > NB_SERVERS 1080 || cluster.getLiveRegionServerThreads().size() > NB_SERVERS) && i < 100; i++) { 1081 LOG.info("Waiting on server to go down"); 1082 Thread.sleep(100); 1083 } 1084 assertFalse("Waited too long for RS to die", 1085 cluster.getMaster().getClusterMetrics().getLiveServerMetrics().size() > NB_SERVERS 1086 || cluster.getLiveRegionServerThreads().size() > NB_SERVERS); 1087 } 1088 1089 private void awaitDaughters(TableName tableName, int numDaughters) throws InterruptedException { 1090 // Wait till regions are back on line again. 1091 for (int i = 0; cluster.getRegions(tableName).size() < numDaughters && i < 60; i++) { 1092 LOG.info("Waiting for repair to happen"); 1093 Thread.sleep(1000); 1094 } 1095 if (cluster.getRegions(tableName).size() < numDaughters) { 1096 fail("Waiting too long for daughter regions"); 1097 } 1098 } 1099 1100 private List<HRegion> awaitTableRegions(final TableName tableName) throws InterruptedException { 1101 List<HRegion> regions = null; 1102 for (int i = 0; i < 100; i++) { 1103 regions = cluster.getRegions(tableName); 1104 if (regions.size() > 0) break; 1105 Thread.sleep(100); 1106 } 1107 return regions; 1108 } 1109 1110 private Table createTableAndWait(TableName tableName, byte[] cf) 1111 throws IOException, InterruptedException { 1112 Table t = TESTING_UTIL.createTable(tableName, cf); 1113 awaitTableRegions(tableName); 1114 assertTrue("Table not online: " + tableName, cluster.getRegions(tableName).size() != 0); 1115 return t; 1116 } 1117 1118 // Make it public so that JVMClusterUtil can access it. 1119 public static class MyMaster extends HMaster { 1120 public MyMaster(Configuration conf) throws IOException, KeeperException, InterruptedException { 1121 super(conf); 1122 } 1123 1124 @Override 1125 protected RSRpcServices createRpcServices() throws IOException { 1126 return new MyMasterRpcServices(this); 1127 } 1128 } 1129 1130 static class MyMasterRpcServices extends MasterRpcServices { 1131 static AtomicBoolean enabled = new AtomicBoolean(false); 1132 1133 private HMaster myMaster; 1134 1135 public MyMasterRpcServices(HMaster master) throws IOException { 1136 super(master); 1137 myMaster = master; 1138 } 1139 1140 @Override 1141 public ReportRegionStateTransitionResponse reportRegionStateTransition(RpcController c, 1142 ReportRegionStateTransitionRequest req) throws ServiceException { 1143 ReportRegionStateTransitionResponse resp = super.reportRegionStateTransition(c, req); 1144 if ( 1145 enabled.get() 1146 && req.getTransition(0).getTransitionCode().equals(TransitionCode.READY_TO_SPLIT) 1147 && !resp.hasErrorMessage() 1148 ) { 1149 RegionStates regionStates = myMaster.getAssignmentManager().getRegionStates(); 1150 for (RegionStateNode regionState : regionStates.getRegionsInTransition()) { 1151 /* 1152 * TODO!!!! // Find the merging_new region and remove it if (regionState.isSplittingNew()) 1153 * { regionStates.deleteRegion(regionState.getRegion()); } 1154 */ 1155 } 1156 } 1157 return resp; 1158 } 1159 } 1160 1161 static class CustomSplitPolicy extends IncreasingToUpperBoundRegionSplitPolicy { 1162 1163 @Override 1164 protected boolean shouldSplit() { 1165 return true; 1166 } 1167 1168 @Override 1169 public boolean skipStoreFileRangeCheck(String familyName) { 1170 if (familyName.startsWith("i_")) { 1171 return true; 1172 } else { 1173 return false; 1174 } 1175 } 1176 } 1177}