001/* 002 * Licensed to the Apache Software Foundation (ASF) under one 003 * or more contributor license agreements. See the NOTICE file 004 * distributed with this work for additional information 005 * regarding copyright ownership. The ASF licenses this file 006 * to you under the Apache License, Version 2.0 (the 007 * "License"); you may not use this file except in compliance 008 * with the License. You may obtain a copy of the License at 009 * 010 * http://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, software 013 * distributed under the License is distributed on an "AS IS" BASIS, 014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 015 * See the License for the specific language governing permissions and 016 * limitations under the License. 017 */ 018package org.apache.hadoop.hbase.regionserver; 019 020import static org.junit.Assert.assertEquals; 021import static org.junit.Assert.assertFalse; 022import static org.junit.Assert.assertNotEquals; 023import static org.junit.Assert.assertNotNull; 024import static org.junit.Assert.assertNotSame; 025import static org.junit.Assert.assertNull; 026import static org.junit.Assert.assertTrue; 027import static org.junit.Assert.fail; 028 029import java.io.IOException; 030import java.lang.reflect.Field; 031import java.util.ArrayList; 032import java.util.Collection; 033import java.util.List; 034import java.util.Map; 035import java.util.Optional; 036import java.util.concurrent.CountDownLatch; 037import java.util.concurrent.ExecutionException; 038import java.util.concurrent.TimeUnit; 039import java.util.concurrent.TimeoutException; 040import java.util.concurrent.atomic.AtomicBoolean; 041import org.apache.commons.io.IOUtils; 042import org.apache.hadoop.conf.Configuration; 043import org.apache.hadoop.fs.FileSystem; 044import org.apache.hadoop.fs.Path; 045import org.apache.hadoop.hbase.Coprocessor; 046import org.apache.hadoop.hbase.CoprocessorEnvironment; 047import org.apache.hadoop.hbase.DoNotRetryIOException; 048import org.apache.hadoop.hbase.HBaseClassTestRule; 049import org.apache.hadoop.hbase.HBaseTestingUtility; 050import org.apache.hadoop.hbase.HConstants; 051import org.apache.hadoop.hbase.HTableDescriptor; 052import org.apache.hadoop.hbase.MasterNotRunningException; 053import org.apache.hadoop.hbase.MetaTableAccessor; 054import org.apache.hadoop.hbase.MiniHBaseCluster; 055import org.apache.hadoop.hbase.ServerName; 056import org.apache.hadoop.hbase.StartMiniClusterOption; 057import org.apache.hadoop.hbase.TableName; 058import org.apache.hadoop.hbase.UnknownRegionException; 059import org.apache.hadoop.hbase.ZooKeeperConnectionException; 060import org.apache.hadoop.hbase.client.Admin; 061import org.apache.hadoop.hbase.client.ColumnFamilyDescriptorBuilder; 062import org.apache.hadoop.hbase.client.Consistency; 063import org.apache.hadoop.hbase.client.Delete; 064import org.apache.hadoop.hbase.client.DoNotRetryRegionException; 065import org.apache.hadoop.hbase.client.Get; 066import org.apache.hadoop.hbase.client.Mutation; 067import org.apache.hadoop.hbase.client.Put; 068import org.apache.hadoop.hbase.client.RegionInfo; 069import org.apache.hadoop.hbase.client.Result; 070import org.apache.hadoop.hbase.client.ResultScanner; 071import org.apache.hadoop.hbase.client.Scan; 072import org.apache.hadoop.hbase.client.Table; 073import org.apache.hadoop.hbase.client.TableDescriptor; 074import org.apache.hadoop.hbase.client.TableDescriptorBuilder; 075import org.apache.hadoop.hbase.client.TestReplicasClient.SlowMeCopro; 076import org.apache.hadoop.hbase.coprocessor.MasterCoprocessor; 077import org.apache.hadoop.hbase.coprocessor.MasterCoprocessorEnvironment; 078import org.apache.hadoop.hbase.coprocessor.MasterObserver; 079import org.apache.hadoop.hbase.coprocessor.ObserverContext; 080import org.apache.hadoop.hbase.io.Reference; 081import org.apache.hadoop.hbase.master.HMaster; 082import org.apache.hadoop.hbase.master.LoadBalancer; 083import org.apache.hadoop.hbase.master.MasterRpcServices; 084import org.apache.hadoop.hbase.master.RegionState; 085import org.apache.hadoop.hbase.master.RegionState.State; 086import org.apache.hadoop.hbase.master.assignment.AssignmentManager; 087import org.apache.hadoop.hbase.master.assignment.AssignmentTestingUtil; 088import org.apache.hadoop.hbase.master.assignment.RegionStateNode; 089import org.apache.hadoop.hbase.master.assignment.RegionStates; 090import org.apache.hadoop.hbase.procedure2.ProcedureTestingUtility; 091import org.apache.hadoop.hbase.regionserver.compactions.CompactionContext; 092import org.apache.hadoop.hbase.regionserver.compactions.CompactionLifeCycleTracker; 093import org.apache.hadoop.hbase.regionserver.compactions.CompactionProgress; 094import org.apache.hadoop.hbase.regionserver.throttle.NoLimitThroughputController; 095import org.apache.hadoop.hbase.testclassification.LargeTests; 096import org.apache.hadoop.hbase.testclassification.RegionServerTests; 097import org.apache.hadoop.hbase.util.Bytes; 098import org.apache.hadoop.hbase.util.CommonFSUtils; 099import org.apache.hadoop.hbase.util.EnvironmentEdgeManager; 100import org.apache.hadoop.hbase.util.FSUtils; 101import org.apache.hadoop.hbase.util.HBaseFsck; 102import org.apache.hadoop.hbase.util.JVMClusterUtil.RegionServerThread; 103import org.apache.hadoop.hbase.util.Threads; 104import org.apache.zookeeper.KeeperException; 105import org.apache.zookeeper.KeeperException.NodeExistsException; 106import org.junit.After; 107import org.junit.AfterClass; 108import org.junit.Assert; 109import org.junit.Before; 110import org.junit.BeforeClass; 111import org.junit.ClassRule; 112import org.junit.Rule; 113import org.junit.Test; 114import org.junit.experimental.categories.Category; 115import org.junit.rules.TestName; 116import org.mockito.Mockito; 117import org.slf4j.Logger; 118import org.slf4j.LoggerFactory; 119 120import org.apache.hbase.thirdparty.com.google.protobuf.RpcController; 121import org.apache.hbase.thirdparty.com.google.protobuf.ServiceException; 122 123import org.apache.hadoop.hbase.shaded.protobuf.ProtobufUtil; 124import org.apache.hadoop.hbase.shaded.protobuf.generated.RegionServerStatusProtos.RegionStateTransition.TransitionCode; 125import org.apache.hadoop.hbase.shaded.protobuf.generated.RegionServerStatusProtos.ReportRegionStateTransitionRequest; 126import org.apache.hadoop.hbase.shaded.protobuf.generated.RegionServerStatusProtos.ReportRegionStateTransitionResponse; 127 128/** 129 * The below tests are testing split region against a running cluster 130 */ 131@Category({RegionServerTests.class, LargeTests.class}) 132public class TestSplitTransactionOnCluster { 133 134 @ClassRule 135 public static final HBaseClassTestRule CLASS_RULE = 136 HBaseClassTestRule.forClass(TestSplitTransactionOnCluster.class); 137 138 private static final Logger LOG = LoggerFactory.getLogger(TestSplitTransactionOnCluster.class); 139 private Admin admin = null; 140 private MiniHBaseCluster cluster = null; 141 private static final int NB_SERVERS = 3; 142 143 static final HBaseTestingUtility TESTING_UTIL = 144 new HBaseTestingUtility(); 145 146 @Rule 147 public TestName name = new TestName(); 148 149 @BeforeClass public static void before() throws Exception { 150 TESTING_UTIL.getConfiguration().setInt(HConstants.HBASE_BALANCER_PERIOD, 60000); 151 StartMiniClusterOption option = StartMiniClusterOption.builder() 152 .masterClass(MyMaster.class).numRegionServers(NB_SERVERS). 153 numDataNodes(NB_SERVERS).build(); 154 TESTING_UTIL.startMiniCluster(option); 155 } 156 157 @AfterClass public static void after() throws Exception { 158 TESTING_UTIL.shutdownMiniCluster(); 159 } 160 161 @Before public void setup() throws IOException { 162 TESTING_UTIL.ensureSomeNonStoppedRegionServersAvailable(NB_SERVERS); 163 this.admin = TESTING_UTIL.getAdmin(); 164 this.cluster = TESTING_UTIL.getMiniHBaseCluster(); 165 } 166 167 @After 168 public void tearDown() throws Exception { 169 this.admin.close(); 170 for (TableDescriptor htd: this.admin.listTableDescriptors()) { 171 LOG.info("Tear down, remove table=" + htd.getTableName()); 172 TESTING_UTIL.deleteTable(htd.getTableName()); 173 } 174 } 175 176 private RegionInfo getAndCheckSingleTableRegion(final List<HRegion> regions) 177 throws IOException, InterruptedException { 178 assertEquals(1, regions.size()); 179 RegionInfo hri = regions.get(0).getRegionInfo(); 180 AssignmentTestingUtil.waitForAssignment(cluster.getMaster().getAssignmentManager(), hri); 181 return hri; 182 } 183 184 private void requestSplitRegion( 185 final HRegionServer rsServer, 186 final Region region, 187 final byte[] midKey) throws IOException { 188 long procId = cluster.getMaster().splitRegion(region.getRegionInfo(), midKey, 0, 0); 189 // wait for the split to complete or get interrupted. If the split completes successfully, 190 // the procedure will return true; if the split fails, the procedure would throw exception. 191 ProcedureTestingUtility.waitProcedure(cluster.getMaster().getMasterProcedureExecutor(), procId); 192 } 193 194 @Test 195 public void testRITStateForRollback() throws Exception { 196 final TableName tableName = TableName.valueOf(name.getMethodName()); 197 final HMaster master = cluster.getMaster(); 198 try { 199 // Create table then get the single region for our new table. 200 Table t = createTableAndWait(tableName, Bytes.toBytes("cf")); 201 final List<HRegion> regions = cluster.getRegions(tableName); 202 final RegionInfo hri = getAndCheckSingleTableRegion(regions); 203 insertData(tableName, admin, t); 204 t.close(); 205 206 // Turn off balancer so it doesn't cut in and mess up our placements. 207 this.admin.balancerSwitch(false, true); 208 // Turn off the meta scanner so it don't remove parent on us. 209 master.setCatalogJanitorEnabled(false); 210 211 // find a splittable region 212 final HRegion region = findSplittableRegion(regions); 213 assertTrue("not able to find a splittable region", region != null); 214 215 // install master co-processor to fail splits 216 master.getMasterCoprocessorHost().load( 217 FailingSplitMasterObserver.class, 218 Coprocessor.PRIORITY_USER, 219 master.getConfiguration()); 220 221 // split async 222 this.admin.splitRegionAsync(region.getRegionInfo().getRegionName(), new byte[] { 42 }); 223 224 // we have to wait until the SPLITTING state is seen by the master 225 FailingSplitMasterObserver observer = 226 master.getMasterCoprocessorHost().findCoprocessor(FailingSplitMasterObserver.class); 227 assertNotNull(observer); 228 observer.latch.await(); 229 230 LOG.info("Waiting for region to come out of RIT"); 231 while (!cluster.getMaster().getAssignmentManager().getRegionStates().isRegionOnline(hri)) { 232 Threads.sleep(100); 233 } 234 assertTrue(cluster.getMaster().getAssignmentManager().getRegionStates().isRegionOnline(hri)); 235 } finally { 236 admin.balancerSwitch(true, false); 237 master.setCatalogJanitorEnabled(true); 238 abortAndWaitForMaster(); 239 TESTING_UTIL.deleteTable(tableName); 240 } 241 } 242 243 @Test 244 public void testSplitFailedCompactionAndSplit() throws Exception { 245 final TableName tableName = TableName.valueOf(name.getMethodName()); 246 // Create table then get the single region for our new table. 247 byte[] cf = Bytes.toBytes("cf"); 248 TableDescriptor htd = TableDescriptorBuilder.newBuilder(tableName) 249 .setColumnFamily(ColumnFamilyDescriptorBuilder.of(cf)).build(); 250 admin.createTable(htd); 251 252 for (int i = 0; cluster.getRegions(tableName).isEmpty() && i < 100; i++) { 253 Thread.sleep(100); 254 } 255 assertEquals(1, cluster.getRegions(tableName).size()); 256 257 HRegion region = cluster.getRegions(tableName).get(0); 258 HStore store = region.getStore(cf); 259 int regionServerIndex = cluster.getServerWith(region.getRegionInfo().getRegionName()); 260 HRegionServer regionServer = cluster.getRegionServer(regionServerIndex); 261 262 Table t = TESTING_UTIL.getConnection().getTable(tableName); 263 // insert data 264 insertData(tableName, admin, t); 265 insertData(tableName, admin, t); 266 267 int fileNum = store.getStorefiles().size(); 268 // 0, Compaction Request 269 store.triggerMajorCompaction(); 270 Optional<CompactionContext> cc = store.requestCompaction(); 271 assertTrue(cc.isPresent()); 272 // 1, A timeout split 273 // 1.1 close region 274 assertEquals(2, region.close(false).get(cf).size()); 275 // 1.2 rollback and Region initialize again 276 region.initialize(); 277 278 // 2, Run Compaction cc 279 assertFalse(region.compact(cc.get(), store, NoLimitThroughputController.INSTANCE)); 280 assertTrue(fileNum > store.getStorefiles().size()); 281 282 // 3, Split 283 requestSplitRegion(regionServer, region, Bytes.toBytes("row3")); 284 assertEquals(2, cluster.getRegions(tableName).size()); 285 } 286 287 @Test 288 public void testSplitCompactWithPriority() throws Exception { 289 final TableName tableName = TableName.valueOf(name.getMethodName()); 290 // Create table then get the single region for our new table. 291 byte[] cf = Bytes.toBytes("cf"); 292 TableDescriptor htd = TableDescriptorBuilder.newBuilder(tableName) 293 .setColumnFamily(ColumnFamilyDescriptorBuilder.of(cf)).build(); 294 admin.createTable(htd); 295 296 assertNotEquals("Unable to retrieve regions of the table", -1, 297 TESTING_UTIL.waitFor(10000, () -> cluster.getRegions(tableName).size() == 1)); 298 299 HRegion region = cluster.getRegions(tableName).get(0); 300 HStore store = region.getStore(cf); 301 int regionServerIndex = cluster.getServerWith(region.getRegionInfo().getRegionName()); 302 HRegionServer regionServer = cluster.getRegionServer(regionServerIndex); 303 304 Table table = TESTING_UTIL.getConnection().getTable(tableName); 305 // insert data 306 insertData(tableName, admin, table); 307 insertData(tableName, admin, table, 20); 308 insertData(tableName, admin, table, 40); 309 310 // Compaction Request 311 store.triggerMajorCompaction(); 312 Optional<CompactionContext> compactionContext = store.requestCompaction(); 313 assertTrue(compactionContext.isPresent()); 314 assertFalse(compactionContext.get().getRequest().isAfterSplit()); 315 assertEquals(compactionContext.get().getRequest().getPriority(), 13); 316 317 // Split 318 long procId = 319 cluster.getMaster().splitRegion(region.getRegionInfo(), Bytes.toBytes("row4"), 0, 0); 320 321 // wait for the split to complete or get interrupted. If the split completes successfully, 322 // the procedure will return true; if the split fails, the procedure would throw exception. 323 ProcedureTestingUtility.waitProcedure(cluster.getMaster().getMasterProcedureExecutor(), 324 procId); 325 Thread.sleep(3000); 326 assertNotEquals("Table is not split properly?", -1, 327 TESTING_UTIL.waitFor(3000, 328 () -> cluster.getRegions(tableName).size() == 2)); 329 // we have 2 daughter regions 330 HRegion hRegion1 = cluster.getRegions(tableName).get(0); 331 HRegion hRegion2 = cluster.getRegions(tableName).get(1); 332 HStore hStore1 = hRegion1.getStore(cf); 333 HStore hStore2 = hRegion2.getStore(cf); 334 335 // For hStore1 && hStore2, set mock reference to one of the storeFiles 336 StoreFileInfo storeFileInfo1 = new ArrayList<>(hStore1.getStorefiles()).get(0).getFileInfo(); 337 StoreFileInfo storeFileInfo2 = new ArrayList<>(hStore2.getStorefiles()).get(0).getFileInfo(); 338 Field field = StoreFileInfo.class.getDeclaredField("reference"); 339 field.setAccessible(true); 340 field.set(storeFileInfo1, Mockito.mock(Reference.class)); 341 field.set(storeFileInfo2, Mockito.mock(Reference.class)); 342 hStore1.triggerMajorCompaction(); 343 hStore2.triggerMajorCompaction(); 344 345 compactionContext = hStore1.requestCompaction(); 346 assertTrue(compactionContext.isPresent()); 347 // since we set mock reference to one of the storeFiles, we will get isAfterSplit=true && 348 // highest priority for hStore1's compactionContext 349 assertTrue(compactionContext.get().getRequest().isAfterSplit()); 350 assertEquals(compactionContext.get().getRequest().getPriority(), Integer.MIN_VALUE + 1000); 351 352 compactionContext = 353 hStore2.requestCompaction(Integer.MIN_VALUE + 10, CompactionLifeCycleTracker.DUMMY, null); 354 assertTrue(compactionContext.isPresent()); 355 // compaction request contains higher priority than default priority of daughter region 356 // compaction (Integer.MIN_VALUE + 1000), hence we are expecting request priority to 357 // be accepted. 358 assertTrue(compactionContext.get().getRequest().isAfterSplit()); 359 assertEquals(compactionContext.get().getRequest().getPriority(), Integer.MIN_VALUE + 10); 360 admin.disableTable(tableName); 361 admin.deleteTable(tableName); 362 } 363 364 public static class FailingSplitMasterObserver implements MasterCoprocessor, MasterObserver { 365 volatile CountDownLatch latch; 366 367 @Override 368 public void start(CoprocessorEnvironment e) throws IOException { 369 latch = new CountDownLatch(1); 370 } 371 372 @Override 373 public Optional<MasterObserver> getMasterObserver() { 374 return Optional.of(this); 375 } 376 377 @Override 378 public void preSplitRegionBeforeMETAAction( 379 final ObserverContext<MasterCoprocessorEnvironment> ctx, 380 final byte[] splitKey, 381 final List<Mutation> metaEntries) throws IOException { 382 latch.countDown(); 383 throw new IOException("Causing rollback of region split"); 384 } 385 } 386 387 @Test 388 public void testSplitRollbackOnRegionClosing() throws Exception { 389 final TableName tableName = TableName.valueOf(name.getMethodName()); 390 391 // Create table then get the single region for our new table. 392 Table t = createTableAndWait(tableName, HConstants.CATALOG_FAMILY); 393 List<HRegion> regions = cluster.getRegions(tableName); 394 RegionInfo hri = getAndCheckSingleTableRegion(regions); 395 396 int tableRegionIndex = ensureTableRegionNotOnSameServerAsMeta(admin, hri); 397 398 RegionStates regionStates = cluster.getMaster().getAssignmentManager().getRegionStates(); 399 400 // Turn off balancer so it doesn't cut in and mess up our placements. 401 this.admin.balancerSwitch(false, true); 402 // Turn off the meta scanner so it don't remove parent on us. 403 cluster.getMaster().setCatalogJanitorEnabled(false); 404 try { 405 // Add a bit of load up into the table so splittable. 406 TESTING_UTIL.loadTable(t, HConstants.CATALOG_FAMILY, false); 407 // Get region pre-split. 408 HRegionServer server = cluster.getRegionServer(tableRegionIndex); 409 printOutRegions(server, "Initial regions: "); 410 int regionCount = cluster.getRegions(hri.getTable()).size(); 411 regionStates.updateRegionState(hri, RegionState.State.CLOSING); 412 413 // Now try splitting.... should fail. And each should successfully 414 // rollback. 415 // We don't roll back here anymore. Instead we fail-fast on construction of the 416 // split transaction. Catch the exception instead. 417 try { 418 this.admin.splitRegionAsync(hri.getRegionName()); 419 fail(); 420 } catch (DoNotRetryRegionException e) { 421 // Expected 422 } 423 // Wait around a while and assert count of regions remains constant. 424 for (int i = 0; i < 10; i++) { 425 Thread.sleep(100); 426 assertEquals(regionCount, cluster.getRegions(hri.getTable()).size()); 427 } 428 regionStates.updateRegionState(hri, State.OPEN); 429 // Now try splitting and it should work. 430 admin.splitRegionAsync(hri.getRegionName()).get(2, TimeUnit.MINUTES); 431 // Get daughters 432 checkAndGetDaughters(tableName); 433 // OK, so split happened after we cleared the blocking node. 434 } finally { 435 admin.balancerSwitch(true, false); 436 cluster.getMaster().setCatalogJanitorEnabled(true); 437 t.close(); 438 } 439 } 440 441 /** 442 * Test that if daughter split on us, we won't do the shutdown handler fixup just because we can't 443 * find the immediate daughter of an offlined parent. 444 */ 445 @Test 446 public void testShutdownFixupWhenDaughterHasSplit() throws Exception { 447 final TableName tableName = TableName.valueOf(name.getMethodName()); 448 449 // Create table then get the single region for our new table. 450 Table t = createTableAndWait(tableName, HConstants.CATALOG_FAMILY); List<HRegion> regions = 451 cluster.getRegions(tableName); RegionInfo hri = getAndCheckSingleTableRegion(regions); 452 int tableRegionIndex = ensureTableRegionNotOnSameServerAsMeta(admin, hri); 453 454 // Turn off balancer so it doesn't cut in and mess up our placements. 455 this.admin.balancerSwitch(false, true); 456 // Turn off the meta scanner so it don't remove parent on us. 457 cluster.getMaster().setCatalogJanitorEnabled(false); 458 try { 459 // Add a bit of load up into the table so splittable. 460 TESTING_UTIL.loadTable(t, HConstants.CATALOG_FAMILY); 461 // Get region pre-split. 462 HRegionServer server = cluster.getRegionServer(tableRegionIndex); 463 printOutRegions(server, "Initial regions: "); 464 // Now split. 465 admin.splitRegionAsync(hri.getRegionName()).get(2, TimeUnit.MINUTES); 466 // Get daughters 467 List<HRegion> daughters = checkAndGetDaughters(tableName); 468 // Now split one of the daughters. 469 HRegion daughterRegion = daughters.get(0); 470 RegionInfo daughter = daughterRegion.getRegionInfo(); 471 LOG.info("Daughter we are going to split: " + daughter); 472 clearReferences(daughterRegion); 473 LOG.info("Finished {} references={}", daughterRegion, daughterRegion.hasReferences()); 474 admin.splitRegionAsync(daughter.getRegionName()).get(2, TimeUnit.MINUTES); 475 // Get list of daughters 476 daughters = cluster.getRegions(tableName); 477 for (HRegion d: daughters) { 478 LOG.info("Regions before crash: " + d); 479 } 480 // Now crash the server 481 cluster.abortRegionServer(tableRegionIndex); 482 waitUntilRegionServerDead(); 483 awaitDaughters(tableName, daughters.size()); 484 // Assert daughters are online and ONLY the original daughters -- that 485 // fixup didn't insert one during server shutdown recover. 486 regions = cluster.getRegions(tableName); 487 for (HRegion d: daughters) { 488 LOG.info("Regions after crash: " + d); 489 } 490 if (daughters.size() != regions.size()) { 491 LOG.info("Daughters=" + daughters.size() + ", regions=" + regions.size()); 492 } 493 assertEquals(daughters.size(), regions.size()); 494 for (HRegion r: regions) { 495 LOG.info("Regions post crash " + r + ", contains=" + daughters.contains(r)); 496 assertTrue("Missing region post crash " + r, daughters.contains(r)); 497 } 498 } finally { 499 LOG.info("EXITING"); 500 admin.balancerSwitch(true, false); 501 cluster.getMaster().setCatalogJanitorEnabled(true); 502 t.close(); 503 } 504 } 505 506 private void clearReferences(HRegion region) throws IOException { 507 // Presumption. 508 assertEquals(1, region.getStores().size()); 509 HStore store = region.getStores().get(0); 510 while (store.hasReferences()) { 511 // Wait on any current compaction to complete first. 512 CompactionProgress progress = store.getCompactionProgress(); 513 if (progress != null && progress.getProgressPct() < 1.0f) { 514 while (progress.getProgressPct() < 1.0f) { 515 LOG.info("Waiting, progress={}", progress.getProgressPct()); 516 Threads.sleep(1000); 517 } 518 } else { 519 // Run new compaction. Shoudn't be any others running. 520 region.compact(true); 521 } 522 store.closeAndArchiveCompactedFiles(); 523 } 524 } 525 526 @Test 527 public void testSplitShouldNotThrowNPEEvenARegionHasEmptySplitFiles() throws Exception { 528 TableName userTableName = TableName.valueOf(name.getMethodName()); 529 TableDescriptor htd = TableDescriptorBuilder.newBuilder(userTableName) 530 .setColumnFamily(ColumnFamilyDescriptorBuilder.of("col")).build(); 531 admin.createTable(htd); 532 Table table = TESTING_UTIL.getConnection().getTable(userTableName); 533 try { 534 for (int i = 0; i <= 5; i++) { 535 String row = "row" + i; 536 Put p = new Put(row.getBytes()); 537 String val = "Val" + i; 538 p.addColumn("col".getBytes(), "ql".getBytes(), val.getBytes()); 539 table.put(p); 540 admin.flush(userTableName); 541 Delete d = new Delete(row.getBytes()); 542 // Do a normal delete 543 table.delete(d); 544 admin.flush(userTableName); 545 } 546 admin.majorCompact(userTableName); 547 List<RegionInfo> regionsOfTable = 548 cluster.getMaster().getAssignmentManager().getRegionStates() 549 .getRegionsOfTable(userTableName); 550 assertEquals(1, regionsOfTable.size()); 551 RegionInfo hRegionInfo = regionsOfTable.get(0); 552 Put p = new Put("row6".getBytes()); 553 p.addColumn("col".getBytes(), "ql".getBytes(), "val".getBytes()); 554 table.put(p); 555 p = new Put("row7".getBytes()); 556 p.addColumn("col".getBytes(), "ql".getBytes(), "val".getBytes()); 557 table.put(p); 558 p = new Put("row8".getBytes()); 559 p.addColumn("col".getBytes(), "ql".getBytes(), "val".getBytes()); 560 table.put(p); 561 admin.flush(userTableName); 562 admin.splitRegionAsync(hRegionInfo.getRegionName(), "row7".getBytes()); 563 regionsOfTable = cluster.getMaster() 564 .getAssignmentManager().getRegionStates() 565 .getRegionsOfTable(userTableName); 566 567 while (regionsOfTable.size() != 2) { 568 Thread.sleep(1000); 569 regionsOfTable = cluster.getMaster() 570 .getAssignmentManager().getRegionStates() 571 .getRegionsOfTable(userTableName); 572 LOG.debug("waiting 2 regions to be available, got " + regionsOfTable.size() + 573 ": " + regionsOfTable); 574 575 } 576 Assert.assertEquals(2, regionsOfTable.size()); 577 578 Scan s = new Scan(); 579 ResultScanner scanner = table.getScanner(s); 580 int mainTableCount = 0; 581 for (Result rr = scanner.next(); rr != null; rr = scanner.next()) { 582 mainTableCount++; 583 } 584 Assert.assertEquals(3, mainTableCount); 585 } finally { 586 table.close(); 587 } 588 } 589 590 /** 591 * Verifies HBASE-5806. Here the case is that splitting is completed but before the CJ could 592 * remove the parent region the master is killed and restarted. 593 */ 594 @Test 595 public void testMasterRestartAtRegionSplitPendingCatalogJanitor() 596 throws IOException, InterruptedException, NodeExistsException, KeeperException, 597 ServiceException, ExecutionException, TimeoutException { 598 final TableName tableName = TableName.valueOf(name.getMethodName()); 599 // Create table then get the single region for our new table. 600 try (Table t = createTableAndWait(tableName, HConstants.CATALOG_FAMILY)) { 601 List<HRegion> regions = cluster.getRegions(tableName); 602 RegionInfo hri = getAndCheckSingleTableRegion(regions); 603 604 int tableRegionIndex = ensureTableRegionNotOnSameServerAsMeta(admin, hri); 605 606 // Turn off balancer so it doesn't cut in and mess up our placements. 607 this.admin.balancerSwitch(false, true); 608 // Turn off the meta scanner so it don't remove parent on us. 609 cluster.getMaster().setCatalogJanitorEnabled(false); 610 // Add a bit of load up into the table so splittable. 611 TESTING_UTIL.loadTable(t, HConstants.CATALOG_FAMILY, false); 612 // Get region pre-split. 613 HRegionServer server = cluster.getRegionServer(tableRegionIndex); 614 printOutRegions(server, "Initial regions: "); 615 // Call split. 616 this.admin.splitRegionAsync(hri.getRegionName()).get(2, TimeUnit.MINUTES); 617 List<HRegion> daughters = checkAndGetDaughters(tableName); 618 619 // Before cleanup, get a new master. 620 HMaster master = abortAndWaitForMaster(); 621 // Now call compact on the daughters and clean up any references. 622 for (HRegion daughter : daughters) { 623 clearReferences(daughter); 624 assertFalse(daughter.hasReferences()); 625 } 626 // BUT calling compact on the daughters is not enough. The CatalogJanitor looks 627 // in the filesystem, and the filesystem content is not same as what the Region 628 // is reading from. Compacted-away files are picked up later by the compacted 629 // file discharger process. It runs infrequently. Make it run so CatalogJanitor 630 // doens't find any references. 631 for (RegionServerThread rst : cluster.getRegionServerThreads()) { 632 boolean oldSetting = rst.getRegionServer().compactedFileDischarger.setUseExecutor(false); 633 rst.getRegionServer().compactedFileDischarger.run(); 634 rst.getRegionServer().compactedFileDischarger.setUseExecutor(oldSetting); 635 } 636 cluster.getMaster().setCatalogJanitorEnabled(true); 637 ProcedureTestingUtility.waitAllProcedures(cluster.getMaster().getMasterProcedureExecutor()); 638 LOG.info("Starting run of CatalogJanitor"); 639 cluster.getMaster().getCatalogJanitor().run(); 640 ProcedureTestingUtility.waitAllProcedures(cluster.getMaster().getMasterProcedureExecutor()); 641 RegionStates regionStates = master.getAssignmentManager().getRegionStates(); 642 ServerName regionServerOfRegion = regionStates.getRegionServerOfRegion(hri); 643 assertEquals(null, regionServerOfRegion); 644 } finally { 645 TESTING_UTIL.getAdmin().balancerSwitch(true, false); 646 cluster.getMaster().setCatalogJanitorEnabled(true); 647 } 648 } 649 650 @Test 651 public void testSplitWithRegionReplicas() throws Exception { 652 final TableName tableName = TableName.valueOf(name.getMethodName()); 653 HTableDescriptor htd = TESTING_UTIL.createTableDescriptor(name.getMethodName()); 654 htd.setRegionReplication(2); 655 htd.addCoprocessor(SlowMeCopro.class.getName()); 656 // Create table then get the single region for our new table. 657 Table t = TESTING_UTIL.createTable(htd, new byte[][]{Bytes.toBytes("cf")}, null); 658 List<HRegion> oldRegions; 659 do { 660 oldRegions = cluster.getRegions(tableName); 661 Thread.sleep(10); 662 } while (oldRegions.size() != 2); 663 for (HRegion h : oldRegions) LOG.debug("OLDREGION " + h.getRegionInfo()); 664 try { 665 int regionServerIndex = cluster.getServerWith(oldRegions.get(0).getRegionInfo() 666 .getRegionName()); 667 HRegionServer regionServer = cluster.getRegionServer(regionServerIndex); 668 insertData(tableName, admin, t); 669 // Turn off balancer so it doesn't cut in and mess up our placements. 670 admin.balancerSwitch(false, true); 671 // Turn off the meta scanner so it don't remove parent on us. 672 cluster.getMaster().setCatalogJanitorEnabled(false); 673 boolean tableExists = MetaTableAccessor.tableExists(regionServer.getConnection(), 674 tableName); 675 assertEquals("The specified table should be present.", true, tableExists); 676 final HRegion region = findSplittableRegion(oldRegions); 677 regionServerIndex = cluster.getServerWith(region.getRegionInfo().getRegionName()); 678 regionServer = cluster.getRegionServer(regionServerIndex); 679 assertTrue("not able to find a splittable region", region != null); 680 try { 681 requestSplitRegion(regionServer, region, Bytes.toBytes("row2")); 682 } catch (IOException e) { 683 e.printStackTrace(); 684 fail("Split execution should have succeeded with no exceptions thrown " + e); 685 } 686 //TESTING_UTIL.waitUntilAllRegionsAssigned(tableName); 687 List<HRegion> newRegions; 688 do { 689 newRegions = cluster.getRegions(tableName); 690 for (HRegion h : newRegions) LOG.debug("NEWREGION " + h.getRegionInfo()); 691 Thread.sleep(1000); 692 } while ((newRegions.contains(oldRegions.get(0)) || newRegions.contains(oldRegions.get(1))) 693 || newRegions.size() != 4); 694 tableExists = MetaTableAccessor.tableExists(regionServer.getConnection(), 695 tableName); 696 assertEquals("The specified table should be present.", true, tableExists); 697 // exists works on stale and we see the put after the flush 698 byte[] b1 = "row1".getBytes(); 699 Get g = new Get(b1); 700 g.setConsistency(Consistency.STRONG); 701 // The following GET will make a trip to the meta to get the new location of the 1st daughter 702 // In the process it will also get the location of the replica of the daughter (initially 703 // pointing to the parent's replica) 704 Result r = t.get(g); 705 Assert.assertFalse(r.isStale()); 706 LOG.info("exists stale after flush done"); 707 708 SlowMeCopro.getPrimaryCdl().set(new CountDownLatch(1)); 709 g = new Get(b1); 710 g.setConsistency(Consistency.TIMELINE); 711 // This will succeed because in the previous GET we get the location of the replica 712 r = t.get(g); 713 Assert.assertTrue(r.isStale()); 714 SlowMeCopro.getPrimaryCdl().get().countDown(); 715 } finally { 716 SlowMeCopro.getPrimaryCdl().get().countDown(); 717 admin.balancerSwitch(true, false); 718 cluster.getMaster().setCatalogJanitorEnabled(true); 719 t.close(); 720 } 721 } 722 723 private void insertData(final TableName tableName, Admin admin, Table t) throws IOException { 724 insertData(tableName, admin, t, 1); 725 } 726 727 private void insertData(TableName tableName, Admin admin, Table t, int i) throws IOException { 728 Put p = new Put(Bytes.toBytes("row" + i)); 729 p.addColumn(Bytes.toBytes("cf"), Bytes.toBytes("q1"), Bytes.toBytes("1")); 730 t.put(p); 731 p = new Put(Bytes.toBytes("row" + (i + 1))); 732 p.addColumn(Bytes.toBytes("cf"), Bytes.toBytes("q1"), Bytes.toBytes("2")); 733 t.put(p); 734 p = new Put(Bytes.toBytes("row" + (i + 2))); 735 p.addColumn(Bytes.toBytes("cf"), Bytes.toBytes("q1"), Bytes.toBytes("3")); 736 t.put(p); 737 p = new Put(Bytes.toBytes("row" + (i + 3))); 738 p.addColumn(Bytes.toBytes("cf"), Bytes.toBytes("q1"), Bytes.toBytes("4")); 739 t.put(p); 740 admin.flush(tableName); 741 } 742 743 /** 744 * If a table has regions that have no store files in a region, they should split successfully 745 * into two regions with no store files. 746 */ 747 @Test 748 public void testSplitRegionWithNoStoreFiles() throws Exception { 749 final TableName tableName = TableName.valueOf(name.getMethodName()); 750 // Create table then get the single region for our new table. 751 createTableAndWait(tableName, HConstants.CATALOG_FAMILY); 752 List<HRegion> regions = cluster.getRegions(tableName); 753 RegionInfo hri = getAndCheckSingleTableRegion(regions); 754 ensureTableRegionNotOnSameServerAsMeta(admin, hri); 755 int regionServerIndex = cluster.getServerWith(regions.get(0).getRegionInfo() 756 .getRegionName()); 757 HRegionServer regionServer = cluster.getRegionServer(regionServerIndex); 758 // Turn off balancer so it doesn't cut in and mess up our placements. 759 this.admin.balancerSwitch(false, true); 760 // Turn off the meta scanner so it don't remove parent on us. 761 cluster.getMaster().setCatalogJanitorEnabled(false); 762 try { 763 // Precondition: we created a table with no data, no store files. 764 printOutRegions(regionServer, "Initial regions: "); 765 Configuration conf = cluster.getConfiguration(); 766 HBaseFsck.debugLsr(conf, new Path("/")); 767 Path rootDir = CommonFSUtils.getRootDir(conf); 768 FileSystem fs = TESTING_UTIL.getDFSCluster().getFileSystem(); 769 Map<String, Path> storefiles = 770 FSUtils.getTableStoreFilePathMap(null, fs, rootDir, tableName); 771 assertEquals("Expected nothing but found " + storefiles.toString(), 0, storefiles.size()); 772 773 // find a splittable region. Refresh the regions list 774 regions = cluster.getRegions(tableName); 775 final HRegion region = findSplittableRegion(regions); 776 assertTrue("not able to find a splittable region", region != null); 777 778 // Now split. 779 try { 780 requestSplitRegion(regionServer, region, Bytes.toBytes("row2")); 781 } catch (IOException e) { 782 fail("Split execution should have succeeded with no exceptions thrown"); 783 } 784 785 // Postcondition: split the table with no store files into two regions, but still have no 786 // store files 787 List<HRegion> daughters = cluster.getRegions(tableName); 788 assertEquals(2, daughters.size()); 789 790 // check dirs 791 HBaseFsck.debugLsr(conf, new Path("/")); 792 Map<String, Path> storefilesAfter = 793 FSUtils.getTableStoreFilePathMap(null, fs, rootDir, tableName); 794 assertEquals("Expected nothing but found " + storefilesAfter.toString(), 0, 795 storefilesAfter.size()); 796 797 hri = region.getRegionInfo(); // split parent 798 AssignmentManager am = cluster.getMaster().getAssignmentManager(); 799 RegionStates regionStates = am.getRegionStates(); 800 long start = EnvironmentEdgeManager.currentTime(); 801 while (!regionStates.isRegionInState(hri, State.SPLIT)) { 802 LOG.debug("Waiting for SPLIT state on: " + hri); 803 assertFalse("Timed out in waiting split parent to be in state SPLIT", 804 EnvironmentEdgeManager.currentTime() - start > 60000); 805 Thread.sleep(500); 806 } 807 assertTrue(regionStates.isRegionInState(daughters.get(0).getRegionInfo(), State.OPEN)); 808 assertTrue(regionStates.isRegionInState(daughters.get(1).getRegionInfo(), State.OPEN)); 809 810 // We should not be able to assign it again 811 try { 812 am.assign(hri); 813 } catch (DoNotRetryIOException e) { 814 // Expected 815 } 816 assertFalse("Split region can't be assigned", regionStates.isRegionInTransition(hri)); 817 assertTrue(regionStates.isRegionInState(hri, State.SPLIT)); 818 819 // We should not be able to unassign it either 820 try { 821 am.unassign(hri); 822 fail("Should have thrown exception"); 823 } catch (DoNotRetryIOException e) { 824 // Expected 825 } 826 assertFalse("Split region can't be unassigned", regionStates.isRegionInTransition(hri)); 827 assertTrue(regionStates.isRegionInState(hri, State.SPLIT)); 828 } finally { 829 admin.balancerSwitch(true, false); 830 cluster.getMaster().setCatalogJanitorEnabled(true); 831 } 832 } 833 834 @Test 835 public void testStoreFileReferenceCreationWhenSplitPolicySaysToSkipRangeCheck() 836 throws Exception { 837 final TableName tableName = TableName.valueOf(name.getMethodName()); 838 try { 839 byte[] cf = Bytes.toBytes("f"); 840 byte[] cf1 = Bytes.toBytes("i_f"); 841 TableDescriptor htd = TableDescriptorBuilder.newBuilder(tableName) 842 .setColumnFamily(ColumnFamilyDescriptorBuilder.of(cf)) 843 .setColumnFamily(ColumnFamilyDescriptorBuilder.of(cf1)) 844 .setRegionSplitPolicyClassName(CustomSplitPolicy.class.getName()).build(); 845 admin.createTable(htd); 846 List<HRegion> regions = awaitTableRegions(tableName); 847 HRegion region = regions.get(0); 848 for(int i = 3;i<9;i++) { 849 Put p = new Put(Bytes.toBytes("row"+i)); 850 p.addColumn(cf, Bytes.toBytes("q"), Bytes.toBytes("value" + i)); 851 p.addColumn(cf1, Bytes.toBytes("q"), Bytes.toBytes("value" + i)); 852 region.put(p); 853 } 854 region.flush(true); 855 HStore store = region.getStore(cf); 856 Collection<HStoreFile> storefiles = store.getStorefiles(); 857 assertEquals(1, storefiles.size()); 858 assertFalse(region.hasReferences()); 859 Path referencePath = 860 region.getRegionFileSystem().splitStoreFile(region.getRegionInfo(), "f", 861 storefiles.iterator().next(), Bytes.toBytes("row1"), false, region.getSplitPolicy()); 862 assertNull(referencePath); 863 referencePath = 864 region.getRegionFileSystem().splitStoreFile(region.getRegionInfo(), "i_f", 865 storefiles.iterator().next(), Bytes.toBytes("row1"), false, region.getSplitPolicy()); 866 assertNotNull(referencePath); 867 } finally { 868 TESTING_UTIL.deleteTable(tableName); 869 } 870 } 871 872 private HRegion findSplittableRegion(final List<HRegion> regions) throws InterruptedException { 873 for (int i = 0; i < 5; ++i) { 874 for (HRegion r: regions) { 875 if (r.isSplittable() && r.getRegionInfo().getReplicaId() == 0) { 876 return(r); 877 } 878 } 879 Thread.sleep(100); 880 } 881 return null; 882 } 883 884 private List<HRegion> checkAndGetDaughters(TableName tableName) throws InterruptedException { 885 List<HRegion> daughters = null; 886 // try up to 10s 887 for (int i = 0; i < 100; i++) { 888 daughters = cluster.getRegions(tableName); 889 if (daughters.size() >= 2) { 890 break; 891 } 892 Thread.sleep(100); 893 } 894 assertTrue(daughters.size() >= 2); 895 return daughters; 896 } 897 898 private HMaster abortAndWaitForMaster() throws IOException, InterruptedException { 899 cluster.abortMaster(0); 900 cluster.waitOnMaster(0); 901 HMaster master = cluster.startMaster().getMaster(); 902 cluster.waitForActiveAndReadyMaster(); 903 // reset the connections 904 IOUtils.closeQuietly(admin); 905 TESTING_UTIL.invalidateConnection(); 906 admin = TESTING_UTIL.getAdmin(); 907 return master; 908 } 909 910 /** 911 * Ensure single table region is not on same server as the single hbase:meta table 912 * region. 913 * @return Index of the server hosting the single table region 914 * @throws UnknownRegionException 915 * @throws MasterNotRunningException 916 * @throws org.apache.hadoop.hbase.ZooKeeperConnectionException 917 * @throws InterruptedException 918 */ 919 private int ensureTableRegionNotOnSameServerAsMeta(final Admin admin, 920 final RegionInfo hri) 921 throws IOException, MasterNotRunningException, 922 ZooKeeperConnectionException, InterruptedException { 923 // Now make sure that the table region is not on same server as that hosting 924 // hbase:meta We don't want hbase:meta replay polluting our test when we later crash 925 // the table region serving server. 926 int metaServerIndex = cluster.getServerWithMeta(); 927 boolean tablesOnMaster = LoadBalancer.isTablesOnMaster(TESTING_UTIL.getConfiguration()); 928 if (tablesOnMaster) { 929 // Need to check master is supposed to host meta... perhaps it is not. 930 throw new UnsupportedOperationException(); 931 // TODO: assertTrue(metaServerIndex == -1); // meta is on master now 932 } 933 HRegionServer metaRegionServer = tablesOnMaster? 934 cluster.getMaster(): cluster.getRegionServer(metaServerIndex); 935 int tableRegionIndex = cluster.getServerWith(hri.getRegionName()); 936 assertTrue(tableRegionIndex != -1); 937 HRegionServer tableRegionServer = cluster.getRegionServer(tableRegionIndex); 938 LOG.info("MetaRegionServer=" + metaRegionServer.getServerName() + 939 ", other=" + tableRegionServer.getServerName()); 940 if (metaRegionServer.getServerName().equals(tableRegionServer.getServerName())) { 941 HRegionServer hrs = getOtherRegionServer(cluster, metaRegionServer); 942 assertNotNull(hrs); 943 assertNotNull(hri); 944 LOG.info("Moving " + hri.getRegionNameAsString() + " from " + 945 metaRegionServer.getServerName() + " to " + 946 hrs.getServerName() + "; metaServerIndex=" + metaServerIndex); 947 admin.move(hri.getEncodedNameAsBytes(), hrs.getServerName()); 948 } 949 // Wait till table region is up on the server that is NOT carrying hbase:meta. 950 for (int i = 0; i < 100; i++) { 951 tableRegionIndex = cluster.getServerWith(hri.getRegionName()); 952 if (tableRegionIndex != -1 && tableRegionIndex != metaServerIndex) break; 953 LOG.debug("Waiting on region move off the hbase:meta server; current index " + 954 tableRegionIndex + " and metaServerIndex=" + metaServerIndex); 955 Thread.sleep(100); 956 } 957 assertTrue("Region not moved off hbase:meta server, tableRegionIndex=" + tableRegionIndex, 958 tableRegionIndex != -1 && tableRegionIndex != metaServerIndex); 959 // Verify for sure table region is not on same server as hbase:meta 960 tableRegionIndex = cluster.getServerWith(hri.getRegionName()); 961 assertTrue(tableRegionIndex != -1); 962 assertNotSame(metaServerIndex, tableRegionIndex); 963 return tableRegionIndex; 964 } 965 966 /** 967 * Find regionserver other than the one passed. 968 * Can't rely on indexes into list of regionservers since crashed servers 969 * occupy an index. 970 * @param cluster 971 * @param notThisOne 972 * @return A regionserver that is not <code>notThisOne</code> or null if none 973 * found 974 */ 975 private HRegionServer getOtherRegionServer(final MiniHBaseCluster cluster, 976 final HRegionServer notThisOne) { 977 for (RegionServerThread rst: cluster.getRegionServerThreads()) { 978 HRegionServer hrs = rst.getRegionServer(); 979 if (hrs.getServerName().equals(notThisOne.getServerName())) continue; 980 if (hrs.isStopping() || hrs.isStopped()) continue; 981 return hrs; 982 } 983 return null; 984 } 985 986 private void printOutRegions(final HRegionServer hrs, final String prefix) 987 throws IOException { 988 List<RegionInfo> regions = ProtobufUtil.getOnlineRegions(hrs.getRSRpcServices()); 989 for (RegionInfo region: regions) { 990 LOG.info(prefix + region.getRegionNameAsString()); 991 } 992 } 993 994 private void waitUntilRegionServerDead() throws InterruptedException, IOException { 995 // Wait until the master processes the RS shutdown 996 for (int i=0; (cluster.getMaster().getClusterMetrics() 997 .getLiveServerMetrics().size() > NB_SERVERS 998 || cluster.getLiveRegionServerThreads().size() > NB_SERVERS) && i<100; i++) { 999 LOG.info("Waiting on server to go down"); 1000 Thread.sleep(100); 1001 } 1002 assertFalse("Waited too long for RS to die", 1003 cluster.getMaster().getClusterMetrics(). getLiveServerMetrics().size() > NB_SERVERS 1004 || cluster.getLiveRegionServerThreads().size() > NB_SERVERS); 1005 } 1006 1007 private void awaitDaughters(TableName tableName, int numDaughters) throws InterruptedException { 1008 // Wait till regions are back on line again. 1009 for (int i = 0; cluster.getRegions(tableName).size() < numDaughters && i < 60; i++) { 1010 LOG.info("Waiting for repair to happen"); 1011 Thread.sleep(1000); 1012 } 1013 if (cluster.getRegions(tableName).size() < numDaughters) { 1014 fail("Waiting too long for daughter regions"); 1015 } 1016 } 1017 1018 private List<HRegion> awaitTableRegions(final TableName tableName) throws InterruptedException { 1019 List<HRegion> regions = null; 1020 for (int i = 0; i < 100; i++) { 1021 regions = cluster.getRegions(tableName); 1022 if (regions.size() > 0) break; 1023 Thread.sleep(100); 1024 } 1025 return regions; 1026 } 1027 1028 private Table createTableAndWait(TableName tableName, byte[] cf) throws IOException, 1029 InterruptedException { 1030 Table t = TESTING_UTIL.createTable(tableName, cf); 1031 awaitTableRegions(tableName); 1032 assertTrue("Table not online: " + tableName, 1033 cluster.getRegions(tableName).size() != 0); 1034 return t; 1035 } 1036 1037 // Make it public so that JVMClusterUtil can access it. 1038 public static class MyMaster extends HMaster { 1039 public MyMaster(Configuration conf) throws IOException, KeeperException, InterruptedException { 1040 super(conf); 1041 } 1042 1043 @Override 1044 protected RSRpcServices createRpcServices() throws IOException { 1045 return new MyMasterRpcServices(this); 1046 } 1047 } 1048 1049 static class MyMasterRpcServices extends MasterRpcServices { 1050 static AtomicBoolean enabled = new AtomicBoolean(false); 1051 1052 private HMaster myMaster; 1053 public MyMasterRpcServices(HMaster master) throws IOException { 1054 super(master); 1055 myMaster = master; 1056 } 1057 1058 @Override 1059 public ReportRegionStateTransitionResponse reportRegionStateTransition(RpcController c, 1060 ReportRegionStateTransitionRequest req) throws ServiceException { 1061 ReportRegionStateTransitionResponse resp = super.reportRegionStateTransition(c, req); 1062 if (enabled.get() && req.getTransition(0).getTransitionCode().equals( 1063 TransitionCode.READY_TO_SPLIT) && !resp.hasErrorMessage()) { 1064 RegionStates regionStates = myMaster.getAssignmentManager().getRegionStates(); 1065 for (RegionStateNode regionState: 1066 regionStates.getRegionsInTransition()) { 1067 /* TODO!!!! 1068 // Find the merging_new region and remove it 1069 if (regionState.isSplittingNew()) { 1070 regionStates.deleteRegion(regionState.getRegion()); 1071 } 1072 */ 1073 } 1074 } 1075 return resp; 1076 } 1077 } 1078 1079 static class CustomSplitPolicy extends IncreasingToUpperBoundRegionSplitPolicy { 1080 1081 @Override 1082 protected boolean shouldSplit() { 1083 return true; 1084 } 1085 1086 @Override 1087 public boolean skipStoreFileRangeCheck(String familyName) { 1088 if(familyName.startsWith("i_")) { 1089 return true; 1090 } else { 1091 return false; 1092 } 1093 } 1094 } 1095} 1096