001/* 002 * Licensed to the Apache Software Foundation (ASF) under one 003 * or more contributor license agreements. See the NOTICE file 004 * distributed with this work for additional information 005 * regarding copyright ownership. The ASF licenses this file 006 * to you under the Apache License, Version 2.0 (the 007 * "License"); you may not use this file except in compliance 008 * with the License. You may obtain a copy of the License at 009 * 010 * http://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, software 013 * distributed under the License is distributed on an "AS IS" BASIS, 014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 015 * See the License for the specific language governing permissions and 016 * limitations under the License. 017 */ 018package org.apache.hadoop.hbase.master.assignment; 019 020import static org.apache.hadoop.hbase.procedure2.ProcedureTestingUtility.assertProcFailed; 021import static org.junit.Assert.assertEquals; 022import static org.junit.Assert.assertTrue; 023 024import java.io.IOException; 025import java.util.ArrayList; 026import java.util.List; 027import org.apache.hadoop.conf.Configuration; 028import org.apache.hadoop.hbase.HBaseClassTestRule; 029import org.apache.hadoop.hbase.HBaseTestingUtil; 030import org.apache.hadoop.hbase.HConstants; 031import org.apache.hadoop.hbase.MetaTableAccessor; 032import org.apache.hadoop.hbase.TableName; 033import org.apache.hadoop.hbase.client.Admin; 034import org.apache.hadoop.hbase.client.ColumnFamilyDescriptorBuilder; 035import org.apache.hadoop.hbase.client.Put; 036import org.apache.hadoop.hbase.client.RegionInfo; 037import org.apache.hadoop.hbase.client.SnapshotDescription; 038import org.apache.hadoop.hbase.client.SnapshotType; 039import org.apache.hadoop.hbase.client.Table; 040import org.apache.hadoop.hbase.client.TableDescriptor; 041import org.apache.hadoop.hbase.client.TableDescriptorBuilder; 042import org.apache.hadoop.hbase.master.procedure.MasterProcedureConstants; 043import org.apache.hadoop.hbase.master.procedure.MasterProcedureEnv; 044import org.apache.hadoop.hbase.master.procedure.MasterProcedureTestingUtility; 045import org.apache.hadoop.hbase.master.procedure.ModifyTableProcedure; 046import org.apache.hadoop.hbase.master.procedure.TestSnapshotProcedure; 047import org.apache.hadoop.hbase.procedure2.ProcedureExecutor; 048import org.apache.hadoop.hbase.procedure2.ProcedureMetrics; 049import org.apache.hadoop.hbase.procedure2.ProcedureTestingUtility; 050import org.apache.hadoop.hbase.regionserver.HRegion; 051import org.apache.hadoop.hbase.snapshot.SnapshotDescriptionUtils; 052import org.apache.hadoop.hbase.testclassification.LargeTests; 053import org.apache.hadoop.hbase.testclassification.MasterTests; 054import org.apache.hadoop.hbase.util.Bytes; 055import org.apache.hadoop.hbase.util.Threads; 056import org.junit.After; 057import org.junit.AfterClass; 058import org.junit.Before; 059import org.junit.BeforeClass; 060import org.junit.ClassRule; 061import org.junit.Rule; 062import org.junit.Test; 063import org.junit.experimental.categories.Category; 064import org.junit.rules.TestName; 065import org.slf4j.Logger; 066import org.slf4j.LoggerFactory; 067 068import org.apache.hadoop.hbase.shaded.protobuf.ProtobufUtil; 069import org.apache.hadoop.hbase.shaded.protobuf.generated.SnapshotProtos; 070 071@Category({ MasterTests.class, LargeTests.class }) 072public class TestMergeTableRegionsProcedure { 073 074 @ClassRule 075 public static final HBaseClassTestRule CLASS_RULE = 076 HBaseClassTestRule.forClass(TestMergeTableRegionsProcedure.class); 077 078 private static final Logger LOG = LoggerFactory.getLogger(TestMergeTableRegionsProcedure.class); 079 @Rule 080 public final TestName name = new TestName(); 081 082 private static final HBaseTestingUtil UTIL = new HBaseTestingUtil(); 083 084 private static final int initialRegionCount = 4; 085 private final static byte[] FAMILY = Bytes.toBytes("FAMILY"); 086 private static Admin admin; 087 088 private ProcedureMetrics mergeProcMetrics; 089 private ProcedureMetrics assignProcMetrics; 090 private ProcedureMetrics unassignProcMetrics; 091 private long mergeSubmittedCount = 0; 092 private long mergeFailedCount = 0; 093 private long assignSubmittedCount = 0; 094 private long assignFailedCount = 0; 095 private long unassignSubmittedCount = 0; 096 private long unassignFailedCount = 0; 097 098 private static void setupConf(Configuration conf) { 099 // Reduce the maximum attempts to speed up the test 100 conf.setInt("hbase.assignment.maximum.attempts", 3); 101 conf.setInt("hbase.master.maximum.ping.server.attempts", 3); 102 conf.setInt("hbase.master.ping.server.retry.sleep.interval", 1); 103 conf.setInt(MasterProcedureConstants.MASTER_PROCEDURE_THREADS, 1); 104 conf.set("hbase.coprocessor.region.classes", 105 RegionServerHostingReplicaSlowOpenCoprocessor.class.getName()); 106 } 107 108 @BeforeClass 109 public static void setupCluster() throws Exception { 110 setupConf(UTIL.getConfiguration()); 111 UTIL.startMiniCluster(1); 112 admin = UTIL.getAdmin(); 113 } 114 115 @AfterClass 116 public static void cleanupTest() throws Exception { 117 UTIL.shutdownMiniCluster(); 118 } 119 120 @Before 121 public void setup() throws Exception { 122 resetProcExecutorTestingKillFlag(); 123 MasterProcedureTestingUtility.generateNonceGroup(UTIL.getHBaseCluster().getMaster()); 124 MasterProcedureTestingUtility.generateNonce(UTIL.getHBaseCluster().getMaster()); 125 // Turn off balancer so it doesn't cut in and mess up our placements. 126 admin.balancerSwitch(false, true); 127 // Turn off the meta scanner so it don't remove parent on us. 128 UTIL.getHBaseCluster().getMaster().setCatalogJanitorEnabled(false); 129 resetProcExecutorTestingKillFlag(); 130 AssignmentManager am = UTIL.getHBaseCluster().getMaster().getAssignmentManager(); 131 mergeProcMetrics = am.getAssignmentManagerMetrics().getMergeProcMetrics(); 132 assignProcMetrics = am.getAssignmentManagerMetrics().getAssignProcMetrics(); 133 unassignProcMetrics = am.getAssignmentManagerMetrics().getUnassignProcMetrics(); 134 } 135 136 @After 137 public void tearDown() throws Exception { 138 resetProcExecutorTestingKillFlag(); 139 for (TableDescriptor htd : admin.listTableDescriptors()) { 140 LOG.info("Tear down, remove table=" + htd.getTableName()); 141 UTIL.deleteTable(htd.getTableName()); 142 } 143 } 144 145 private void resetProcExecutorTestingKillFlag() { 146 final ProcedureExecutor<MasterProcedureEnv> procExec = getMasterProcedureExecutor(); 147 ProcedureTestingUtility.setKillAndToggleBeforeStoreUpdate(procExec, false); 148 assertTrue("expected executor to be running", procExec.isRunning()); 149 } 150 151 private int loadARowPerRegion(final Table t, List<RegionInfo> ris) throws IOException { 152 List<Put> puts = new ArrayList<>(); 153 for (RegionInfo ri : ris) { 154 Put put = new Put(ri.getStartKey() == null || ri.getStartKey().length == 0 155 ? new byte[] { 'a' } 156 : ri.getStartKey()); 157 put.addColumn(HConstants.CATALOG_FAMILY, HConstants.CATALOG_FAMILY, 158 HConstants.CATALOG_FAMILY); 159 puts.add(put); 160 } 161 t.put(puts); 162 return puts.size(); 163 } 164 165 /** 166 * This tests two region merges 167 */ 168 @Test 169 public void testMergeTwoRegions() throws Exception { 170 final TableName tableName = TableName.valueOf(this.name.getMethodName()); 171 UTIL.createTable(tableName, new byte[][] { HConstants.CATALOG_FAMILY }, new byte[][] { 172 new byte[] { 'b' }, new byte[] { 'c' }, new byte[] { 'd' }, new byte[] { 'e' } }); 173 testMerge(tableName, 2); 174 } 175 176 private void testMerge(TableName tableName, int mergeCount) throws IOException { 177 List<RegionInfo> ris = MetaTableAccessor.getTableRegions(UTIL.getConnection(), tableName); 178 int originalRegionCount = ris.size(); 179 assertTrue(originalRegionCount > mergeCount); 180 RegionInfo[] regionsToMerge = ris.subList(0, mergeCount).toArray(new RegionInfo[] {}); 181 int countOfRowsLoaded = 0; 182 try (Table table = UTIL.getConnection().getTable(tableName)) { 183 countOfRowsLoaded = loadARowPerRegion(table, ris); 184 } 185 assertEquals(countOfRowsLoaded, UTIL.countRows(tableName)); 186 187 // collect AM metrics before test 188 collectAssignmentManagerMetrics(); 189 final ProcedureExecutor<MasterProcedureEnv> procExec = getMasterProcedureExecutor(); 190 MergeTableRegionsProcedure proc = 191 new MergeTableRegionsProcedure(procExec.getEnvironment(), regionsToMerge, true); 192 long procId = procExec.submitProcedure(proc); 193 ProcedureTestingUtility.waitProcedure(procExec, procId); 194 ProcedureTestingUtility.assertProcNotFailed(procExec, procId); 195 MetaTableAccessor.fullScanMetaAndPrint(UTIL.getConnection()); 196 assertEquals(originalRegionCount - mergeCount + 1, 197 MetaTableAccessor.getTableRegions(UTIL.getConnection(), tableName).size()); 198 199 assertEquals(mergeSubmittedCount + 1, mergeProcMetrics.getSubmittedCounter().getCount()); 200 assertEquals(mergeFailedCount, mergeProcMetrics.getFailedCounter().getCount()); 201 assertEquals(assignSubmittedCount + 1, assignProcMetrics.getSubmittedCounter().getCount()); 202 assertEquals(assignFailedCount, assignProcMetrics.getFailedCounter().getCount()); 203 assertEquals(unassignSubmittedCount + mergeCount, 204 unassignProcMetrics.getSubmittedCounter().getCount()); 205 assertEquals(unassignFailedCount, unassignProcMetrics.getFailedCounter().getCount()); 206 207 // Need to get the references cleaned out. Close of region will move them 208 // to archive so disable and reopen just to get rid of references to later 209 // when the catalogjanitor runs, it can do merged region cleanup. 210 admin.disableTable(tableName); 211 admin.enableTable(tableName); 212 213 // Can I purge the merged regions from hbase:meta? Check that all went 214 // well by looking at the merged row up in hbase:meta. It should have no 215 // more mention of the merged regions; they are purged as last step in 216 // the merged regions cleanup. 217 UTIL.getHBaseCluster().getMaster().setCatalogJanitorEnabled(true); 218 UTIL.getHBaseCluster().getMaster().getCatalogJanitor().triggerNow(); 219 RegionInfo mergedRegion = proc.getMergedRegion(); 220 RegionStateStore regionStateStore = 221 UTIL.getMiniHBaseCluster().getMaster().getAssignmentManager().getRegionStateStore(); 222 while (ris != null && ris.get(0) != null && ris.get(1) != null) { 223 ris = regionStateStore.getMergeRegions(mergedRegion); 224 LOG.info("{} {}", Bytes.toStringBinary(mergedRegion.getRegionName()), ris); 225 Threads.sleep(1000); 226 } 227 assertEquals(countOfRowsLoaded, UTIL.countRows(tableName)); 228 } 229 230 /** 231 * This tests ten region merges in one go. 232 */ 233 @Test 234 public void testMergeTenRegions() throws Exception { 235 final TableName tableName = TableName.valueOf(this.name.getMethodName()); 236 final ProcedureExecutor<MasterProcedureEnv> procExec = getMasterProcedureExecutor(); 237 UTIL.createMultiRegionTable(tableName, HConstants.CATALOG_FAMILY); 238 testMerge(tableName, 10); 239 } 240 241 /** 242 * This tests two concurrent region merges 243 */ 244 @Test 245 public void testMergeRegionsConcurrently() throws Exception { 246 final TableName tableName = TableName.valueOf("testMergeRegionsConcurrently"); 247 final ProcedureExecutor<MasterProcedureEnv> procExec = getMasterProcedureExecutor(); 248 249 List<RegionInfo> tableRegions = createTable(tableName); 250 251 RegionInfo[] regionsToMerge1 = new RegionInfo[2]; 252 RegionInfo[] regionsToMerge2 = new RegionInfo[2]; 253 regionsToMerge1[0] = tableRegions.get(0); 254 regionsToMerge1[1] = tableRegions.get(1); 255 regionsToMerge2[0] = tableRegions.get(2); 256 regionsToMerge2[1] = tableRegions.get(3); 257 258 // collect AM metrics before test 259 collectAssignmentManagerMetrics(); 260 261 long procId1 = procExec.submitProcedure( 262 new MergeTableRegionsProcedure(procExec.getEnvironment(), regionsToMerge1, true)); 263 long procId2 = procExec.submitProcedure( 264 new MergeTableRegionsProcedure(procExec.getEnvironment(), regionsToMerge2, true)); 265 ProcedureTestingUtility.waitProcedure(procExec, procId1); 266 ProcedureTestingUtility.waitProcedure(procExec, procId2); 267 ProcedureTestingUtility.assertProcNotFailed(procExec, procId1); 268 ProcedureTestingUtility.assertProcNotFailed(procExec, procId2); 269 assertRegionCount(tableName, initialRegionCount - 2); 270 271 assertEquals(mergeSubmittedCount + 2, mergeProcMetrics.getSubmittedCounter().getCount()); 272 assertEquals(mergeFailedCount, mergeProcMetrics.getFailedCounter().getCount()); 273 assertEquals(assignSubmittedCount + 2, assignProcMetrics.getSubmittedCounter().getCount()); 274 assertEquals(assignFailedCount, assignProcMetrics.getFailedCounter().getCount()); 275 assertEquals(unassignSubmittedCount + 4, unassignProcMetrics.getSubmittedCounter().getCount()); 276 assertEquals(unassignFailedCount, unassignProcMetrics.getFailedCounter().getCount()); 277 } 278 279 @Test 280 public void testRecoveryAndDoubleExecution() throws Exception { 281 final TableName tableName = TableName.valueOf("testRecoveryAndDoubleExecution"); 282 final ProcedureExecutor<MasterProcedureEnv> procExec = getMasterProcedureExecutor(); 283 284 List<RegionInfo> tableRegions = createTable(tableName); 285 286 ProcedureTestingUtility.waitNoProcedureRunning(procExec); 287 ProcedureTestingUtility.setKillIfHasParent(procExec, false); 288 ProcedureTestingUtility.setKillAndToggleBeforeStoreUpdate(procExec, true); 289 290 RegionInfo[] regionsToMerge = new RegionInfo[2]; 291 regionsToMerge[0] = tableRegions.get(0); 292 regionsToMerge[1] = tableRegions.get(1); 293 294 long procId = procExec.submitProcedure( 295 new MergeTableRegionsProcedure(procExec.getEnvironment(), regionsToMerge, true)); 296 297 // Restart the executor and execute the step twice 298 MasterProcedureTestingUtility.testRecoveryAndDoubleExecution(procExec, procId); 299 ProcedureTestingUtility.assertProcNotFailed(procExec, procId); 300 301 assertRegionCount(tableName, initialRegionCount - 1); 302 } 303 304 @Test 305 public void testRollbackAndDoubleExecution() throws Exception { 306 final TableName tableName = TableName.valueOf("testRollbackAndDoubleExecution"); 307 final ProcedureExecutor<MasterProcedureEnv> procExec = getMasterProcedureExecutor(); 308 309 List<RegionInfo> tableRegions = createTable(tableName); 310 311 ProcedureTestingUtility.waitNoProcedureRunning(procExec); 312 ProcedureTestingUtility.setKillAndToggleBeforeStoreUpdate(procExec, true); 313 314 RegionInfo[] regionsToMerge = new RegionInfo[2]; 315 regionsToMerge[0] = tableRegions.get(0); 316 regionsToMerge[1] = tableRegions.get(1); 317 318 long procId = procExec.submitProcedure( 319 new MergeTableRegionsProcedure(procExec.getEnvironment(), regionsToMerge, true)); 320 321 // Failing before MERGE_TABLE_REGIONS_UPDATE_META we should trigger the rollback 322 // NOTE: the 8 (number of MERGE_TABLE_REGIONS_UPDATE_META step) is 323 // hardcoded, so you have to look at this test at least once when you add a new step. 324 int lastStep = 8; 325 MasterProcedureTestingUtility.testRollbackAndDoubleExecution(procExec, procId, lastStep, true); 326 assertEquals(initialRegionCount, UTIL.getAdmin().getRegions(tableName).size()); 327 UTIL.waitUntilAllRegionsAssigned(tableName); 328 List<HRegion> regions = UTIL.getMiniHBaseCluster().getRegions(tableName); 329 assertEquals(initialRegionCount, regions.size()); 330 } 331 332 @Test 333 public void testMergeWithoutPONR() throws Exception { 334 final TableName tableName = TableName.valueOf("testMergeWithoutPONR"); 335 final ProcedureExecutor<MasterProcedureEnv> procExec = getMasterProcedureExecutor(); 336 337 List<RegionInfo> tableRegions = createTable(tableName); 338 339 ProcedureTestingUtility.waitNoProcedureRunning(procExec); 340 ProcedureTestingUtility.setKillAndToggleBeforeStoreUpdate(procExec, true); 341 342 RegionInfo[] regionsToMerge = new RegionInfo[2]; 343 regionsToMerge[0] = tableRegions.get(0); 344 regionsToMerge[1] = tableRegions.get(1); 345 346 long procId = procExec.submitProcedure( 347 new MergeTableRegionsProcedure(procExec.getEnvironment(), regionsToMerge, true)); 348 349 // Execute until step 9 of split procedure 350 // NOTE: step 9 is after step MERGE_TABLE_REGIONS_UPDATE_META 351 MasterProcedureTestingUtility.testRecoveryAndDoubleExecution(procExec, procId, 9, false); 352 353 // Unset Toggle Kill and make ProcExec work correctly 354 ProcedureTestingUtility.setKillAndToggleBeforeStoreUpdate(procExec, false); 355 MasterProcedureTestingUtility.restartMasterProcedureExecutor(procExec); 356 ProcedureTestingUtility.waitProcedure(procExec, procId); 357 358 assertRegionCount(tableName, initialRegionCount - 1); 359 } 360 361 @Test 362 public void testMergingRegionWhileTakingSnapshot() throws Exception { 363 final TableName tableName = TableName.valueOf("testMergingRegionWhileTakingSnapshot"); 364 final ProcedureExecutor<MasterProcedureEnv> procExec = getMasterProcedureExecutor(); 365 366 List<RegionInfo> tableRegions = createTable(tableName); 367 368 ProcedureTestingUtility.waitNoProcedureRunning(procExec); 369 370 SnapshotDescription snapshot = 371 new SnapshotDescription("SnapshotProcedureTest", tableName, SnapshotType.FLUSH); 372 SnapshotProtos.SnapshotDescription snapshotProto = 373 ProtobufUtil.createHBaseProtosSnapshotDesc(snapshot); 374 snapshotProto = SnapshotDescriptionUtils.validate(snapshotProto, 375 UTIL.getHBaseCluster().getMaster().getConfiguration()); 376 long snapshotProcId = procExec.submitProcedure( 377 new TestSnapshotProcedure.DelaySnapshotProcedure(procExec.getEnvironment(), snapshotProto)); 378 UTIL.getHBaseCluster().getMaster().getSnapshotManager().registerSnapshotProcedure(snapshotProto, 379 snapshotProcId); 380 381 RegionInfo[] regionsToMerge = new RegionInfo[2]; 382 regionsToMerge[0] = tableRegions.get(0); 383 regionsToMerge[1] = tableRegions.get(1); 384 385 long mergeProcId = procExec.submitProcedure( 386 new MergeTableRegionsProcedure(procExec.getEnvironment(), regionsToMerge, true)); 387 388 ProcedureTestingUtility 389 .waitProcedure(UTIL.getHBaseCluster().getMaster().getMasterProcedureExecutor(), mergeProcId); 390 ProcedureTestingUtility.waitProcedure( 391 UTIL.getHBaseCluster().getMaster().getMasterProcedureExecutor(), snapshotProcId); 392 393 assertProcFailed(procExec, mergeProcId); 394 assertEquals(initialRegionCount, UTIL.getAdmin().getRegions(tableName).size()); 395 } 396 397 @Test 398 public void testMergeDetectsModifyTableProcedure() throws Exception { 399 final TableName tableName = TableName.valueOf(name.getMethodName()); 400 final ProcedureExecutor<MasterProcedureEnv> procExec = getMasterProcedureExecutor(); 401 402 List<RegionInfo> regions = createTable(tableName); 403 404 RegionServerHostingReplicaSlowOpenCoprocessor.slowDownReplicaOpen = true; 405 TableDescriptor td = TableDescriptorBuilder.newBuilder(admin.getDescriptor(tableName)) 406 .setRegionReplication(2).build(); 407 long modifyProcId = 408 procExec.submitProcedure(new ModifyTableProcedure(procExec.getEnvironment(), td)); 409 410 // Merge regions of the table, the MergeTableRegionsProcedure will fail because there is a 411 // ModifyTableProcedure in progress 412 MergeTableRegionsProcedure mergeProcedure = new MergeTableRegionsProcedure( 413 procExec.getEnvironment(), regions.toArray(new RegionInfo[0]), false); 414 long mergeProcId = procExec.submitProcedure(mergeProcedure); 415 ProcedureTestingUtility.waitProcedure(procExec, mergeProcId); 416 ProcedureTestingUtility.assertProcFailed(procExec, mergeProcId); 417 418 RegionServerHostingReplicaSlowOpenCoprocessor.slowDownReplicaOpen = false; 419 ProcedureTestingUtility.waitProcedure(procExec, modifyProcId); 420 ProcedureTestingUtility.assertProcNotFailed(procExec, modifyProcId); 421 } 422 423 private List<RegionInfo> createTable(final TableName tableName) throws Exception { 424 TableDescriptor desc = TableDescriptorBuilder.newBuilder(tableName) 425 .setColumnFamily(ColumnFamilyDescriptorBuilder.of(FAMILY)).build(); 426 byte[][] splitRows = new byte[initialRegionCount - 1][]; 427 for (int i = 0; i < splitRows.length; ++i) { 428 splitRows[i] = Bytes.toBytes(String.format("%d", i)); 429 } 430 admin.createTable(desc, splitRows); 431 return assertRegionCount(tableName, initialRegionCount); 432 } 433 434 public List<RegionInfo> assertRegionCount(final TableName tableName, final int nregions) 435 throws Exception { 436 UTIL.waitUntilNoRegionsInTransition(); 437 List<RegionInfo> tableRegions = admin.getRegions(tableName); 438 assertEquals(nregions, tableRegions.size()); 439 return tableRegions; 440 } 441 442 private ProcedureExecutor<MasterProcedureEnv> getMasterProcedureExecutor() { 443 return UTIL.getHBaseCluster().getMaster().getMasterProcedureExecutor(); 444 } 445 446 private void collectAssignmentManagerMetrics() { 447 mergeSubmittedCount = mergeProcMetrics.getSubmittedCounter().getCount(); 448 mergeFailedCount = mergeProcMetrics.getFailedCounter().getCount(); 449 450 assignSubmittedCount = assignProcMetrics.getSubmittedCounter().getCount(); 451 assignFailedCount = assignProcMetrics.getFailedCounter().getCount(); 452 unassignSubmittedCount = unassignProcMetrics.getSubmittedCounter().getCount(); 453 unassignFailedCount = unassignProcMetrics.getFailedCounter().getCount(); 454 } 455}