001/* 002 * Licensed to the Apache Software Foundation (ASF) under one 003 * or more contributor license agreements. See the NOTICE file 004 * distributed with this work for additional information 005 * regarding copyright ownership. The ASF licenses this file 006 * to you under the Apache License, Version 2.0 (the 007 * "License"); you may not use this file except in compliance 008 * with the License. You may obtain a copy of the License at 009 * 010 * http://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, software 013 * distributed under the License is distributed on an "AS IS" BASIS, 014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 015 * See the License for the specific language governing permissions and 016 * limitations under the License. 017 */ 018package org.apache.hadoop.hbase.master.assignment; 019 020import static org.apache.hadoop.hbase.procedure2.ProcedureTestingUtility.assertProcFailed; 021import static org.junit.jupiter.api.Assertions.assertEquals; 022import static org.junit.jupiter.api.Assertions.assertTrue; 023 024import java.io.IOException; 025import java.util.ArrayList; 026import java.util.List; 027import org.apache.hadoop.conf.Configuration; 028import org.apache.hadoop.hbase.HBaseTestingUtil; 029import org.apache.hadoop.hbase.HConstants; 030import org.apache.hadoop.hbase.MetaTableAccessor; 031import org.apache.hadoop.hbase.TableName; 032import org.apache.hadoop.hbase.client.Admin; 033import org.apache.hadoop.hbase.client.ColumnFamilyDescriptorBuilder; 034import org.apache.hadoop.hbase.client.Put; 035import org.apache.hadoop.hbase.client.RegionInfo; 036import org.apache.hadoop.hbase.client.SnapshotDescription; 037import org.apache.hadoop.hbase.client.SnapshotType; 038import org.apache.hadoop.hbase.client.Table; 039import org.apache.hadoop.hbase.client.TableDescriptor; 040import org.apache.hadoop.hbase.client.TableDescriptorBuilder; 041import org.apache.hadoop.hbase.master.procedure.MasterProcedureConstants; 042import org.apache.hadoop.hbase.master.procedure.MasterProcedureEnv; 043import org.apache.hadoop.hbase.master.procedure.MasterProcedureTestingUtility; 044import org.apache.hadoop.hbase.master.procedure.ModifyTableProcedure; 045import org.apache.hadoop.hbase.master.procedure.TestSnapshotProcedure; 046import org.apache.hadoop.hbase.procedure2.ProcedureExecutor; 047import org.apache.hadoop.hbase.procedure2.ProcedureMetrics; 048import org.apache.hadoop.hbase.procedure2.ProcedureTestingUtility; 049import org.apache.hadoop.hbase.regionserver.HRegion; 050import org.apache.hadoop.hbase.snapshot.SnapshotDescriptionUtils; 051import org.apache.hadoop.hbase.testclassification.LargeTests; 052import org.apache.hadoop.hbase.testclassification.MasterTests; 053import org.apache.hadoop.hbase.util.Bytes; 054import org.apache.hadoop.hbase.util.Threads; 055import org.junit.jupiter.api.AfterAll; 056import org.junit.jupiter.api.AfterEach; 057import org.junit.jupiter.api.BeforeAll; 058import org.junit.jupiter.api.BeforeEach; 059import org.junit.jupiter.api.Tag; 060import org.junit.jupiter.api.Test; 061import org.junit.jupiter.api.TestInfo; 062import org.slf4j.Logger; 063import org.slf4j.LoggerFactory; 064 065import org.apache.hadoop.hbase.shaded.protobuf.ProtobufUtil; 066import org.apache.hadoop.hbase.shaded.protobuf.generated.SnapshotProtos; 067 068@Tag(MasterTests.TAG) 069@Tag(LargeTests.TAG) 070public class TestMergeTableRegionsProcedure { 071 072 private static final Logger LOG = LoggerFactory.getLogger(TestMergeTableRegionsProcedure.class); 073 private String testMethodName; 074 075 private static final HBaseTestingUtil UTIL = new HBaseTestingUtil(); 076 077 private static final int initialRegionCount = 4; 078 private final static byte[] FAMILY = Bytes.toBytes("FAMILY"); 079 private static Admin admin; 080 081 private ProcedureMetrics mergeProcMetrics; 082 private ProcedureMetrics assignProcMetrics; 083 private ProcedureMetrics unassignProcMetrics; 084 private long mergeSubmittedCount = 0; 085 private long mergeFailedCount = 0; 086 private long assignSubmittedCount = 0; 087 private long assignFailedCount = 0; 088 private long unassignSubmittedCount = 0; 089 private long unassignFailedCount = 0; 090 091 private static void setupConf(Configuration conf) { 092 // Reduce the maximum attempts to speed up the test 093 conf.setInt("hbase.assignment.maximum.attempts", 3); 094 conf.setInt("hbase.master.maximum.ping.server.attempts", 3); 095 conf.setInt("hbase.master.ping.server.retry.sleep.interval", 1); 096 conf.setInt(MasterProcedureConstants.MASTER_PROCEDURE_THREADS, 1); 097 conf.set("hbase.coprocessor.region.classes", 098 RegionServerHostingReplicaSlowOpenCoprocessor.class.getName()); 099 } 100 101 @BeforeAll 102 public static void setupCluster() throws Exception { 103 setupConf(UTIL.getConfiguration()); 104 UTIL.startMiniCluster(1); 105 admin = UTIL.getAdmin(); 106 } 107 108 @AfterAll 109 public static void cleanupTest() throws Exception { 110 UTIL.shutdownMiniCluster(); 111 } 112 113 @BeforeEach 114 public void setup() throws Exception { 115 resetProcExecutorTestingKillFlag(); 116 MasterProcedureTestingUtility.generateNonceGroup(UTIL.getHBaseCluster().getMaster()); 117 MasterProcedureTestingUtility.generateNonce(UTIL.getHBaseCluster().getMaster()); 118 // Turn off balancer so it doesn't cut in and mess up our placements. 119 admin.balancerSwitch(false, true); 120 // Turn off the meta scanner so it don't remove parent on us. 121 UTIL.getHBaseCluster().getMaster().setCatalogJanitorEnabled(false); 122 resetProcExecutorTestingKillFlag(); 123 AssignmentManager am = UTIL.getHBaseCluster().getMaster().getAssignmentManager(); 124 mergeProcMetrics = am.getAssignmentManagerMetrics().getMergeProcMetrics(); 125 assignProcMetrics = am.getAssignmentManagerMetrics().getAssignProcMetrics(); 126 unassignProcMetrics = am.getAssignmentManagerMetrics().getUnassignProcMetrics(); 127 } 128 129 @BeforeEach 130 public void setTestMethod(TestInfo testInfo) { 131 testMethodName = testInfo.getTestMethod().get().getName(); 132 } 133 134 @AfterEach 135 public void tearDown() throws Exception { 136 resetProcExecutorTestingKillFlag(); 137 for (TableDescriptor htd : admin.listTableDescriptors()) { 138 LOG.info("Tear down, remove table=" + htd.getTableName()); 139 UTIL.deleteTable(htd.getTableName()); 140 } 141 } 142 143 private void resetProcExecutorTestingKillFlag() { 144 final ProcedureExecutor<MasterProcedureEnv> procExec = getMasterProcedureExecutor(); 145 ProcedureTestingUtility.setKillAndToggleBeforeStoreUpdate(procExec, false); 146 assertTrue(procExec.isRunning(), "expected executor to be running"); 147 } 148 149 private int loadARowPerRegion(final Table t, List<RegionInfo> ris) throws IOException { 150 List<Put> puts = new ArrayList<>(); 151 for (RegionInfo ri : ris) { 152 Put put = new Put(ri.getStartKey() == null || ri.getStartKey().length == 0 153 ? new byte[] { 'a' } 154 : ri.getStartKey()); 155 put.addColumn(HConstants.CATALOG_FAMILY, HConstants.CATALOG_FAMILY, 156 HConstants.CATALOG_FAMILY); 157 puts.add(put); 158 } 159 t.put(puts); 160 return puts.size(); 161 } 162 163 /** 164 * This tests two region merges 165 */ 166 @Test 167 public void testMergeTwoRegions() throws Exception { 168 final TableName tableName = TableName.valueOf(testMethodName); 169 UTIL.createTable(tableName, new byte[][] { HConstants.CATALOG_FAMILY }, new byte[][] { 170 new byte[] { 'b' }, new byte[] { 'c' }, new byte[] { 'd' }, new byte[] { 'e' } }); 171 testMerge(tableName, 2); 172 } 173 174 private void testMerge(TableName tableName, int mergeCount) throws IOException { 175 List<RegionInfo> ris = MetaTableAccessor.getTableRegions(UTIL.getConnection(), tableName); 176 int originalRegionCount = ris.size(); 177 assertTrue(originalRegionCount > mergeCount); 178 RegionInfo[] regionsToMerge = ris.subList(0, mergeCount).toArray(new RegionInfo[] {}); 179 int countOfRowsLoaded = 0; 180 try (Table table = UTIL.getConnection().getTable(tableName)) { 181 countOfRowsLoaded = loadARowPerRegion(table, ris); 182 } 183 assertEquals(countOfRowsLoaded, UTIL.countRows(tableName)); 184 185 // collect AM metrics before test 186 collectAssignmentManagerMetrics(); 187 final ProcedureExecutor<MasterProcedureEnv> procExec = getMasterProcedureExecutor(); 188 MergeTableRegionsProcedure proc = 189 new MergeTableRegionsProcedure(procExec.getEnvironment(), regionsToMerge, true); 190 long procId = procExec.submitProcedure(proc); 191 ProcedureTestingUtility.waitProcedure(procExec, procId); 192 ProcedureTestingUtility.assertProcNotFailed(procExec, procId); 193 MetaTableAccessor.fullScanMetaAndPrint(UTIL.getConnection()); 194 assertEquals(originalRegionCount - mergeCount + 1, 195 MetaTableAccessor.getTableRegions(UTIL.getConnection(), tableName).size()); 196 197 assertEquals(mergeSubmittedCount + 1, mergeProcMetrics.getSubmittedCounter().getCount()); 198 assertEquals(mergeFailedCount, mergeProcMetrics.getFailedCounter().getCount()); 199 assertEquals(assignSubmittedCount + 1, assignProcMetrics.getSubmittedCounter().getCount()); 200 assertEquals(assignFailedCount, assignProcMetrics.getFailedCounter().getCount()); 201 assertEquals(unassignSubmittedCount + mergeCount, 202 unassignProcMetrics.getSubmittedCounter().getCount()); 203 assertEquals(unassignFailedCount, unassignProcMetrics.getFailedCounter().getCount()); 204 205 // Need to get the references cleaned out. Close of region will move them 206 // to archive so disable and reopen just to get rid of references to later 207 // when the catalogjanitor runs, it can do merged region cleanup. 208 admin.disableTable(tableName); 209 admin.enableTable(tableName); 210 211 // Can I purge the merged regions from hbase:meta? Check that all went 212 // well by looking at the merged row up in hbase:meta. It should have no 213 // more mention of the merged regions; they are purged as last step in 214 // the merged regions cleanup. 215 UTIL.getHBaseCluster().getMaster().setCatalogJanitorEnabled(true); 216 UTIL.getHBaseCluster().getMaster().getCatalogJanitor().triggerNow(); 217 RegionInfo mergedRegion = proc.getMergedRegion(); 218 RegionStateStore regionStateStore = 219 UTIL.getMiniHBaseCluster().getMaster().getAssignmentManager().getRegionStateStore(); 220 while (ris != null && ris.get(0) != null && ris.get(1) != null) { 221 ris = regionStateStore.getMergeRegions(mergedRegion); 222 LOG.info("{} {}", Bytes.toStringBinary(mergedRegion.getRegionName()), ris); 223 Threads.sleep(1000); 224 } 225 assertEquals(countOfRowsLoaded, UTIL.countRows(tableName)); 226 } 227 228 /** 229 * This tests ten region merges in one go. 230 */ 231 @Test 232 public void testMergeTenRegions() throws Exception { 233 final TableName tableName = TableName.valueOf(testMethodName); 234 final ProcedureExecutor<MasterProcedureEnv> procExec = getMasterProcedureExecutor(); 235 UTIL.createMultiRegionTable(tableName, HConstants.CATALOG_FAMILY); 236 testMerge(tableName, 10); 237 } 238 239 /** 240 * This tests two concurrent region merges 241 */ 242 @Test 243 public void testMergeRegionsConcurrently() throws Exception { 244 final TableName tableName = TableName.valueOf("testMergeRegionsConcurrently"); 245 final ProcedureExecutor<MasterProcedureEnv> procExec = getMasterProcedureExecutor(); 246 247 List<RegionInfo> tableRegions = createTable(tableName); 248 249 RegionInfo[] regionsToMerge1 = new RegionInfo[2]; 250 RegionInfo[] regionsToMerge2 = new RegionInfo[2]; 251 regionsToMerge1[0] = tableRegions.get(0); 252 regionsToMerge1[1] = tableRegions.get(1); 253 regionsToMerge2[0] = tableRegions.get(2); 254 regionsToMerge2[1] = tableRegions.get(3); 255 256 // collect AM metrics before test 257 collectAssignmentManagerMetrics(); 258 259 long procId1 = procExec.submitProcedure( 260 new MergeTableRegionsProcedure(procExec.getEnvironment(), regionsToMerge1, true)); 261 long procId2 = procExec.submitProcedure( 262 new MergeTableRegionsProcedure(procExec.getEnvironment(), regionsToMerge2, true)); 263 ProcedureTestingUtility.waitProcedure(procExec, procId1); 264 ProcedureTestingUtility.waitProcedure(procExec, procId2); 265 ProcedureTestingUtility.assertProcNotFailed(procExec, procId1); 266 ProcedureTestingUtility.assertProcNotFailed(procExec, procId2); 267 assertRegionCount(tableName, initialRegionCount - 2); 268 269 assertEquals(mergeSubmittedCount + 2, mergeProcMetrics.getSubmittedCounter().getCount()); 270 assertEquals(mergeFailedCount, mergeProcMetrics.getFailedCounter().getCount()); 271 assertEquals(assignSubmittedCount + 2, assignProcMetrics.getSubmittedCounter().getCount()); 272 assertEquals(assignFailedCount, assignProcMetrics.getFailedCounter().getCount()); 273 assertEquals(unassignSubmittedCount + 4, unassignProcMetrics.getSubmittedCounter().getCount()); 274 assertEquals(unassignFailedCount, unassignProcMetrics.getFailedCounter().getCount()); 275 } 276 277 @Test 278 public void testRecoveryAndDoubleExecution() throws Exception { 279 final TableName tableName = TableName.valueOf("testRecoveryAndDoubleExecution"); 280 final ProcedureExecutor<MasterProcedureEnv> procExec = getMasterProcedureExecutor(); 281 282 List<RegionInfo> tableRegions = createTable(tableName); 283 284 ProcedureTestingUtility.waitNoProcedureRunning(procExec); 285 ProcedureTestingUtility.setKillIfHasParent(procExec, false); 286 ProcedureTestingUtility.setKillAndToggleBeforeStoreUpdate(procExec, true); 287 288 RegionInfo[] regionsToMerge = new RegionInfo[2]; 289 regionsToMerge[0] = tableRegions.get(0); 290 regionsToMerge[1] = tableRegions.get(1); 291 292 long procId = procExec.submitProcedure( 293 new MergeTableRegionsProcedure(procExec.getEnvironment(), regionsToMerge, true)); 294 295 // Restart the executor and execute the step twice 296 MasterProcedureTestingUtility.testRecoveryAndDoubleExecution(procExec, procId); 297 ProcedureTestingUtility.assertProcNotFailed(procExec, procId); 298 299 assertRegionCount(tableName, initialRegionCount - 1); 300 } 301 302 @Test 303 public void testRollbackAndDoubleExecution() throws Exception { 304 final TableName tableName = TableName.valueOf("testRollbackAndDoubleExecution"); 305 final ProcedureExecutor<MasterProcedureEnv> procExec = getMasterProcedureExecutor(); 306 307 List<RegionInfo> tableRegions = createTable(tableName); 308 309 ProcedureTestingUtility.waitNoProcedureRunning(procExec); 310 ProcedureTestingUtility.setKillAndToggleBeforeStoreUpdate(procExec, true); 311 312 RegionInfo[] regionsToMerge = new RegionInfo[2]; 313 regionsToMerge[0] = tableRegions.get(0); 314 regionsToMerge[1] = tableRegions.get(1); 315 316 long procId = procExec.submitProcedure( 317 new MergeTableRegionsProcedure(procExec.getEnvironment(), regionsToMerge, true)); 318 319 // Failing before MERGE_TABLE_REGIONS_UPDATE_META we should trigger the rollback 320 // NOTE: the 8 (number of MERGE_TABLE_REGIONS_UPDATE_META step) is 321 // hardcoded, so you have to look at this test at least once when you add a new step. 322 int lastStep = 8; 323 MasterProcedureTestingUtility.testRollbackAndDoubleExecution(procExec, procId, lastStep, true); 324 assertEquals(initialRegionCount, UTIL.getAdmin().getRegions(tableName).size()); 325 UTIL.waitUntilAllRegionsAssigned(tableName); 326 List<HRegion> regions = UTIL.getMiniHBaseCluster().getRegions(tableName); 327 assertEquals(initialRegionCount, regions.size()); 328 } 329 330 @Test 331 public void testMergeWithoutPONR() throws Exception { 332 final TableName tableName = TableName.valueOf("testMergeWithoutPONR"); 333 final ProcedureExecutor<MasterProcedureEnv> procExec = getMasterProcedureExecutor(); 334 335 List<RegionInfo> tableRegions = createTable(tableName); 336 337 ProcedureTestingUtility.waitNoProcedureRunning(procExec); 338 ProcedureTestingUtility.setKillAndToggleBeforeStoreUpdate(procExec, true); 339 340 RegionInfo[] regionsToMerge = new RegionInfo[2]; 341 regionsToMerge[0] = tableRegions.get(0); 342 regionsToMerge[1] = tableRegions.get(1); 343 344 long procId = procExec.submitProcedure( 345 new MergeTableRegionsProcedure(procExec.getEnvironment(), regionsToMerge, true)); 346 347 // Execute until step 9 of split procedure 348 // NOTE: step 9 is after step MERGE_TABLE_REGIONS_UPDATE_META 349 MasterProcedureTestingUtility.testRecoveryAndDoubleExecution(procExec, procId, 9, false); 350 351 // Unset Toggle Kill and make ProcExec work correctly 352 ProcedureTestingUtility.setKillAndToggleBeforeStoreUpdate(procExec, false); 353 MasterProcedureTestingUtility.restartMasterProcedureExecutor(procExec); 354 ProcedureTestingUtility.waitProcedure(procExec, procId); 355 356 assertRegionCount(tableName, initialRegionCount - 1); 357 } 358 359 @Test 360 public void testMergingRegionWhileTakingSnapshot() throws Exception { 361 final TableName tableName = TableName.valueOf("testMergingRegionWhileTakingSnapshot"); 362 final ProcedureExecutor<MasterProcedureEnv> procExec = getMasterProcedureExecutor(); 363 364 List<RegionInfo> tableRegions = createTable(tableName); 365 366 ProcedureTestingUtility.waitNoProcedureRunning(procExec); 367 368 SnapshotDescription snapshot = 369 new SnapshotDescription("SnapshotProcedureTest", tableName, SnapshotType.FLUSH); 370 SnapshotProtos.SnapshotDescription snapshotProto = 371 ProtobufUtil.createHBaseProtosSnapshotDesc(snapshot); 372 snapshotProto = SnapshotDescriptionUtils.validate(snapshotProto, 373 UTIL.getHBaseCluster().getMaster().getConfiguration()); 374 long snapshotProcId = procExec.submitProcedure( 375 new TestSnapshotProcedure.DelaySnapshotProcedure(procExec.getEnvironment(), snapshotProto)); 376 UTIL.getHBaseCluster().getMaster().getSnapshotManager().registerSnapshotProcedure(snapshotProto, 377 snapshotProcId); 378 379 RegionInfo[] regionsToMerge = new RegionInfo[2]; 380 regionsToMerge[0] = tableRegions.get(0); 381 regionsToMerge[1] = tableRegions.get(1); 382 383 long mergeProcId = procExec.submitProcedure( 384 new MergeTableRegionsProcedure(procExec.getEnvironment(), regionsToMerge, true)); 385 386 ProcedureTestingUtility 387 .waitProcedure(UTIL.getHBaseCluster().getMaster().getMasterProcedureExecutor(), mergeProcId); 388 ProcedureTestingUtility.waitProcedure( 389 UTIL.getHBaseCluster().getMaster().getMasterProcedureExecutor(), snapshotProcId); 390 391 assertProcFailed(procExec, mergeProcId); 392 assertEquals(initialRegionCount, UTIL.getAdmin().getRegions(tableName).size()); 393 } 394 395 @Test 396 public void testMergeDetectsModifyTableProcedure() throws Exception { 397 final TableName tableName = TableName.valueOf(testMethodName); 398 final ProcedureExecutor<MasterProcedureEnv> procExec = getMasterProcedureExecutor(); 399 400 List<RegionInfo> regions = createTable(tableName); 401 402 RegionServerHostingReplicaSlowOpenCoprocessor.slowDownReplicaOpen = true; 403 TableDescriptor td = TableDescriptorBuilder.newBuilder(admin.getDescriptor(tableName)) 404 .setRegionReplication(2).build(); 405 long modifyProcId = 406 procExec.submitProcedure(new ModifyTableProcedure(procExec.getEnvironment(), td)); 407 408 // Merge regions of the table, the MergeTableRegionsProcedure will fail because there is a 409 // ModifyTableProcedure in progress 410 MergeTableRegionsProcedure mergeProcedure = new MergeTableRegionsProcedure( 411 procExec.getEnvironment(), regions.toArray(new RegionInfo[0]), false); 412 long mergeProcId = procExec.submitProcedure(mergeProcedure); 413 ProcedureTestingUtility.waitProcedure(procExec, mergeProcId); 414 ProcedureTestingUtility.assertProcFailed(procExec, mergeProcId); 415 416 RegionServerHostingReplicaSlowOpenCoprocessor.slowDownReplicaOpen = false; 417 ProcedureTestingUtility.waitProcedure(procExec, modifyProcId); 418 ProcedureTestingUtility.assertProcNotFailed(procExec, modifyProcId); 419 } 420 421 private List<RegionInfo> createTable(final TableName tableName) throws Exception { 422 TableDescriptor desc = TableDescriptorBuilder.newBuilder(tableName) 423 .setColumnFamily(ColumnFamilyDescriptorBuilder.of(FAMILY)).build(); 424 byte[][] splitRows = new byte[initialRegionCount - 1][]; 425 for (int i = 0; i < splitRows.length; ++i) { 426 splitRows[i] = Bytes.toBytes(String.format("%d", i)); 427 } 428 admin.createTable(desc, splitRows); 429 return assertRegionCount(tableName, initialRegionCount); 430 } 431 432 public List<RegionInfo> assertRegionCount(final TableName tableName, final int nregions) 433 throws Exception { 434 UTIL.waitUntilNoRegionsInTransition(); 435 List<RegionInfo> tableRegions = admin.getRegions(tableName); 436 assertEquals(nregions, tableRegions.size()); 437 return tableRegions; 438 } 439 440 private ProcedureExecutor<MasterProcedureEnv> getMasterProcedureExecutor() { 441 return UTIL.getHBaseCluster().getMaster().getMasterProcedureExecutor(); 442 } 443 444 private void collectAssignmentManagerMetrics() { 445 mergeSubmittedCount = mergeProcMetrics.getSubmittedCounter().getCount(); 446 mergeFailedCount = mergeProcMetrics.getFailedCounter().getCount(); 447 448 assignSubmittedCount = assignProcMetrics.getSubmittedCounter().getCount(); 449 assignFailedCount = assignProcMetrics.getFailedCounter().getCount(); 450 unassignSubmittedCount = unassignProcMetrics.getSubmittedCounter().getCount(); 451 unassignFailedCount = unassignProcMetrics.getFailedCounter().getCount(); 452 } 453}