001/* 002 * Licensed to the Apache Software Foundation (ASF) under one 003 * or more contributor license agreements. See the NOTICE file 004 * distributed with this work for additional information 005 * regarding copyright ownership. The ASF licenses this file 006 * to you under the Apache License, Version 2.0 (the 007 * "License"); you may not use this file except in compliance 008 * with the License. You may obtain a copy of the License at 009 * 010 * http://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, software 013 * distributed under the License is distributed on an "AS IS" BASIS, 014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 015 * See the License for the specific language governing permissions and 016 * limitations under the License. 017 */ 018package org.apache.hadoop.hbase.master.assignment; 019 020import static org.junit.Assert.assertEquals; 021import static org.junit.Assert.assertTrue; 022 023import java.io.IOException; 024import java.util.ArrayList; 025import java.util.List; 026import org.apache.hadoop.conf.Configuration; 027import org.apache.hadoop.hbase.HBaseClassTestRule; 028import org.apache.hadoop.hbase.HBaseTestingUtility; 029import org.apache.hadoop.hbase.HColumnDescriptor; 030import org.apache.hadoop.hbase.HConstants; 031import org.apache.hadoop.hbase.HTableDescriptor; 032import org.apache.hadoop.hbase.MetaTableAccessor; 033import org.apache.hadoop.hbase.TableName; 034import org.apache.hadoop.hbase.client.Admin; 035import org.apache.hadoop.hbase.client.Put; 036import org.apache.hadoop.hbase.client.RegionInfo; 037import org.apache.hadoop.hbase.client.Table; 038import org.apache.hadoop.hbase.client.TableDescriptor; 039import org.apache.hadoop.hbase.master.procedure.MasterProcedureConstants; 040import org.apache.hadoop.hbase.master.procedure.MasterProcedureEnv; 041import org.apache.hadoop.hbase.master.procedure.MasterProcedureTestingUtility; 042import org.apache.hadoop.hbase.procedure2.ProcedureExecutor; 043import org.apache.hadoop.hbase.procedure2.ProcedureMetrics; 044import org.apache.hadoop.hbase.procedure2.ProcedureTestingUtility; 045import org.apache.hadoop.hbase.regionserver.HRegion; 046import org.apache.hadoop.hbase.testclassification.MasterTests; 047import org.apache.hadoop.hbase.testclassification.MediumTests; 048import org.apache.hadoop.hbase.util.Bytes; 049import org.apache.hadoop.hbase.util.Threads; 050import org.junit.After; 051import org.junit.AfterClass; 052import org.junit.Before; 053import org.junit.BeforeClass; 054import org.junit.ClassRule; 055import org.junit.Rule; 056import org.junit.Test; 057import org.junit.experimental.categories.Category; 058import org.junit.rules.TestName; 059import org.slf4j.Logger; 060import org.slf4j.LoggerFactory; 061 062@Category({MasterTests.class, MediumTests.class}) 063public class TestMergeTableRegionsProcedure { 064 065 @ClassRule 066 public static final HBaseClassTestRule CLASS_RULE = 067 HBaseClassTestRule.forClass(TestMergeTableRegionsProcedure.class); 068 069 private static final Logger LOG = LoggerFactory.getLogger(TestMergeTableRegionsProcedure.class); 070 @Rule public final TestName name = new TestName(); 071 072 protected static final HBaseTestingUtility UTIL = new HBaseTestingUtility(); 073 private static final int initialRegionCount = 4; 074 private final static byte[] FAMILY = Bytes.toBytes("FAMILY"); 075 private static Admin admin; 076 077 private ProcedureMetrics mergeProcMetrics; 078 private ProcedureMetrics assignProcMetrics; 079 private ProcedureMetrics unassignProcMetrics; 080 private long mergeSubmittedCount = 0; 081 private long mergeFailedCount = 0; 082 private long assignSubmittedCount = 0; 083 private long assignFailedCount = 0; 084 private long unassignSubmittedCount = 0; 085 private long unassignFailedCount = 0; 086 087 private static void setupConf(Configuration conf) { 088 // Reduce the maximum attempts to speed up the test 089 conf.setInt("hbase.assignment.maximum.attempts", 3); 090 conf.setInt("hbase.master.maximum.ping.server.attempts", 3); 091 conf.setInt("hbase.master.ping.server.retry.sleep.interval", 1); 092 conf.setInt(MasterProcedureConstants.MASTER_PROCEDURE_THREADS, 1); 093 conf.setInt(MasterProcedureConstants.MASTER_URGENT_PROCEDURE_THREADS, 0); 094 } 095 096 @BeforeClass 097 public static void setupCluster() throws Exception { 098 setupConf(UTIL.getConfiguration()); 099 UTIL.startMiniCluster(1); 100 admin = UTIL.getHBaseAdmin(); 101 } 102 103 @AfterClass 104 public static void cleanupTest() throws Exception { 105 try { 106 UTIL.shutdownMiniCluster(); 107 } catch (Exception e) { 108 LOG.warn("failure shutting down cluster", e); 109 } 110 } 111 112 @Before 113 public void setup() throws Exception { 114 resetProcExecutorTestingKillFlag(); 115 MasterProcedureTestingUtility.generateNonceGroup(UTIL.getHBaseCluster().getMaster()); 116 MasterProcedureTestingUtility.generateNonce(UTIL.getHBaseCluster().getMaster()); 117 // Turn off balancer so it doesn't cut in and mess up our placements. 118 UTIL.getHBaseAdmin().setBalancerRunning(false, true); 119 // Turn off the meta scanner so it don't remove parent on us. 120 UTIL.getHBaseCluster().getMaster().setCatalogJanitorEnabled(false); 121 resetProcExecutorTestingKillFlag(); 122 AssignmentManager am = UTIL.getHBaseCluster().getMaster().getAssignmentManager(); 123 mergeProcMetrics = am.getAssignmentManagerMetrics().getMergeProcMetrics(); 124 assignProcMetrics = am.getAssignmentManagerMetrics().getAssignProcMetrics(); 125 unassignProcMetrics = am.getAssignmentManagerMetrics().getUnassignProcMetrics(); 126 } 127 128 @After 129 public void tearDown() throws Exception { 130 resetProcExecutorTestingKillFlag(); 131 for (TableDescriptor htd: UTIL.getAdmin().listTables()) { 132 LOG.info("Tear down, remove table=" + htd.getTableName()); 133 UTIL.deleteTable(htd.getTableName()); 134 } 135 } 136 137 private void resetProcExecutorTestingKillFlag() { 138 final ProcedureExecutor<MasterProcedureEnv> procExec = getMasterProcedureExecutor(); 139 ProcedureTestingUtility.setKillAndToggleBeforeStoreUpdate(procExec, false); 140 assertTrue("expected executor to be running", procExec.isRunning()); 141 } 142 143 private int loadARowPerRegion(final Table t, List<RegionInfo> ris) 144 throws IOException { 145 List<Put> puts = new ArrayList<>(); 146 for (RegionInfo ri: ris) { 147 Put put = new Put(ri.getStartKey() == null || ri.getStartKey().length == 0? 148 new byte [] {'a'}: ri.getStartKey()); 149 put.addColumn(HConstants.CATALOG_FAMILY, HConstants.CATALOG_FAMILY, 150 HConstants.CATALOG_FAMILY); 151 puts.add(put); 152 } 153 t.put(puts); 154 return puts.size(); 155 } 156 157 158 /** 159 * This tests two region merges 160 */ 161 @Test 162 public void testMergeTwoRegions() throws Exception { 163 final TableName tableName = TableName.valueOf(this.name.getMethodName()); 164 UTIL.createTable(tableName, new byte[][]{HConstants.CATALOG_FAMILY}, 165 new byte[][]{new byte[]{'b'}, new byte[]{'c'}, new byte[]{'d'}, new byte[]{'e'}}); 166 testMerge(tableName, 2); 167 } 168 169 private void testMerge(TableName tableName, int mergeCount) throws IOException { 170 List<RegionInfo> ris = MetaTableAccessor.getTableRegions(UTIL.getConnection(), tableName); 171 int originalRegionCount = ris.size(); 172 assertTrue(originalRegionCount > mergeCount); 173 RegionInfo[] regionsToMerge = ris.subList(0, mergeCount).toArray(new RegionInfo [] {}); 174 int countOfRowsLoaded = 0; 175 try (Table table = UTIL.getConnection().getTable(tableName)) { 176 countOfRowsLoaded = loadARowPerRegion(table, ris); 177 } 178 assertEquals(countOfRowsLoaded, UTIL.countRows(tableName)); 179 180 // collect AM metrics before test 181 collectAssignmentManagerMetrics(); 182 final ProcedureExecutor<MasterProcedureEnv> procExec = getMasterProcedureExecutor(); 183 MergeTableRegionsProcedure proc = 184 new MergeTableRegionsProcedure(procExec.getEnvironment(), regionsToMerge, true); 185 long procId = procExec.submitProcedure(proc); 186 ProcedureTestingUtility.waitProcedure(procExec, procId); 187 ProcedureTestingUtility.assertProcNotFailed(procExec, procId); 188 MetaTableAccessor.fullScanMetaAndPrint(UTIL.getConnection()); 189 assertEquals(originalRegionCount - mergeCount + 1, 190 MetaTableAccessor.getTableRegions(UTIL.getConnection(), tableName).size()); 191 192 assertEquals(mergeSubmittedCount + 1, mergeProcMetrics.getSubmittedCounter().getCount()); 193 assertEquals(mergeFailedCount, mergeProcMetrics.getFailedCounter().getCount()); 194 assertEquals(assignSubmittedCount + 1, assignProcMetrics.getSubmittedCounter().getCount()); 195 assertEquals(assignFailedCount, assignProcMetrics.getFailedCounter().getCount()); 196 assertEquals(unassignSubmittedCount + mergeCount, 197 unassignProcMetrics.getSubmittedCounter().getCount()); 198 assertEquals(unassignFailedCount, unassignProcMetrics.getFailedCounter().getCount()); 199 200 // Need to get the references cleaned out. Close of region will move them 201 // to archive so disable and reopen just to get rid of references to later 202 // when the catalogjanitor runs, it can do merged region cleanup. 203 admin.disableTable(tableName); 204 admin.enableTable(tableName); 205 206 // Can I purge the merged regions from hbase:meta? Check that all went 207 // well by looking at the merged row up in hbase:meta. It should have no 208 // more mention of the merged regions; they are purged as last step in 209 // the merged regions cleanup. 210 UTIL.getHBaseCluster().getMaster().setCatalogJanitorEnabled(true); 211 UTIL.getHBaseCluster().getMaster().getCatalogJanitor().triggerNow(); 212 byte [] mergedRegion = proc.getMergedRegion().getRegionName(); 213 while (ris != null && ris.get(0) != null && ris.get(1) != null) { 214 ris = MetaTableAccessor.getMergeRegions(UTIL.getConnection(), mergedRegion); 215 LOG.info("{} {}", Bytes.toStringBinary(mergedRegion), ris); 216 Threads.sleep(1000); 217 } 218 assertEquals(countOfRowsLoaded, UTIL.countRows(tableName)); 219 } 220 221 /** 222 * This tests ten region merges in one go. 223 */ 224 @Test 225 public void testMergeTenRegions() throws Exception { 226 final TableName tableName = TableName.valueOf(this.name.getMethodName()); 227 final ProcedureExecutor<MasterProcedureEnv> procExec = getMasterProcedureExecutor(); 228 UTIL.createMultiRegionTable(tableName, HConstants.CATALOG_FAMILY); 229 testMerge(tableName, 10); 230 } 231 232 /** 233 * This tests two concurrent region merges 234 */ 235 @Test 236 public void testMergeRegionsConcurrently() throws Exception { 237 final TableName tableName = TableName.valueOf("testMergeRegionsConcurrently"); 238 final ProcedureExecutor<MasterProcedureEnv> procExec = getMasterProcedureExecutor(); 239 240 List<RegionInfo> tableRegions = createTable(tableName); 241 242 RegionInfo[] regionsToMerge1 = new RegionInfo[2]; 243 RegionInfo[] regionsToMerge2 = new RegionInfo[2]; 244 regionsToMerge1[0] = tableRegions.get(0); 245 regionsToMerge1[1] = tableRegions.get(1); 246 regionsToMerge2[0] = tableRegions.get(2); 247 regionsToMerge2[1] = tableRegions.get(3); 248 249 // collect AM metrics before test 250 collectAssignmentManagerMetrics(); 251 252 long procId1 = procExec.submitProcedure(new MergeTableRegionsProcedure( 253 procExec.getEnvironment(), regionsToMerge1, true)); 254 long procId2 = procExec.submitProcedure(new MergeTableRegionsProcedure( 255 procExec.getEnvironment(), regionsToMerge2, true)); 256 ProcedureTestingUtility.waitProcedure(procExec, procId1); 257 ProcedureTestingUtility.waitProcedure(procExec, procId2); 258 ProcedureTestingUtility.assertProcNotFailed(procExec, procId1); 259 ProcedureTestingUtility.assertProcNotFailed(procExec, procId2); 260 assertRegionCount(tableName, initialRegionCount - 2); 261 262 assertEquals(mergeSubmittedCount + 2, mergeProcMetrics.getSubmittedCounter().getCount()); 263 assertEquals(mergeFailedCount, mergeProcMetrics.getFailedCounter().getCount()); 264 assertEquals(assignSubmittedCount + 2, assignProcMetrics.getSubmittedCounter().getCount()); 265 assertEquals(assignFailedCount, assignProcMetrics.getFailedCounter().getCount()); 266 assertEquals(unassignSubmittedCount + 4, unassignProcMetrics.getSubmittedCounter().getCount()); 267 assertEquals(unassignFailedCount, unassignProcMetrics.getFailedCounter().getCount()); 268 } 269 270 @Test 271 public void testRecoveryAndDoubleExecution() throws Exception { 272 final TableName tableName = TableName.valueOf("testRecoveryAndDoubleExecution"); 273 final ProcedureExecutor<MasterProcedureEnv> procExec = getMasterProcedureExecutor(); 274 275 List<RegionInfo> tableRegions = createTable(tableName); 276 277 ProcedureTestingUtility.waitNoProcedureRunning(procExec); 278 ProcedureTestingUtility.setKillAndToggleBeforeStoreUpdate(procExec, true); 279 280 RegionInfo[] regionsToMerge = new RegionInfo[2]; 281 regionsToMerge[0] = tableRegions.get(0); 282 regionsToMerge[1] = tableRegions.get(1); 283 284 long procId = procExec.submitProcedure( 285 new MergeTableRegionsProcedure(procExec.getEnvironment(), regionsToMerge, true)); 286 287 // Restart the executor and execute the step twice 288 MasterProcedureTestingUtility.testRecoveryAndDoubleExecution(procExec, procId); 289 ProcedureTestingUtility.assertProcNotFailed(procExec, procId); 290 291 assertRegionCount(tableName, initialRegionCount - 1); 292 } 293 294 @Test 295 public void testRollbackAndDoubleExecution() throws Exception { 296 final TableName tableName = TableName.valueOf("testRollbackAndDoubleExecution"); 297 final ProcedureExecutor<MasterProcedureEnv> procExec = getMasterProcedureExecutor(); 298 299 List<RegionInfo> tableRegions = createTable(tableName); 300 301 ProcedureTestingUtility.waitNoProcedureRunning(procExec); 302 ProcedureTestingUtility.setKillAndToggleBeforeStoreUpdate(procExec, true); 303 304 RegionInfo[] regionsToMerge = new RegionInfo[2]; 305 regionsToMerge[0] = tableRegions.get(0); 306 regionsToMerge[1] = tableRegions.get(1); 307 308 long procId = procExec.submitProcedure( 309 new MergeTableRegionsProcedure(procExec.getEnvironment(), regionsToMerge, true)); 310 311 // Failing before MERGE_TABLE_REGIONS_UPDATE_META we should trigger the rollback 312 // NOTE: the 8 (number of MERGE_TABLE_REGIONS_UPDATE_META step) is 313 // hardcoded, so you have to look at this test at least once when you add a new step. 314 int lastStep = 8; 315 MasterProcedureTestingUtility.testRollbackAndDoubleExecution(procExec, procId, lastStep, true); 316 assertEquals(initialRegionCount, UTIL.getAdmin().getRegions(tableName).size()); 317 UTIL.waitUntilAllRegionsAssigned(tableName); 318 List<HRegion> regions = UTIL.getMiniHBaseCluster().getRegions(tableName); 319 assertEquals(initialRegionCount, regions.size()); 320 } 321 322 @Test 323 public void testMergeWithoutPONR() throws Exception { 324 final TableName tableName = TableName.valueOf("testMergeWithoutPONR"); 325 final ProcedureExecutor<MasterProcedureEnv> procExec = getMasterProcedureExecutor(); 326 327 List<RegionInfo> tableRegions = createTable(tableName); 328 329 ProcedureTestingUtility.waitNoProcedureRunning(procExec); 330 ProcedureTestingUtility.setKillAndToggleBeforeStoreUpdate(procExec, true); 331 332 RegionInfo[] regionsToMerge = new RegionInfo[2]; 333 regionsToMerge[0] = tableRegions.get(0); 334 regionsToMerge[1] = tableRegions.get(1); 335 336 long procId = procExec.submitProcedure( 337 new MergeTableRegionsProcedure(procExec.getEnvironment(), regionsToMerge, true)); 338 339 // Execute until step 9 of split procedure 340 // NOTE: step 9 is after step MERGE_TABLE_REGIONS_UPDATE_META 341 MasterProcedureTestingUtility.testRecoveryAndDoubleExecution(procExec, procId, 9, false); 342 343 // Unset Toggle Kill and make ProcExec work correctly 344 ProcedureTestingUtility.setKillAndToggleBeforeStoreUpdate(procExec, false); 345 MasterProcedureTestingUtility.restartMasterProcedureExecutor(procExec); 346 ProcedureTestingUtility.waitProcedure(procExec, procId); 347 348 assertRegionCount(tableName, initialRegionCount - 1); 349 } 350 351 private List<RegionInfo> createTable(final TableName tableName) 352 throws Exception { 353 HTableDescriptor desc = new HTableDescriptor(tableName); 354 desc.addFamily(new HColumnDescriptor(FAMILY)); 355 byte[][] splitRows = new byte[initialRegionCount - 1][]; 356 for (int i = 0; i < splitRows.length; ++i) { 357 splitRows[i] = Bytes.toBytes(String.format("%d", i)); 358 } 359 admin.createTable(desc, splitRows); 360 return assertRegionCount(tableName, initialRegionCount); 361 } 362 363 public List<RegionInfo> assertRegionCount(final TableName tableName, final int nregions) 364 throws Exception { 365 UTIL.waitUntilNoRegionsInTransition(); 366 List<RegionInfo> tableRegions = admin.getRegions(tableName); 367 assertEquals(nregions, tableRegions.size()); 368 return tableRegions; 369 } 370 371 private ProcedureExecutor<MasterProcedureEnv> getMasterProcedureExecutor() { 372 return UTIL.getHBaseCluster().getMaster().getMasterProcedureExecutor(); 373 } 374 375 private void collectAssignmentManagerMetrics() { 376 mergeSubmittedCount = mergeProcMetrics.getSubmittedCounter().getCount(); 377 mergeFailedCount = mergeProcMetrics.getFailedCounter().getCount(); 378 379 assignSubmittedCount = assignProcMetrics.getSubmittedCounter().getCount(); 380 assignFailedCount = assignProcMetrics.getFailedCounter().getCount(); 381 unassignSubmittedCount = unassignProcMetrics.getSubmittedCounter().getCount(); 382 unassignFailedCount = unassignProcMetrics.getFailedCounter().getCount(); 383 } 384}