001/* 002 * Licensed to the Apache Software Foundation (ASF) under one 003 * or more contributor license agreements. See the NOTICE file 004 * distributed with this work for additional information 005 * regarding copyright ownership. The ASF licenses this file 006 * to you under the Apache License, Version 2.0 (the 007 * "License"); you may not use this file except in compliance 008 * with the License. You may obtain a copy of the License at 009 * 010 * http://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, software 013 * distributed under the License is distributed on an "AS IS" BASIS, 014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 015 * See the License for the specific language governing permissions and 016 * limitations under the License. 017 */ 018package org.apache.hadoop.hbase.master.janitor; 019 020import static org.junit.Assert.assertEquals; 021import static org.junit.Assert.assertNotNull; 022import static org.junit.Assert.assertTrue; 023 024import java.io.IOException; 025import java.util.Collections; 026import java.util.HashSet; 027import java.util.List; 028import java.util.Map; 029import org.apache.hadoop.hbase.CatalogFamilyFormat; 030import org.apache.hadoop.hbase.Cell; 031import org.apache.hadoop.hbase.CellBuilderFactory; 032import org.apache.hadoop.hbase.CellBuilderType; 033import org.apache.hadoop.hbase.HBaseClassTestRule; 034import org.apache.hadoop.hbase.HBaseTestingUtil; 035import org.apache.hadoop.hbase.HConstants; 036import org.apache.hadoop.hbase.MetaTableAccessor; 037import org.apache.hadoop.hbase.TableName; 038import org.apache.hadoop.hbase.client.Put; 039import org.apache.hadoop.hbase.client.RegionInfo; 040import org.apache.hadoop.hbase.client.RegionInfoBuilder; 041import org.apache.hadoop.hbase.client.Result; 042import org.apache.hadoop.hbase.client.Table; 043import org.apache.hadoop.hbase.master.HMaster; 044import org.apache.hadoop.hbase.master.MasterServices; 045import org.apache.hadoop.hbase.master.assignment.AssignmentManager; 046import org.apache.hadoop.hbase.master.assignment.GCMultipleMergedRegionsProcedure; 047import org.apache.hadoop.hbase.master.assignment.GCRegionProcedure; 048import org.apache.hadoop.hbase.master.assignment.RegionStateStore; 049import org.apache.hadoop.hbase.master.assignment.RegionStates; 050import org.apache.hadoop.hbase.master.hbck.HbckChore; 051import org.apache.hadoop.hbase.master.hbck.HbckReport; 052import org.apache.hadoop.hbase.master.procedure.MasterProcedureEnv; 053import org.apache.hadoop.hbase.procedure2.ProcedureExecutor; 054import org.apache.hadoop.hbase.procedure2.ProcedureTestingUtility; 055import org.apache.hadoop.hbase.testclassification.LargeTests; 056import org.apache.hadoop.hbase.testclassification.MasterTests; 057import org.apache.hadoop.hbase.util.Bytes; 058import org.apache.hadoop.hbase.util.EnvironmentEdgeManager; 059import org.apache.hadoop.hbase.util.Pair; 060import org.apache.hadoop.hbase.util.Threads; 061import org.junit.AfterClass; 062import org.junit.BeforeClass; 063import org.junit.ClassRule; 064import org.junit.Rule; 065import org.junit.Test; 066import org.junit.experimental.categories.Category; 067import org.junit.rules.TestName; 068 069@Category({ MasterTests.class, LargeTests.class }) 070public class TestMetaFixer { 071 @ClassRule 072 public static final HBaseClassTestRule CLASS_RULE = 073 HBaseClassTestRule.forClass(TestMetaFixer.class); 074 @Rule 075 public TestName name = new TestName(); 076 077 private static final HBaseTestingUtil TEST_UTIL = new HBaseTestingUtil(); 078 079 @BeforeClass 080 public static void setupBeforeClass() throws Exception { 081 TEST_UTIL.startMiniCluster(); 082 } 083 084 @AfterClass 085 public static void tearDownAfterClass() throws Exception { 086 TEST_UTIL.shutdownMiniCluster(); 087 } 088 089 private void deleteRegion(MasterServices services, RegionInfo ri) throws IOException { 090 services.getAssignmentManager().getRegionStateStore().deleteRegion(ri); 091 // Delete it from Master context too else it sticks around. 092 services.getAssignmentManager().getRegionStates().deleteRegion(ri); 093 } 094 095 private void testPlugsHolesWithReadReplicaInternal(final TableName tn, final int replicaCount) 096 throws Exception { 097 TEST_UTIL.createMultiRegionTable(tn, replicaCount, new byte[][] { HConstants.CATALOG_FAMILY }); 098 List<RegionInfo> ris = MetaTableAccessor.getTableRegions(TEST_UTIL.getConnection(), tn); 099 MasterServices services = TEST_UTIL.getHBaseCluster().getMaster(); 100 int initialSize = services.getAssignmentManager().getRegionStates().getRegionStates().size(); 101 services.getCatalogJanitor().scan(); 102 CatalogJanitorReport report = services.getCatalogJanitor().getLastReport(); 103 assertTrue(report.isEmpty()); 104 int originalCount = ris.size(); 105 // Remove first, last and middle region. See if hole gets plugged. Table has 26 * replicaCount 106 // regions. 107 for (int i = 0; i < replicaCount; i++) { 108 deleteRegion(services, ris.get(3 * replicaCount + i)); 109 deleteRegion(services, ris.get(i)); 110 deleteRegion(services, ris.get(ris.size() - 1 - i)); 111 } 112 assertEquals(initialSize - 3 * replicaCount, 113 services.getAssignmentManager().getRegionStates().getRegionStates().size()); 114 services.getCatalogJanitor().scan(); 115 report = services.getCatalogJanitor().getLastReport(); 116 assertEquals(report.toString(), 3, report.getHoles().size()); 117 MetaFixer fixer = new MetaFixer(services); 118 fixer.fixHoles(report); 119 services.getCatalogJanitor().scan(); 120 report = services.getCatalogJanitor().getLastReport(); 121 assertTrue(report.toString(), report.isEmpty()); 122 assertEquals(initialSize, 123 services.getAssignmentManager().getRegionStates().getRegionStates().size()); 124 125 // wait for RITs to settle -- those are the fixed regions being assigned -- or until the 126 // watchdog TestRule terminates the test. 127 HBaseTestingUtil.await(50, 128 () -> services.getMasterProcedureExecutor().getActiveProcIds().size() == 0); 129 130 ris = MetaTableAccessor.getTableRegions(TEST_UTIL.getConnection(), tn); 131 assertEquals(originalCount, ris.size()); 132 } 133 134 @Test 135 public void testPlugsHoles() throws Exception { 136 TableName tn = TableName.valueOf(this.name.getMethodName()); 137 testPlugsHolesWithReadReplicaInternal(tn, 1); 138 } 139 140 @Test 141 public void testPlugsHolesWithReadReplica() throws Exception { 142 TableName tn = TableName.valueOf(this.name.getMethodName()); 143 testPlugsHolesWithReadReplicaInternal(tn, 3); 144 } 145 146 /** 147 * Just make sure running fixMeta does right thing for the case of a single-region Table where the 148 * region gets dropped. There is nothing much we can do. We can't restore what we don't know about 149 * (at least from a read of hbase:meta). 150 */ 151 @Test 152 public void testOneRegionTable() throws IOException { 153 TableName tn = TableName.valueOf(this.name.getMethodName()); 154 TEST_UTIL.createTable(tn, HConstants.CATALOG_FAMILY); 155 List<RegionInfo> ris = MetaTableAccessor.getTableRegions(TEST_UTIL.getConnection(), tn); 156 MasterServices services = TEST_UTIL.getHBaseCluster().getMaster(); 157 services.getCatalogJanitor().scan(); 158 deleteRegion(services, ris.get(0)); 159 services.getCatalogJanitor().scan(); 160 CatalogJanitorReport report = services.getCatalogJanitor().getLastReport(); 161 ris = MetaTableAccessor.getTableRegions(TEST_UTIL.getConnection(), tn); 162 assertTrue(ris.isEmpty()); 163 MetaFixer fixer = new MetaFixer(services); 164 fixer.fixHoles(report); 165 report = services.getCatalogJanitor().getLastReport(); 166 assertTrue(report.isEmpty()); 167 ris = MetaTableAccessor.getTableRegions(TEST_UTIL.getConnection(), tn); 168 assertEquals(0, ris.size()); 169 } 170 171 private static RegionInfo makeOverlap(MasterServices services, RegionInfo a, RegionInfo b) 172 throws IOException { 173 RegionInfo overlapRegion = RegionInfoBuilder.newBuilder(a.getTable()) 174 .setStartKey(a.getStartKey()).setEndKey(b.getEndKey()).build(); 175 TEST_UTIL.createRegionDir(overlapRegion, services.getMasterFileSystem()); 176 MetaTableAccessor.putsToMetaTable(services.getConnection(), 177 Collections.singletonList(MetaTableAccessor.makePutFromRegionInfo(overlapRegion, 178 EnvironmentEdgeManager.currentTime()))); 179 // TODO: Add checks at assign time to PREVENT being able to assign over existing assign. 180 long assign = services.getAssignmentManager().assign(overlapRegion); 181 ProcedureTestingUtility.waitProcedures(services.getMasterProcedureExecutor(), assign); 182 return overlapRegion; 183 } 184 185 private void testOverlapCommon(final TableName tn) throws Exception { 186 Table t = TEST_UTIL.createMultiRegionTable(tn, HConstants.CATALOG_FAMILY); 187 TEST_UTIL.loadTable(t, HConstants.CATALOG_FAMILY); 188 List<RegionInfo> ris = MetaTableAccessor.getTableRegions(TEST_UTIL.getConnection(), tn); 189 assertTrue(ris.size() > 5); 190 HMaster services = TEST_UTIL.getHBaseCluster().getMaster(); 191 services.getCatalogJanitor().scan(); 192 CatalogJanitorReport report = services.getCatalogJanitor().getLastReport(); 193 assertTrue(report.isEmpty()); 194 // Make a simple overlap spanning second and third region. 195 makeOverlap(services, ris.get(1), ris.get(3)); 196 makeOverlap(services, ris.get(2), ris.get(3)); 197 makeOverlap(services, ris.get(2), ris.get(4)); 198 } 199 200 @Test 201 public void testOverlap() throws Exception { 202 TableName tn = TableName.valueOf(this.name.getMethodName()); 203 testOverlapCommon(tn); 204 HMaster services = TEST_UTIL.getHBaseCluster().getMaster(); 205 HbckChore hbckChore = services.getHbckChore(); 206 207 CatalogJanitor cj = services.getCatalogJanitor(); 208 cj.scan(); 209 CatalogJanitorReport report = cj.getLastReport(); 210 assertEquals(6, report.getOverlaps().size()); 211 assertEquals(1, MetaFixer.calculateMerges(10, report.getOverlaps()).size()); 212 MetaFixer fixer = new MetaFixer(services); 213 fixer.fixOverlaps(report); 214 215 HBaseTestingUtil.await(10, () -> { 216 try { 217 if (cj.scan() > 0) { 218 // It submits GC once, then it will immediately kick off another GC to test if 219 // GCMultipleMergedRegionsProcedure is idempotent. If it is not, it will create 220 // a hole. 221 Map<RegionInfo, Result> mergedRegions = cj.getLastReport().mergedRegions; 222 for (Map.Entry<RegionInfo, Result> e : mergedRegions.entrySet()) { 223 List<RegionInfo> parents = CatalogFamilyFormat.getMergeRegions(e.getValue().rawCells()); 224 if (parents != null) { 225 ProcedureExecutor<MasterProcedureEnv> pe = services.getMasterProcedureExecutor(); 226 pe.submitProcedure( 227 new GCMultipleMergedRegionsProcedure(pe.getEnvironment(), e.getKey(), parents)); 228 } 229 } 230 return true; 231 } 232 return false; 233 } catch (Exception e) { 234 throw new RuntimeException(e); 235 } 236 }); 237 238 // Wait until all GCs settled down 239 HBaseTestingUtil.await(10, () -> { 240 return services.getMasterProcedureExecutor().getActiveProcIds().isEmpty(); 241 }); 242 243 // No orphan regions on FS 244 hbckChore.choreForTesting(); 245 HbckReport hbckReport = hbckChore.getLastReport(); 246 assertNotNull(hbckReport); 247 assertEquals(0, hbckReport.getOrphanRegionsOnFS().size()); 248 249 // No holes reported. 250 cj.scan(); 251 final CatalogJanitorReport postReport = cj.getLastReport(); 252 assertTrue(postReport.isEmpty()); 253 } 254 255 @Test 256 public void testMultipleTableOverlaps() throws Exception { 257 TableName t1 = TableName.valueOf("t1"); 258 TableName t2 = TableName.valueOf("t2"); 259 TEST_UTIL.createMultiRegionTable(t1, new byte[][] { HConstants.CATALOG_FAMILY }); 260 TEST_UTIL.createMultiRegionTable(t2, new byte[][] { HConstants.CATALOG_FAMILY }); 261 TEST_UTIL.waitTableAvailable(t2); 262 263 HMaster services = TEST_UTIL.getHBaseCluster().getMaster(); 264 services.getCatalogJanitor().scan(); 265 CatalogJanitorReport report = services.getCatalogJanitor().getLastReport(); 266 assertTrue(report.isEmpty()); 267 268 // Make a simple overlap for t1 269 List<RegionInfo> ris = MetaTableAccessor.getTableRegions(TEST_UTIL.getConnection(), t1); 270 makeOverlap(services, ris.get(1), ris.get(2)); 271 // Make a simple overlap for t2 272 ris = MetaTableAccessor.getTableRegions(TEST_UTIL.getConnection(), t2); 273 makeOverlap(services, ris.get(1), ris.get(2)); 274 275 services.getCatalogJanitor().scan(); 276 report = services.getCatalogJanitor().getLastReport(); 277 assertEquals("Region overlaps count does not match.", 4, report.getOverlaps().size()); 278 279 MetaFixer fixer = new MetaFixer(services); 280 List<Long> longs = fixer.fixOverlaps(report); 281 long[] procIds = longs.stream().mapToLong(l -> l).toArray(); 282 ProcedureTestingUtility.waitProcedures(services.getMasterProcedureExecutor(), procIds); 283 284 // After fix, verify no overlaps are left. 285 services.getCatalogJanitor().scan(); 286 report = services.getCatalogJanitor().getLastReport(); 287 assertTrue("After fix there should not have been any overlaps.", report.isEmpty()); 288 } 289 290 @Test 291 public void testOverlapWithSmallMergeCount() throws Exception { 292 TableName tn = TableName.valueOf(this.name.getMethodName()); 293 try { 294 testOverlapCommon(tn); 295 HMaster services = TEST_UTIL.getHBaseCluster().getMaster(); 296 CatalogJanitor cj = services.getCatalogJanitor(); 297 cj.scan(); 298 CatalogJanitorReport report = cj.getLastReport(); 299 assertEquals(6, report.getOverlaps().size()); 300 assertEquals(2, MetaFixer.calculateMerges(5, report.getOverlaps()).size()); 301 302 // The max merge count is set to 5 so overlap regions are divided into 303 // two merge requests. 304 TEST_UTIL.getHBaseCluster().getMaster().getConfiguration() 305 .setInt("hbase.master.metafixer.max.merge.count", 5); 306 307 // Get overlap regions 308 HashSet<String> overlapRegions = new HashSet<>(); 309 for (Pair<RegionInfo, RegionInfo> pair : report.getOverlaps()) { 310 overlapRegions.add(pair.getFirst().getRegionNameAsString()); 311 overlapRegions.add(pair.getSecond().getRegionNameAsString()); 312 } 313 314 MetaFixer fixer = new MetaFixer(services); 315 fixer.fixOverlaps(report); 316 AssignmentManager am = services.getAssignmentManager(); 317 318 HBaseTestingUtil.await(200, () -> { 319 try { 320 cj.scan(); 321 final CatalogJanitorReport postReport = cj.getLastReport(); 322 RegionStates regionStates = am.getRegionStates(); 323 RegionStateStore regionStateStore = am.getRegionStateStore(); 324 // Make sure that two merged regions are opened and GCs are done. 325 if (postReport.getOverlaps().size() == 1) { 326 Pair<RegionInfo, RegionInfo> pair = postReport.getOverlaps().get(0); 327 if ( 328 (!overlapRegions.contains(pair.getFirst().getRegionNameAsString()) 329 && regionStates.getRegionState(pair.getFirst()).isOpened()) 330 && (!overlapRegions.contains(pair.getSecond().getRegionNameAsString()) 331 && regionStates.getRegionState(pair.getSecond()).isOpened()) 332 ) { 333 // Make sure GC is done. 334 List<RegionInfo> firstParents = regionStateStore.getMergeRegions(pair.getFirst()); 335 List<RegionInfo> secondParents = regionStateStore.getMergeRegions(pair.getSecond()); 336 337 return (firstParents == null || firstParents.isEmpty()) 338 && (secondParents == null || secondParents.isEmpty()); 339 } 340 } 341 return false; 342 } catch (Exception e) { 343 throw new RuntimeException(e); 344 } 345 }); 346 347 // Second run of fixOverlap should fix all. 348 report = cj.getLastReport(); 349 fixer.fixOverlaps(report); 350 351 HBaseTestingUtil.await(20, () -> { 352 try { 353 // Make sure it GC only once. 354 return (cj.scan() > 0); 355 } catch (Exception e) { 356 throw new RuntimeException(e); 357 } 358 }); 359 360 // No holes reported. 361 cj.scan(); 362 final CatalogJanitorReport postReport = cj.getLastReport(); 363 assertTrue(postReport.isEmpty()); 364 365 } finally { 366 TEST_UTIL.getHBaseCluster().getMaster().getConfiguration() 367 .unset("hbase.master.metafixer.max.merge.count"); 368 369 TEST_UTIL.deleteTable(tn); 370 } 371 } 372 373 /** 374 * This test covers the case that one of merged parent regions is a merged child region that has 375 * not been GCed but there is no reference files anymore. In this case, it will kick off a GC 376 * procedure, but no merge will happen. 377 */ 378 @Test 379 public void testMergeWithMergedChildRegion() throws Exception { 380 TableName tn = TableName.valueOf(this.name.getMethodName()); 381 TEST_UTIL.createMultiRegionTable(tn, HConstants.CATALOG_FAMILY); 382 List<RegionInfo> ris = MetaTableAccessor.getTableRegions(TEST_UTIL.getConnection(), tn); 383 assertTrue(ris.size() > 5); 384 HMaster services = TEST_UTIL.getHBaseCluster().getMaster(); 385 CatalogJanitor cj = services.getCatalogJanitor(); 386 cj.scan(); 387 CatalogJanitorReport report = cj.getLastReport(); 388 assertTrue(report.isEmpty()); 389 RegionInfo overlapRegion = makeOverlap(services, ris.get(1), ris.get(2)); 390 391 cj.scan(); 392 report = cj.getLastReport(); 393 assertEquals(2, report.getOverlaps().size()); 394 395 // Mark it as a merged child region. 396 RegionInfo fakedParentRegion = 397 RegionInfoBuilder.newBuilder(tn).setStartKey(overlapRegion.getStartKey()).build(); 398 399 Table meta = MetaTableAccessor.getMetaHTable(TEST_UTIL.getConnection()); 400 Put putOfMerged = 401 MetaTableAccessor.makePutFromRegionInfo(overlapRegion, HConstants.LATEST_TIMESTAMP); 402 String qualifier = String.format(HConstants.MERGE_QUALIFIER_PREFIX_STR + "%04d", 0); 403 putOfMerged.add(CellBuilderFactory.create(CellBuilderType.SHALLOW_COPY) 404 .setRow(putOfMerged.getRow()).setFamily(HConstants.CATALOG_FAMILY) 405 .setQualifier(Bytes.toBytes(qualifier)).setTimestamp(putOfMerged.getTimestamp()) 406 .setType(Cell.Type.Put).setValue(RegionInfo.toByteArray(fakedParentRegion)).build()); 407 408 meta.put(putOfMerged); 409 410 MetaFixer fixer = new MetaFixer(services); 411 fixer.fixOverlaps(report); 412 413 // Wait until all procedures settled down 414 HBaseTestingUtil.await(200, () -> { 415 return services.getMasterProcedureExecutor().getActiveProcIds().isEmpty(); 416 }); 417 418 // No merge is done, overlap is still there. 419 cj.scan(); 420 report = cj.getLastReport(); 421 assertEquals(2, report.getOverlaps().size()); 422 423 fixer.fixOverlaps(report); 424 425 // Wait until all procedures settled down 426 HBaseTestingUtil.await(200, () -> { 427 return services.getMasterProcedureExecutor().getActiveProcIds().isEmpty(); 428 }); 429 430 // Merge is done and no more overlaps 431 cj.scan(); 432 report = cj.getLastReport(); 433 assertEquals(0, report.getOverlaps().size()); 434 } 435 436 /** 437 * Make it so a big overlap spans many Regions, some of which are non-contiguous. Make it so we 438 * can fix this condition. HBASE-24247 439 */ 440 @Test 441 public void testOverlapWithMergeOfNonContiguous() throws Exception { 442 TableName tn = TableName.valueOf(this.name.getMethodName()); 443 TEST_UTIL.createMultiRegionTable(tn, HConstants.CATALOG_FAMILY); 444 List<RegionInfo> ris = MetaTableAccessor.getTableRegions(TEST_UTIL.getConnection(), tn); 445 assertTrue(ris.size() > 5); 446 MasterServices services = TEST_UTIL.getHBaseCluster().getMaster(); 447 services.getCatalogJanitor().scan(); 448 CatalogJanitorReport report = services.getCatalogJanitor().getLastReport(); 449 assertTrue(report.isEmpty()); 450 // Make a simple overlap spanning second and third region. 451 makeOverlap(services, ris.get(1), ris.get(5)); 452 // Now Delete a region under the overlap to manufacture non-contiguous sub regions. 453 RegionInfo deletedRegion = ris.get(3); 454 long pid = services.getAssignmentManager().unassign(deletedRegion); 455 while (!services.getMasterProcedureExecutor().isFinished(pid)) { 456 Threads.sleep(100); 457 } 458 GCRegionProcedure procedure = 459 new GCRegionProcedure(services.getMasterProcedureExecutor().getEnvironment(), ris.get(3)); 460 pid = services.getMasterProcedureExecutor().submitProcedure(procedure); 461 while (!services.getMasterProcedureExecutor().isFinished(pid)) { 462 Threads.sleep(100); 463 } 464 services.getCatalogJanitor().scan(); 465 report = services.getCatalogJanitor().getLastReport(); 466 assertEquals(1, MetaFixer.calculateMerges(10, report.getOverlaps()).size()); 467 MetaFixer fixer = new MetaFixer(services); 468 fixer.fixOverlaps(report); 469 HBaseTestingUtil.await(10, () -> { 470 try { 471 services.getCatalogJanitor().scan(); 472 final CatalogJanitorReport postReport = services.getCatalogJanitor().getLastReport(); 473 return postReport.isEmpty(); 474 } catch (Exception e) { 475 throw new RuntimeException(e); 476 } 477 }); 478 } 479}