001/* 002 * Licensed to the Apache Software Foundation (ASF) under one 003 * or more contributor license agreements. See the NOTICE file 004 * distributed with this work for additional information 005 * regarding copyright ownership. The ASF licenses this file 006 * to you under the Apache License, Version 2.0 (the 007 * "License"); you may not use this file except in compliance 008 * with the License. You may obtain a copy of the License at 009 * 010 * http://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, software 013 * distributed under the License is distributed on an "AS IS" BASIS, 014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 015 * See the License for the specific language governing permissions and 016 * limitations under the License. 017 */ 018package org.apache.hadoop.hbase.master.janitor; 019 020import static org.junit.jupiter.api.Assertions.assertEquals; 021import static org.junit.jupiter.api.Assertions.assertNotNull; 022import static org.junit.jupiter.api.Assertions.assertTrue; 023 024import java.io.IOException; 025import java.util.Collections; 026import java.util.HashSet; 027import java.util.List; 028import java.util.Map; 029import org.apache.hadoop.hbase.CatalogFamilyFormat; 030import org.apache.hadoop.hbase.Cell; 031import org.apache.hadoop.hbase.CellBuilderFactory; 032import org.apache.hadoop.hbase.CellBuilderType; 033import org.apache.hadoop.hbase.HBaseTestingUtil; 034import org.apache.hadoop.hbase.HConstants; 035import org.apache.hadoop.hbase.MetaTableAccessor; 036import org.apache.hadoop.hbase.TableName; 037import org.apache.hadoop.hbase.client.Put; 038import org.apache.hadoop.hbase.client.RegionInfo; 039import org.apache.hadoop.hbase.client.RegionInfoBuilder; 040import org.apache.hadoop.hbase.client.Result; 041import org.apache.hadoop.hbase.client.Table; 042import org.apache.hadoop.hbase.master.HMaster; 043import org.apache.hadoop.hbase.master.MasterServices; 044import org.apache.hadoop.hbase.master.assignment.AssignmentManager; 045import org.apache.hadoop.hbase.master.assignment.GCMultipleMergedRegionsProcedure; 046import org.apache.hadoop.hbase.master.assignment.GCRegionProcedure; 047import org.apache.hadoop.hbase.master.assignment.RegionStateStore; 048import org.apache.hadoop.hbase.master.assignment.RegionStates; 049import org.apache.hadoop.hbase.master.hbck.HbckChore; 050import org.apache.hadoop.hbase.master.hbck.HbckReport; 051import org.apache.hadoop.hbase.master.procedure.MasterProcedureEnv; 052import org.apache.hadoop.hbase.procedure2.ProcedureExecutor; 053import org.apache.hadoop.hbase.procedure2.ProcedureTestingUtility; 054import org.apache.hadoop.hbase.testclassification.LargeTests; 055import org.apache.hadoop.hbase.testclassification.MasterTests; 056import org.apache.hadoop.hbase.util.Bytes; 057import org.apache.hadoop.hbase.util.EnvironmentEdgeManager; 058import org.apache.hadoop.hbase.util.Pair; 059import org.apache.hadoop.hbase.util.Threads; 060import org.junit.jupiter.api.AfterAll; 061import org.junit.jupiter.api.BeforeAll; 062import org.junit.jupiter.api.Tag; 063import org.junit.jupiter.api.Test; 064import org.junit.jupiter.api.TestInfo; 065 066@Tag(MasterTests.TAG) 067@Tag(LargeTests.TAG) 068public class TestMetaFixer { 069 070 private static final HBaseTestingUtil TEST_UTIL = new HBaseTestingUtil(); 071 072 @BeforeAll 073 public static void setupBeforeClass() throws Exception { 074 TEST_UTIL.startMiniCluster(); 075 } 076 077 @AfterAll 078 public static void tearDownAfterClass() throws Exception { 079 TEST_UTIL.shutdownMiniCluster(); 080 } 081 082 private void deleteRegion(MasterServices services, RegionInfo ri) throws IOException { 083 services.getAssignmentManager().getRegionStateStore().deleteRegion(ri); 084 // Delete it from Master context too else it sticks around. 085 services.getAssignmentManager().getRegionStates().deleteRegion(ri); 086 } 087 088 private void testPlugsHolesWithReadReplicaInternal(final TableName tn, final int replicaCount) 089 throws Exception { 090 TEST_UTIL.createMultiRegionTable(tn, replicaCount, new byte[][] { HConstants.CATALOG_FAMILY }); 091 List<RegionInfo> ris = MetaTableAccessor.getTableRegions(TEST_UTIL.getConnection(), tn); 092 MasterServices services = TEST_UTIL.getHBaseCluster().getMaster(); 093 int initialSize = services.getAssignmentManager().getRegionStates().getRegionStates().size(); 094 services.getCatalogJanitor().scan(); 095 CatalogJanitorReport report = services.getCatalogJanitor().getLastReport(); 096 assertTrue(report.isEmpty()); 097 int originalCount = ris.size(); 098 // Remove first, last and middle region. See if hole gets plugged. Table has 26 * replicaCount 099 // regions. 100 for (int i = 0; i < replicaCount; i++) { 101 deleteRegion(services, ris.get(3 * replicaCount + i)); 102 deleteRegion(services, ris.get(i)); 103 deleteRegion(services, ris.get(ris.size() - 1 - i)); 104 } 105 assertEquals(initialSize - 3 * replicaCount, 106 services.getAssignmentManager().getRegionStates().getRegionStates().size()); 107 services.getCatalogJanitor().scan(); 108 report = services.getCatalogJanitor().getLastReport(); 109 assertEquals(3, report.getHoles().size(), report.toString()); 110 MetaFixer fixer = new MetaFixer(services); 111 fixer.fixHoles(report); 112 services.getCatalogJanitor().scan(); 113 report = services.getCatalogJanitor().getLastReport(); 114 assertTrue(report.isEmpty(), report.toString()); 115 assertEquals(initialSize, 116 services.getAssignmentManager().getRegionStates().getRegionStates().size()); 117 118 // wait for RITs to settle -- those are the fixed regions being assigned -- or until the 119 // watchdog TestRule terminates the test. 120 HBaseTestingUtil.await(50, 121 () -> services.getMasterProcedureExecutor().getActiveProcIds().size() == 0); 122 123 ris = MetaTableAccessor.getTableRegions(TEST_UTIL.getConnection(), tn); 124 assertEquals(originalCount, ris.size()); 125 } 126 127 @Test 128 public void testPlugsHoles(TestInfo testInfo) throws Exception { 129 TableName tn = TableName.valueOf(testInfo.getTestMethod().get().getName()); 130 testPlugsHolesWithReadReplicaInternal(tn, 1); 131 } 132 133 @Test 134 public void testPlugsHolesWithReadReplica(TestInfo testInfo) throws Exception { 135 TableName tn = TableName.valueOf(testInfo.getTestMethod().get().getName()); 136 testPlugsHolesWithReadReplicaInternal(tn, 3); 137 } 138 139 /** 140 * Just make sure running fixMeta does right thing for the case of a single-region Table where the 141 * region gets dropped. There is nothing much we can do. We can't restore what we don't know about 142 * (at least from a read of hbase:meta). 143 */ 144 @Test 145 public void testOneRegionTable(TestInfo testInfo) throws IOException { 146 TableName tn = TableName.valueOf(testInfo.getTestMethod().get().getName()); 147 TEST_UTIL.createTable(tn, HConstants.CATALOG_FAMILY); 148 List<RegionInfo> ris = MetaTableAccessor.getTableRegions(TEST_UTIL.getConnection(), tn); 149 MasterServices services = TEST_UTIL.getHBaseCluster().getMaster(); 150 services.getCatalogJanitor().scan(); 151 deleteRegion(services, ris.get(0)); 152 services.getCatalogJanitor().scan(); 153 CatalogJanitorReport report = services.getCatalogJanitor().getLastReport(); 154 ris = MetaTableAccessor.getTableRegions(TEST_UTIL.getConnection(), tn); 155 assertTrue(ris.isEmpty()); 156 MetaFixer fixer = new MetaFixer(services); 157 fixer.fixHoles(report); 158 report = services.getCatalogJanitor().getLastReport(); 159 assertTrue(report.isEmpty()); 160 ris = MetaTableAccessor.getTableRegions(TEST_UTIL.getConnection(), tn); 161 assertEquals(0, ris.size()); 162 } 163 164 private static RegionInfo makeOverlap(MasterServices services, RegionInfo a, RegionInfo b) 165 throws IOException { 166 RegionInfo overlapRegion = RegionInfoBuilder.newBuilder(a.getTable()) 167 .setStartKey(a.getStartKey()).setEndKey(b.getEndKey()).build(); 168 TEST_UTIL.createRegionDir(overlapRegion, services.getMasterFileSystem()); 169 MetaTableAccessor.putsToMetaTable(services.getConnection(), 170 Collections.singletonList(MetaTableAccessor.makePutFromRegionInfo(overlapRegion, 171 EnvironmentEdgeManager.currentTime()))); 172 // TODO: Add checks at assign time to PREVENT being able to assign over existing assign. 173 long assign = services.getAssignmentManager().assign(overlapRegion); 174 ProcedureTestingUtility.waitProcedures(services.getMasterProcedureExecutor(), assign); 175 return overlapRegion; 176 } 177 178 private void testOverlapCommon(final TableName tn) throws Exception { 179 Table t = TEST_UTIL.createMultiRegionTable(tn, HConstants.CATALOG_FAMILY); 180 TEST_UTIL.loadTable(t, HConstants.CATALOG_FAMILY); 181 List<RegionInfo> ris = MetaTableAccessor.getTableRegions(TEST_UTIL.getConnection(), tn); 182 assertTrue(ris.size() > 5); 183 HMaster services = TEST_UTIL.getHBaseCluster().getMaster(); 184 services.getCatalogJanitor().scan(); 185 CatalogJanitorReport report = services.getCatalogJanitor().getLastReport(); 186 assertTrue(report.isEmpty()); 187 // Make a simple overlap spanning second and third region. 188 makeOverlap(services, ris.get(1), ris.get(3)); 189 makeOverlap(services, ris.get(2), ris.get(3)); 190 makeOverlap(services, ris.get(2), ris.get(4)); 191 } 192 193 @Test 194 public void testOverlap(TestInfo testInfo) throws Exception { 195 TableName tn = TableName.valueOf(testInfo.getTestMethod().get().getName()); 196 testOverlapCommon(tn); 197 HMaster services = TEST_UTIL.getHBaseCluster().getMaster(); 198 HbckChore hbckChore = services.getHbckChore(); 199 200 CatalogJanitor cj = services.getCatalogJanitor(); 201 cj.scan(); 202 CatalogJanitorReport report = cj.getLastReport(); 203 assertEquals(6, report.getOverlaps().size()); 204 assertEquals(1, MetaFixer.calculateMerges(10, report.getOverlaps()).size()); 205 MetaFixer fixer = new MetaFixer(services); 206 fixer.fixOverlaps(report); 207 208 HBaseTestingUtil.await(10, () -> { 209 try { 210 if (cj.scan() > 0) { 211 // It submits GC once, then it will immediately kick off another GC to test if 212 // GCMultipleMergedRegionsProcedure is idempotent. If it is not, it will create 213 // a hole. 214 Map<RegionInfo, Result> mergedRegions = cj.getLastReport().mergedRegions; 215 for (Map.Entry<RegionInfo, Result> e : mergedRegions.entrySet()) { 216 List<RegionInfo> parents = CatalogFamilyFormat.getMergeRegions(e.getValue().rawCells()); 217 if (parents != null) { 218 ProcedureExecutor<MasterProcedureEnv> pe = services.getMasterProcedureExecutor(); 219 pe.submitProcedure( 220 new GCMultipleMergedRegionsProcedure(pe.getEnvironment(), e.getKey(), parents)); 221 } 222 } 223 return true; 224 } 225 return false; 226 } catch (Exception e) { 227 throw new RuntimeException(e); 228 } 229 }); 230 231 // Wait until all GCs settled down 232 HBaseTestingUtil.await(10, () -> { 233 return services.getMasterProcedureExecutor().getActiveProcIds().isEmpty(); 234 }); 235 236 // No orphan regions on FS 237 hbckChore.choreForTesting(); 238 HbckReport hbckReport = hbckChore.getLastReport(); 239 assertNotNull(hbckReport); 240 assertEquals(0, hbckReport.getOrphanRegionsOnFS().size()); 241 242 // No holes reported. 243 cj.scan(); 244 final CatalogJanitorReport postReport = cj.getLastReport(); 245 assertTrue(postReport.isEmpty()); 246 } 247 248 @Test 249 public void testMultipleTableOverlaps() throws Exception { 250 TableName t1 = TableName.valueOf("t1"); 251 TableName t2 = TableName.valueOf("t2"); 252 TEST_UTIL.createMultiRegionTable(t1, new byte[][] { HConstants.CATALOG_FAMILY }); 253 TEST_UTIL.createMultiRegionTable(t2, new byte[][] { HConstants.CATALOG_FAMILY }); 254 TEST_UTIL.waitTableAvailable(t2); 255 256 HMaster services = TEST_UTIL.getHBaseCluster().getMaster(); 257 services.getCatalogJanitor().scan(); 258 CatalogJanitorReport report = services.getCatalogJanitor().getLastReport(); 259 assertTrue(report.isEmpty()); 260 261 // Make a simple overlap for t1 262 List<RegionInfo> ris = MetaTableAccessor.getTableRegions(TEST_UTIL.getConnection(), t1); 263 makeOverlap(services, ris.get(1), ris.get(2)); 264 // Make a simple overlap for t2 265 ris = MetaTableAccessor.getTableRegions(TEST_UTIL.getConnection(), t2); 266 makeOverlap(services, ris.get(1), ris.get(2)); 267 268 services.getCatalogJanitor().scan(); 269 report = services.getCatalogJanitor().getLastReport(); 270 assertEquals(4, report.getOverlaps().size(), "Region overlaps count does not match."); 271 272 MetaFixer fixer = new MetaFixer(services); 273 List<Long> longs = fixer.fixOverlaps(report); 274 long[] procIds = longs.stream().mapToLong(l -> l).toArray(); 275 ProcedureTestingUtility.waitProcedures(services.getMasterProcedureExecutor(), procIds); 276 277 // After fix, verify no overlaps are left. 278 services.getCatalogJanitor().scan(); 279 report = services.getCatalogJanitor().getLastReport(); 280 assertTrue(report.isEmpty(), "After fix there should not have been any overlaps."); 281 } 282 283 @Test 284 public void testOverlapWithSmallMergeCount(TestInfo testInfo) throws Exception { 285 TableName tn = TableName.valueOf(testInfo.getTestMethod().get().getName()); 286 try { 287 testOverlapCommon(tn); 288 HMaster services = TEST_UTIL.getHBaseCluster().getMaster(); 289 CatalogJanitor cj = services.getCatalogJanitor(); 290 cj.scan(); 291 CatalogJanitorReport report = cj.getLastReport(); 292 assertEquals(6, report.getOverlaps().size()); 293 assertEquals(2, MetaFixer.calculateMerges(5, report.getOverlaps()).size()); 294 295 // The max merge count is set to 5 so overlap regions are divided into 296 // two merge requests. 297 TEST_UTIL.getHBaseCluster().getMaster().getConfiguration() 298 .setInt("hbase.master.metafixer.max.merge.count", 5); 299 300 // Get overlap regions 301 HashSet<String> overlapRegions = new HashSet<>(); 302 for (Pair<RegionInfo, RegionInfo> pair : report.getOverlaps()) { 303 overlapRegions.add(pair.getFirst().getRegionNameAsString()); 304 overlapRegions.add(pair.getSecond().getRegionNameAsString()); 305 } 306 307 MetaFixer fixer = new MetaFixer(services); 308 fixer.fixOverlaps(report); 309 AssignmentManager am = services.getAssignmentManager(); 310 311 HBaseTestingUtil.await(200, () -> { 312 try { 313 cj.scan(); 314 final CatalogJanitorReport postReport = cj.getLastReport(); 315 RegionStates regionStates = am.getRegionStates(); 316 RegionStateStore regionStateStore = am.getRegionStateStore(); 317 // Make sure that two merged regions are opened and GCs are done. 318 if (postReport.getOverlaps().size() == 1) { 319 Pair<RegionInfo, RegionInfo> pair = postReport.getOverlaps().get(0); 320 if ( 321 (!overlapRegions.contains(pair.getFirst().getRegionNameAsString()) 322 && regionStates.getRegionState(pair.getFirst()).isOpened()) 323 && (!overlapRegions.contains(pair.getSecond().getRegionNameAsString()) 324 && regionStates.getRegionState(pair.getSecond()).isOpened()) 325 ) { 326 // Make sure GC is done. 327 List<RegionInfo> firstParents = regionStateStore.getMergeRegions(pair.getFirst()); 328 List<RegionInfo> secondParents = regionStateStore.getMergeRegions(pair.getSecond()); 329 330 return (firstParents == null || firstParents.isEmpty()) 331 && (secondParents == null || secondParents.isEmpty()); 332 } 333 } 334 return false; 335 } catch (Exception e) { 336 throw new RuntimeException(e); 337 } 338 }); 339 340 // Second run of fixOverlap should fix all. 341 report = cj.getLastReport(); 342 fixer.fixOverlaps(report); 343 344 HBaseTestingUtil.await(20, () -> { 345 try { 346 // Make sure it GC only once. 347 return (cj.scan() > 0); 348 } catch (Exception e) { 349 throw new RuntimeException(e); 350 } 351 }); 352 353 // No holes reported. 354 cj.scan(); 355 final CatalogJanitorReport postReport = cj.getLastReport(); 356 assertTrue(postReport.isEmpty()); 357 358 } finally { 359 TEST_UTIL.getHBaseCluster().getMaster().getConfiguration() 360 .unset("hbase.master.metafixer.max.merge.count"); 361 362 TEST_UTIL.deleteTable(tn); 363 } 364 } 365 366 /** 367 * This test covers the case that one of merged parent regions is a merged child region that has 368 * not been GCed but there is no reference files anymore. In this case, it will kick off a GC 369 * procedure, but no merge will happen. 370 */ 371 @Test 372 public void testMergeWithMergedChildRegion(TestInfo testInfo) throws Exception { 373 TableName tn = TableName.valueOf(testInfo.getTestMethod().get().getName()); 374 TEST_UTIL.createMultiRegionTable(tn, HConstants.CATALOG_FAMILY); 375 List<RegionInfo> ris = MetaTableAccessor.getTableRegions(TEST_UTIL.getConnection(), tn); 376 assertTrue(ris.size() > 5); 377 HMaster services = TEST_UTIL.getHBaseCluster().getMaster(); 378 CatalogJanitor cj = services.getCatalogJanitor(); 379 cj.scan(); 380 CatalogJanitorReport report = cj.getLastReport(); 381 assertTrue(report.isEmpty()); 382 RegionInfo overlapRegion = makeOverlap(services, ris.get(1), ris.get(2)); 383 384 cj.scan(); 385 report = cj.getLastReport(); 386 assertEquals(2, report.getOverlaps().size()); 387 388 // Mark it as a merged child region. 389 RegionInfo fakedParentRegion = 390 RegionInfoBuilder.newBuilder(tn).setStartKey(overlapRegion.getStartKey()).build(); 391 392 Table meta = MetaTableAccessor.getMetaHTable(TEST_UTIL.getConnection()); 393 Put putOfMerged = 394 MetaTableAccessor.makePutFromRegionInfo(overlapRegion, HConstants.LATEST_TIMESTAMP); 395 String qualifier = String.format(HConstants.MERGE_QUALIFIER_PREFIX_STR + "%04d", 0); 396 putOfMerged.add(CellBuilderFactory.create(CellBuilderType.SHALLOW_COPY) 397 .setRow(putOfMerged.getRow()).setFamily(HConstants.CATALOG_FAMILY) 398 .setQualifier(Bytes.toBytes(qualifier)).setTimestamp(putOfMerged.getTimestamp()) 399 .setType(Cell.Type.Put).setValue(RegionInfo.toByteArray(fakedParentRegion)).build()); 400 401 meta.put(putOfMerged); 402 403 MetaFixer fixer = new MetaFixer(services); 404 fixer.fixOverlaps(report); 405 406 // Wait until all procedures settled down 407 HBaseTestingUtil.await(200, () -> { 408 return services.getMasterProcedureExecutor().getActiveProcIds().isEmpty(); 409 }); 410 411 // No merge is done, overlap is still there. 412 cj.scan(); 413 report = cj.getLastReport(); 414 assertEquals(2, report.getOverlaps().size()); 415 416 fixer.fixOverlaps(report); 417 418 // Wait until all procedures settled down 419 HBaseTestingUtil.await(200, () -> { 420 return services.getMasterProcedureExecutor().getActiveProcIds().isEmpty(); 421 }); 422 423 // Merge is done and no more overlaps 424 cj.scan(); 425 report = cj.getLastReport(); 426 assertEquals(0, report.getOverlaps().size()); 427 } 428 429 /** 430 * Make it so a big overlap spans many Regions, some of which are non-contiguous. Make it so we 431 * can fix this condition. HBASE-24247 432 */ 433 @Test 434 public void testOverlapWithMergeOfNonContiguous(TestInfo testInfo) throws Exception { 435 TableName tn = TableName.valueOf(testInfo.getTestMethod().get().getName()); 436 TEST_UTIL.createMultiRegionTable(tn, HConstants.CATALOG_FAMILY); 437 List<RegionInfo> ris = MetaTableAccessor.getTableRegions(TEST_UTIL.getConnection(), tn); 438 assertTrue(ris.size() > 5); 439 MasterServices services = TEST_UTIL.getHBaseCluster().getMaster(); 440 services.getCatalogJanitor().scan(); 441 CatalogJanitorReport report = services.getCatalogJanitor().getLastReport(); 442 assertTrue(report.isEmpty()); 443 // Make a simple overlap spanning second and third region. 444 makeOverlap(services, ris.get(1), ris.get(5)); 445 // Now Delete a region under the overlap to manufacture non-contiguous sub regions. 446 RegionInfo deletedRegion = ris.get(3); 447 long pid = services.getAssignmentManager().unassign(deletedRegion); 448 while (!services.getMasterProcedureExecutor().isFinished(pid)) { 449 Threads.sleep(100); 450 } 451 GCRegionProcedure procedure = 452 new GCRegionProcedure(services.getMasterProcedureExecutor().getEnvironment(), ris.get(3)); 453 pid = services.getMasterProcedureExecutor().submitProcedure(procedure); 454 while (!services.getMasterProcedureExecutor().isFinished(pid)) { 455 Threads.sleep(100); 456 } 457 services.getCatalogJanitor().scan(); 458 report = services.getCatalogJanitor().getLastReport(); 459 assertEquals(1, MetaFixer.calculateMerges(10, report.getOverlaps()).size()); 460 MetaFixer fixer = new MetaFixer(services); 461 fixer.fixOverlaps(report); 462 HBaseTestingUtil.await(10, () -> { 463 try { 464 services.getCatalogJanitor().scan(); 465 final CatalogJanitorReport postReport = services.getCatalogJanitor().getLastReport(); 466 return postReport.isEmpty(); 467 } catch (Exception e) { 468 throw new RuntimeException(e); 469 } 470 }); 471 } 472}