001/* 002 * Licensed to the Apache Software Foundation (ASF) under one 003 * or more contributor license agreements. See the NOTICE file 004 * distributed with this work for additional information 005 * regarding copyright ownership. The ASF licenses this file 006 * to you under the Apache License, Version 2.0 (the 007 * "License"); you may not use this file except in compliance 008 * with the License. You may obtain a copy of the License at 009 * 010 * http://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, software 013 * distributed under the License is distributed on an "AS IS" BASIS, 014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 015 * See the License for the specific language governing permissions and 016 * limitations under the License. 017 */ 018package org.apache.hadoop.hbase.master.janitor; 019 020import static org.junit.Assert.assertEquals; 021import static org.junit.Assert.assertNotNull; 022import static org.junit.Assert.assertTrue; 023 024import java.io.IOException; 025import java.util.Collections; 026import java.util.HashSet; 027import java.util.List; 028import java.util.Map; 029import org.apache.hadoop.hbase.Cell; 030import org.apache.hadoop.hbase.CellBuilderFactory; 031import org.apache.hadoop.hbase.CellBuilderType; 032import org.apache.hadoop.hbase.HBaseClassTestRule; 033import org.apache.hadoop.hbase.HBaseTestingUtility; 034import org.apache.hadoop.hbase.HConstants; 035import org.apache.hadoop.hbase.MetaTableAccessor; 036import org.apache.hadoop.hbase.TableName; 037import org.apache.hadoop.hbase.client.Put; 038import org.apache.hadoop.hbase.client.RegionInfo; 039import org.apache.hadoop.hbase.client.RegionInfoBuilder; 040import org.apache.hadoop.hbase.client.Result; 041import org.apache.hadoop.hbase.client.Table; 042import org.apache.hadoop.hbase.master.HMaster; 043import org.apache.hadoop.hbase.master.MasterServices; 044import org.apache.hadoop.hbase.master.assignment.AssignmentManager; 045import org.apache.hadoop.hbase.master.assignment.GCMultipleMergedRegionsProcedure; 046import org.apache.hadoop.hbase.master.assignment.GCRegionProcedure; 047import org.apache.hadoop.hbase.master.assignment.RegionStates; 048import org.apache.hadoop.hbase.master.hbck.HbckChore; 049import org.apache.hadoop.hbase.master.hbck.HbckReport; 050import org.apache.hadoop.hbase.master.procedure.MasterProcedureEnv; 051import org.apache.hadoop.hbase.procedure2.ProcedureExecutor; 052import org.apache.hadoop.hbase.procedure2.ProcedureTestingUtility; 053import org.apache.hadoop.hbase.testclassification.LargeTests; 054import org.apache.hadoop.hbase.testclassification.MasterTests; 055import org.apache.hadoop.hbase.util.Bytes; 056import org.apache.hadoop.hbase.util.EnvironmentEdgeManager; 057import org.apache.hadoop.hbase.util.Pair; 058import org.apache.hadoop.hbase.util.Threads; 059import org.junit.AfterClass; 060import org.junit.BeforeClass; 061import org.junit.ClassRule; 062import org.junit.Rule; 063import org.junit.Test; 064import org.junit.experimental.categories.Category; 065import org.junit.rules.TestName; 066 067@Category({ MasterTests.class, LargeTests.class }) 068public class TestMetaFixer { 069 @ClassRule 070 public static final HBaseClassTestRule CLASS_RULE = 071 HBaseClassTestRule.forClass(TestMetaFixer.class); 072 @Rule 073 public TestName name = new TestName(); 074 075 private static final HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility(); 076 077 @BeforeClass 078 public static void setupBeforeClass() throws Exception { 079 TEST_UTIL.startMiniCluster(); 080 } 081 082 @AfterClass 083 public static void tearDownAfterClass() throws Exception { 084 TEST_UTIL.shutdownMiniCluster(); 085 } 086 087 private void deleteRegion(MasterServices services, RegionInfo ri) throws IOException { 088 MetaTableAccessor.deleteRegionInfo(TEST_UTIL.getConnection(), ri); 089 // Delete it from Master context too else it sticks around. 090 services.getAssignmentManager().getRegionStates().deleteRegion(ri); 091 } 092 093 private void testPlugsHolesWithReadReplicaInternal(final TableName tn, final int replicaCount) 094 throws Exception { 095 TEST_UTIL.createMultiRegionTable(tn, replicaCount, new byte[][] { HConstants.CATALOG_FAMILY }); 096 List<RegionInfo> ris = MetaTableAccessor.getTableRegions(TEST_UTIL.getConnection(), tn); 097 MasterServices services = TEST_UTIL.getHBaseCluster().getMaster(); 098 int initialSize = services.getAssignmentManager().getRegionStates().getRegionStates().size(); 099 services.getCatalogJanitor().scan(); 100 CatalogJanitorReport report = services.getCatalogJanitor().getLastReport(); 101 assertTrue(report.isEmpty()); 102 int originalCount = ris.size(); 103 // Remove first, last and middle region. See if hole gets plugged. Table has 26 * replicaCount 104 // regions. 105 for (int i = 0; i < replicaCount; i++) { 106 deleteRegion(services, ris.get(3 * replicaCount + i)); 107 deleteRegion(services, ris.get(i)); 108 deleteRegion(services, ris.get(ris.size() - 1 - i)); 109 } 110 assertEquals(initialSize - 3 * replicaCount, 111 services.getAssignmentManager().getRegionStates().getRegionStates().size()); 112 services.getCatalogJanitor().scan(); 113 report = services.getCatalogJanitor().getLastReport(); 114 assertEquals(report.toString(), 3, report.getHoles().size()); 115 MetaFixer fixer = new MetaFixer(services); 116 fixer.fixHoles(report); 117 services.getCatalogJanitor().scan(); 118 report = services.getCatalogJanitor().getLastReport(); 119 assertTrue(report.toString(), report.isEmpty()); 120 assertEquals(initialSize, 121 services.getAssignmentManager().getRegionStates().getRegionStates().size()); 122 123 // wait for RITs to settle -- those are the fixed regions being assigned -- or until the 124 // watchdog TestRule terminates the test. 125 HBaseTestingUtility.await(50, 126 () -> services.getMasterProcedureExecutor().getActiveProcIds().size() == 0); 127 128 ris = MetaTableAccessor.getTableRegions(TEST_UTIL.getConnection(), tn); 129 assertEquals(originalCount, ris.size()); 130 } 131 132 @Test 133 public void testPlugsHoles() throws Exception { 134 TableName tn = TableName.valueOf(this.name.getMethodName()); 135 testPlugsHolesWithReadReplicaInternal(tn, 1); 136 } 137 138 @Test 139 public void testPlugsHolesWithReadReplica() throws Exception { 140 TableName tn = TableName.valueOf(this.name.getMethodName()); 141 testPlugsHolesWithReadReplicaInternal(tn, 3); 142 } 143 144 /** 145 * Just make sure running fixMeta does right thing for the case of a single-region Table where the 146 * region gets dropped. There is nothing much we can do. We can't restore what we don't know about 147 * (at least from a read of hbase:meta). 148 */ 149 @Test 150 public void testOneRegionTable() throws IOException { 151 TableName tn = TableName.valueOf(this.name.getMethodName()); 152 TEST_UTIL.createTable(tn, HConstants.CATALOG_FAMILY); 153 List<RegionInfo> ris = MetaTableAccessor.getTableRegions(TEST_UTIL.getConnection(), tn); 154 MasterServices services = TEST_UTIL.getHBaseCluster().getMaster(); 155 services.getCatalogJanitor().scan(); 156 deleteRegion(services, ris.get(0)); 157 services.getCatalogJanitor().scan(); 158 CatalogJanitorReport report = services.getCatalogJanitor().getLastReport(); 159 ris = MetaTableAccessor.getTableRegions(TEST_UTIL.getConnection(), tn); 160 assertTrue(ris.isEmpty()); 161 MetaFixer fixer = new MetaFixer(services); 162 fixer.fixHoles(report); 163 report = services.getCatalogJanitor().getLastReport(); 164 assertTrue(report.isEmpty()); 165 ris = MetaTableAccessor.getTableRegions(TEST_UTIL.getConnection(), tn); 166 assertEquals(0, ris.size()); 167 } 168 169 private static RegionInfo makeOverlap(MasterServices services, RegionInfo a, RegionInfo b) 170 throws IOException { 171 RegionInfo overlapRegion = RegionInfoBuilder.newBuilder(a.getTable()) 172 .setStartKey(a.getStartKey()).setEndKey(b.getEndKey()).build(); 173 MetaTableAccessor.putsToMetaTable(services.getConnection(), 174 Collections.singletonList(MetaTableAccessor.makePutFromRegionInfo(overlapRegion, 175 EnvironmentEdgeManager.currentTime()))); 176 // TODO: Add checks at assign time to PREVENT being able to assign over existing assign. 177 long assign = services.getAssignmentManager().assign(overlapRegion); 178 ProcedureTestingUtility.waitProcedures(services.getMasterProcedureExecutor(), assign); 179 return overlapRegion; 180 } 181 182 private void testOverlapCommon(final TableName tn) throws Exception { 183 Table t = TEST_UTIL.createMultiRegionTable(tn, HConstants.CATALOG_FAMILY); 184 TEST_UTIL.loadTable(t, HConstants.CATALOG_FAMILY); 185 List<RegionInfo> ris = MetaTableAccessor.getTableRegions(TEST_UTIL.getConnection(), tn); 186 assertTrue(ris.size() > 5); 187 HMaster services = TEST_UTIL.getHBaseCluster().getMaster(); 188 services.getCatalogJanitor().scan(); 189 CatalogJanitorReport report = services.getCatalogJanitor().getLastReport(); 190 assertTrue(report.isEmpty()); 191 // Make a simple overlap spanning second and third region. 192 makeOverlap(services, ris.get(1), ris.get(3)); 193 makeOverlap(services, ris.get(2), ris.get(3)); 194 makeOverlap(services, ris.get(2), ris.get(4)); 195 } 196 197 @Test 198 public void testOverlap() throws Exception { 199 TableName tn = TableName.valueOf(this.name.getMethodName()); 200 testOverlapCommon(tn); 201 HMaster services = TEST_UTIL.getHBaseCluster().getMaster(); 202 HbckChore hbckChore = services.getHbckChore(); 203 204 CatalogJanitor cj = services.getCatalogJanitor(); 205 cj.scan(); 206 CatalogJanitorReport report = cj.getLastReport(); 207 assertEquals(6, report.getOverlaps().size()); 208 assertEquals(1, MetaFixer.calculateMerges(10, report.getOverlaps()).size()); 209 MetaFixer fixer = new MetaFixer(services); 210 fixer.fixOverlaps(report); 211 212 HBaseTestingUtility.await(10, () -> { 213 try { 214 if (cj.scan() > 0) { 215 // It submits GC once, then it will immediately kick off another GC to test if 216 // GCMultipleMergedRegionsProcedure is idempotent. If it is not, it will create 217 // a hole. 218 Map<RegionInfo, Result> mergedRegions = cj.getLastReport().mergedRegions; 219 for (Map.Entry<RegionInfo, Result> e : mergedRegions.entrySet()) { 220 List<RegionInfo> parents = MetaTableAccessor.getMergeRegions(e.getValue().rawCells()); 221 if (parents != null) { 222 ProcedureExecutor<MasterProcedureEnv> pe = services.getMasterProcedureExecutor(); 223 pe.submitProcedure( 224 new GCMultipleMergedRegionsProcedure(pe.getEnvironment(), e.getKey(), parents)); 225 } 226 } 227 return true; 228 } 229 return false; 230 } catch (Exception e) { 231 throw new RuntimeException(e); 232 } 233 }); 234 235 // Wait until all GCs settled down 236 HBaseTestingUtility.await(10, () -> { 237 return services.getMasterProcedureExecutor().getActiveProcIds().isEmpty(); 238 }); 239 240 // No orphan regions on FS 241 hbckChore.choreForTesting(); 242 HbckReport hbckReport = hbckChore.getLastReport(); 243 assertNotNull(hbckReport); 244 assertEquals(0, hbckReport.getOrphanRegionsOnFS().size()); 245 246 // No holes reported. 247 cj.scan(); 248 final CatalogJanitorReport postReport = cj.getLastReport(); 249 assertTrue(postReport.isEmpty()); 250 } 251 252 @Test 253 public void testMultipleTableOverlaps() throws Exception { 254 TableName t1 = TableName.valueOf("t1"); 255 TableName t2 = TableName.valueOf("t2"); 256 TEST_UTIL.createMultiRegionTable(t1, new byte[][] { HConstants.CATALOG_FAMILY }); 257 TEST_UTIL.createMultiRegionTable(t2, new byte[][] { HConstants.CATALOG_FAMILY }); 258 TEST_UTIL.waitTableAvailable(t2); 259 260 HMaster services = TEST_UTIL.getHBaseCluster().getMaster(); 261 services.getCatalogJanitor().scan(); 262 CatalogJanitorReport report = services.getCatalogJanitor().getLastReport(); 263 assertTrue(report.isEmpty()); 264 265 // Make a simple overlap for t1 266 List<RegionInfo> ris = MetaTableAccessor.getTableRegions(TEST_UTIL.getConnection(), t1); 267 makeOverlap(services, ris.get(1), ris.get(2)); 268 // Make a simple overlap for t2 269 ris = MetaTableAccessor.getTableRegions(TEST_UTIL.getConnection(), t2); 270 makeOverlap(services, ris.get(1), ris.get(2)); 271 272 services.getCatalogJanitor().scan(); 273 report = services.getCatalogJanitor().getLastReport(); 274 assertEquals("Region overlaps count does not match.", 4, report.getOverlaps().size()); 275 276 MetaFixer fixer = new MetaFixer(services); 277 List<Long> longs = fixer.fixOverlaps(report); 278 long[] procIds = longs.stream().mapToLong(l -> l).toArray(); 279 ProcedureTestingUtility.waitProcedures(services.getMasterProcedureExecutor(), procIds); 280 281 // After fix, verify no overlaps are left. 282 services.getCatalogJanitor().scan(); 283 report = services.getCatalogJanitor().getLastReport(); 284 assertTrue("After fix there should not have been any overlaps.", report.isEmpty()); 285 } 286 287 @Test 288 public void testOverlapWithSmallMergeCount() throws Exception { 289 TableName tn = TableName.valueOf(this.name.getMethodName()); 290 try { 291 testOverlapCommon(tn); 292 HMaster services = TEST_UTIL.getHBaseCluster().getMaster(); 293 CatalogJanitor cj = services.getCatalogJanitor(); 294 cj.scan(); 295 CatalogJanitorReport report = cj.getLastReport(); 296 assertEquals(6, report.getOverlaps().size()); 297 assertEquals(2, MetaFixer.calculateMerges(5, report.getOverlaps()).size()); 298 299 // The max merge count is set to 5 so overlap regions are divided into 300 // two merge requests. 301 TEST_UTIL.getHBaseCluster().getMaster().getConfiguration() 302 .setInt("hbase.master.metafixer.max.merge.count", 5); 303 304 // Get overlap regions 305 HashSet<String> overlapRegions = new HashSet<>(); 306 for (Pair<RegionInfo, RegionInfo> pair : report.getOverlaps()) { 307 overlapRegions.add(pair.getFirst().getRegionNameAsString()); 308 overlapRegions.add(pair.getSecond().getRegionNameAsString()); 309 } 310 311 MetaFixer fixer = new MetaFixer(services); 312 fixer.fixOverlaps(report); 313 AssignmentManager am = services.getAssignmentManager(); 314 315 HBaseTestingUtility.await(200, () -> { 316 try { 317 cj.scan(); 318 final CatalogJanitorReport postReport = cj.getLastReport(); 319 RegionStates regionStates = am.getRegionStates(); 320 321 // Make sure that two merged regions are opened and GCs are done. 322 if (postReport.getOverlaps().size() == 1) { 323 Pair<RegionInfo, RegionInfo> pair = postReport.getOverlaps().get(0); 324 if ( 325 (!overlapRegions.contains(pair.getFirst().getRegionNameAsString()) 326 && regionStates.getRegionState(pair.getFirst()).isOpened()) 327 && (!overlapRegions.contains(pair.getSecond().getRegionNameAsString()) 328 && regionStates.getRegionState(pair.getSecond()).isOpened()) 329 ) { 330 // Make sure GC is done. 331 List<RegionInfo> firstParents = MetaTableAccessor 332 .getMergeRegions(services.getConnection(), pair.getFirst().getRegionName()); 333 List<RegionInfo> secondParents = MetaTableAccessor 334 .getMergeRegions(services.getConnection(), pair.getSecond().getRegionName()); 335 336 return (firstParents == null || firstParents.isEmpty()) 337 && (secondParents == null || secondParents.isEmpty()); 338 } 339 } 340 return false; 341 } catch (Exception e) { 342 throw new RuntimeException(e); 343 } 344 }); 345 346 // Second run of fixOverlap should fix all. 347 report = cj.getLastReport(); 348 fixer.fixOverlaps(report); 349 350 HBaseTestingUtility.await(20, () -> { 351 try { 352 // Make sure it GC only once. 353 return (cj.scan() > 0); 354 } catch (Exception e) { 355 throw new RuntimeException(e); 356 } 357 }); 358 359 // No holes reported. 360 cj.scan(); 361 final CatalogJanitorReport postReport = cj.getLastReport(); 362 assertTrue(postReport.isEmpty()); 363 364 } finally { 365 TEST_UTIL.getHBaseCluster().getMaster().getConfiguration() 366 .unset("hbase.master.metafixer.max.merge.count"); 367 368 TEST_UTIL.deleteTable(tn); 369 } 370 } 371 372 /** 373 * This test covers the case that one of merged parent regions is a merged child region that has 374 * not been GCed but there is no reference files anymore. In this case, it will kick off a GC 375 * procedure, but no merge will happen. 376 */ 377 @Test 378 public void testMergeWithMergedChildRegion() throws Exception { 379 TableName tn = TableName.valueOf(this.name.getMethodName()); 380 TEST_UTIL.createMultiRegionTable(tn, HConstants.CATALOG_FAMILY); 381 List<RegionInfo> ris = MetaTableAccessor.getTableRegions(TEST_UTIL.getConnection(), tn); 382 assertTrue(ris.size() > 5); 383 HMaster services = TEST_UTIL.getHBaseCluster().getMaster(); 384 CatalogJanitor cj = services.getCatalogJanitor(); 385 cj.scan(); 386 CatalogJanitorReport report = cj.getLastReport(); 387 assertTrue(report.isEmpty()); 388 RegionInfo overlapRegion = makeOverlap(services, ris.get(1), ris.get(2)); 389 390 cj.scan(); 391 report = cj.getLastReport(); 392 assertEquals(2, report.getOverlaps().size()); 393 394 // Mark it as a merged child region. 395 RegionInfo fakedParentRegion = 396 RegionInfoBuilder.newBuilder(tn).setStartKey(overlapRegion.getStartKey()).build(); 397 398 Table meta = MetaTableAccessor.getMetaHTable(TEST_UTIL.getConnection()); 399 Put putOfMerged = 400 MetaTableAccessor.makePutFromRegionInfo(overlapRegion, HConstants.LATEST_TIMESTAMP); 401 String qualifier = String.format(HConstants.MERGE_QUALIFIER_PREFIX_STR + "%04d", 0); 402 putOfMerged.add(CellBuilderFactory.create(CellBuilderType.SHALLOW_COPY) 403 .setRow(putOfMerged.getRow()).setFamily(HConstants.CATALOG_FAMILY) 404 .setQualifier(Bytes.toBytes(qualifier)).setTimestamp(putOfMerged.getTimestamp()) 405 .setType(Cell.Type.Put).setValue(RegionInfo.toByteArray(fakedParentRegion)).build()); 406 407 meta.put(putOfMerged); 408 409 MetaFixer fixer = new MetaFixer(services); 410 fixer.fixOverlaps(report); 411 412 // Wait until all procedures settled down 413 HBaseTestingUtility.await(200, () -> { 414 return services.getMasterProcedureExecutor().getActiveProcIds().isEmpty(); 415 }); 416 417 // No merge is done, overlap is still there. 418 cj.scan(); 419 report = cj.getLastReport(); 420 assertEquals(2, report.getOverlaps().size()); 421 422 fixer.fixOverlaps(report); 423 424 // Wait until all procedures settled down 425 HBaseTestingUtility.await(200, () -> { 426 return services.getMasterProcedureExecutor().getActiveProcIds().isEmpty(); 427 }); 428 429 // Merge is done and no more overlaps 430 cj.scan(); 431 report = cj.getLastReport(); 432 assertEquals(0, report.getOverlaps().size()); 433 } 434 435 /** 436 * Make it so a big overlap spans many Regions, some of which are non-contiguous. Make it so we 437 * can fix this condition. HBASE-24247 438 */ 439 @Test 440 public void testOverlapWithMergeOfNonContiguous() throws Exception { 441 TableName tn = TableName.valueOf(this.name.getMethodName()); 442 TEST_UTIL.createMultiRegionTable(tn, HConstants.CATALOG_FAMILY); 443 List<RegionInfo> ris = MetaTableAccessor.getTableRegions(TEST_UTIL.getConnection(), tn); 444 assertTrue(ris.size() > 5); 445 MasterServices services = TEST_UTIL.getHBaseCluster().getMaster(); 446 services.getCatalogJanitor().scan(); 447 CatalogJanitorReport report = services.getCatalogJanitor().getLastReport(); 448 assertTrue(report.isEmpty()); 449 // Make a simple overlap spanning second and third region. 450 makeOverlap(services, ris.get(1), ris.get(5)); 451 // Now Delete a region under the overlap to manufacture non-contiguous sub regions. 452 RegionInfo deletedRegion = ris.get(3); 453 long pid = services.getAssignmentManager().unassign(deletedRegion); 454 while (!services.getMasterProcedureExecutor().isFinished(pid)) { 455 Threads.sleep(100); 456 } 457 GCRegionProcedure procedure = 458 new GCRegionProcedure(services.getMasterProcedureExecutor().getEnvironment(), ris.get(3)); 459 pid = services.getMasterProcedureExecutor().submitProcedure(procedure); 460 while (!services.getMasterProcedureExecutor().isFinished(pid)) { 461 Threads.sleep(100); 462 } 463 services.getCatalogJanitor().scan(); 464 report = services.getCatalogJanitor().getLastReport(); 465 assertEquals(1, MetaFixer.calculateMerges(10, report.getOverlaps()).size()); 466 MetaFixer fixer = new MetaFixer(services); 467 fixer.fixOverlaps(report); 468 HBaseTestingUtility.await(10, () -> { 469 try { 470 services.getCatalogJanitor().scan(); 471 final CatalogJanitorReport postReport = services.getCatalogJanitor().getLastReport(); 472 return postReport.isEmpty(); 473 } catch (Exception e) { 474 throw new RuntimeException(e); 475 } 476 }); 477 } 478}