/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.master.janitor;

import static org.apache.hbase.thirdparty.org.apache.commons.collections4.CollectionUtils.isNotEmpty;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertTrue;
import java.io.IOException;
import java.util.Collections;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.CellBuilderFactory;
import org.apache.hadoop.hbase.CellBuilderType;
import org.apache.hadoop.hbase.HBaseClassTestRule;
import org.apache.hadoop.hbase.HBaseTestingUtility;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.MetaTableAccessor;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.RegionInfo;
import org.apache.hadoop.hbase.client.RegionInfoBuilder;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.Table;
import org.apache.hadoop.hbase.master.HMaster;
import org.apache.hadoop.hbase.master.HbckChore;
import org.apache.hadoop.hbase.master.MasterServices;
import org.apache.hadoop.hbase.master.assignment.AssignmentManager;
import org.apache.hadoop.hbase.master.assignment.GCRegionProcedure;
import org.apache.hadoop.hbase.master.assignment.GCMultipleMergedRegionsProcedure;
import org.apache.hadoop.hbase.master.assignment.RegionStates;
import org.apache.hadoop.hbase.master.procedure.MasterProcedureEnv;
import org.apache.hadoop.hbase.procedure2.ProcedureExecutor;
import org.apache.hadoop.hbase.procedure2.ProcedureTestingUtility;
import org.apache.hadoop.hbase.testclassification.LargeTests;
import org.apache.hadoop.hbase.testclassification.MasterTests;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.Pair;
import org.apache.hadoop.hbase.util.Threads;
import org.junit.AfterClass;
import org.junit.BeforeClass;
import org.junit.ClassRule;
import org.junit.Rule;
import org.junit.Test;
import org.junit.experimental.categories.Category;
import org.junit.rules.TestName;

/**
 * Exercises {@link MetaFixer#fixHoles(Report)} and {@link MetaFixer#fixOverlaps(Report)} against
 * a mini cluster. Each test manufactures holes or overlaps in the region chain of a table, runs
 * the fixer, and then checks the {@link CatalogJanitor} report.
 */
@Category({MasterTests.class, LargeTests.class})
public class TestMetaFixer {
  @ClassRule
  public static final HBaseClassTestRule CLASS_RULE =
    HBaseClassTestRule.forClass(TestMetaFixer.class);
  @Rule
  public TestName name = new TestName();

  private static final HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility();

  @BeforeClass
  public static void setupBeforeClass() throws Exception {
    TEST_UTIL.startMiniCluster();
  }

  @AfterClass
  public static void tearDownAfterClass() throws Exception {
    TEST_UTIL.shutdownMiniCluster();
  }

  /**
   * Removes {@code ri} via {@link MetaTableAccessor#deleteRegionInfo} and from the Master's
   * in-memory region states.
   * @param services Master services whose region states are updated.
   * @param ri Region to delete.
   * @throws IOException if the delete fails.
   */
  private void deleteRegion(MasterServices services, RegionInfo ri) throws IOException {
    MetaTableAccessor.deleteRegionInfo(TEST_UTIL.getConnection(), ri);
    // Delete it from Master context too else it sticks around.
    services.getAssignmentManager().getRegionStates().deleteRegion(ri);
  }

  /**
   * Creates a multi-region table, deletes three regions (and their replicas) to manufacture
   * holes, and verifies that {@link MetaFixer#fixHoles(Report)} plugs them.
   * @param tn Name of the table to create.
   * @param replicaCount Region replication passed to the table creation call.
   */
  private void testPlugsHolesWithReadReplicaInternal(final TableName tn, final int replicaCount)
    throws Exception {
    TEST_UTIL.createMultiRegionTable(tn, replicaCount, new byte[][] { HConstants.CATALOG_FAMILY });
    List<RegionInfo> ris = MetaTableAccessor.getTableRegions(TEST_UTIL.getConnection(), tn);
    MasterServices services = TEST_UTIL.getHBaseCluster().getMaster();
    int initialSize = services.getAssignmentManager().getRegionStates().getRegionStates().size();
    services.getCatalogJanitor().scan();
    Report report = services.getCatalogJanitor().getLastReport();
    assertTrue(report.isEmpty());
    int originalCount = ris.size();
    // Remove first, last and middle region. See if hole gets plugged.
    // Table has 26 * replicaCount regions.
    for (int i = 0; i < replicaCount; i++) {
      deleteRegion(services, ris.get(3 * replicaCount + i));
      deleteRegion(services, ris.get(i));
      deleteRegion(services, ris.get(ris.size() - 1 - i));
    }
    assertEquals(initialSize - 3 * replicaCount,
      services.getAssignmentManager().getRegionStates().getRegionStates().size());
    services.getCatalogJanitor().scan();
    report = services.getCatalogJanitor().getLastReport();
    assertEquals(report.toString(), 3, report.getHoles().size());
    MetaFixer fixer = new MetaFixer(services);
    fixer.fixHoles(report);
    services.getCatalogJanitor().scan();
    report = services.getCatalogJanitor().getLastReport();
    assertTrue(report.toString(), report.isEmpty());
    assertEquals(initialSize,
      services.getAssignmentManager().getRegionStates().getRegionStates().size());

    // wait for RITs to settle -- those are the fixed regions being assigned -- or until the
    // watchdog TestRule terminates the test.
    HBaseTestingUtility.await(50,
      () -> services.getMasterProcedureExecutor().getActiveProcIds().isEmpty());

    ris = MetaTableAccessor.getTableRegions(TEST_UTIL.getConnection(), tn);
    assertEquals(originalCount, ris.size());
  }

  @Test
  public void testPlugsHoles() throws Exception {
    TableName tn = TableName.valueOf(this.name.getMethodName());
    testPlugsHolesWithReadReplicaInternal(tn, 1);
  }

  @Test
  public void testPlugsHolesWithReadReplica() throws Exception {
    TableName tn = TableName.valueOf(this.name.getMethodName());
    testPlugsHolesWithReadReplicaInternal(tn, 3);
  }

  /**
   * Just make sure running fixMeta does right thing for the case
   * of a single-region Table where the region gets dropped.
   * There is nothing much we can do. We can't restore what
   * we don't know about (at least from a read of hbase:meta).
   */
  @Test
  public void testOneRegionTable() throws IOException {
    TableName tn = TableName.valueOf(this.name.getMethodName());
    TEST_UTIL.createTable(tn, HConstants.CATALOG_FAMILY);
    List<RegionInfo> ris = MetaTableAccessor.getTableRegions(TEST_UTIL.getConnection(), tn);
    MasterServices services = TEST_UTIL.getHBaseCluster().getMaster();
    services.getCatalogJanitor().scan();
    deleteRegion(services, ris.get(0));
    services.getCatalogJanitor().scan();
    Report report = services.getCatalogJanitor().getLastReport();
    ris = MetaTableAccessor.getTableRegions(TEST_UTIL.getConnection(), tn);
    assertTrue(ris.isEmpty());
    MetaFixer fixer = new MetaFixer(services);
    fixer.fixHoles(report);
    report = services.getCatalogJanitor().getLastReport();
    assertTrue(report.isEmpty());
    ris = MetaTableAccessor.getTableRegions(TEST_UTIL.getConnection(), tn);
    assertEquals(0, ris.size());
  }

  /**
   * Manufactures an overlap: builds a region running from the start key of {@code a} to the end
   * key of {@code b}, writes it with {@link MetaTableAccessor#putsToMetaTable}, assigns it, and
   * waits for the assign procedure to finish.
   * @param services Master services used for the connection and the assignment.
   * @param a Region supplying the table and the start key.
   * @param b Region supplying the end key.
   * @return The newly created, overlapping region.
   * @throws IOException if writing the region or submitting the assign fails.
   */
  private static RegionInfo makeOverlap(MasterServices services, RegionInfo a, RegionInfo b)
    throws IOException {
    RegionInfo overlapRegion = RegionInfoBuilder.newBuilder(a.getTable()).
      setStartKey(a.getStartKey()).
      setEndKey(b.getEndKey()).
      build();
    MetaTableAccessor.putsToMetaTable(services.getConnection(),
      Collections.singletonList(MetaTableAccessor.makePutFromRegionInfo(overlapRegion,
        System.currentTimeMillis())));
    // TODO: Add checks at assign time to PREVENT being able to assign over existing assign.
    long assign = services.getAssignmentManager().assign(overlapRegion);
    ProcedureTestingUtility.waitProcedures(services.getMasterProcedureExecutor(), assign);
    return overlapRegion;
  }

  /**
   * Shared setup for the overlap tests: creates and loads a multi-region table, checks that the
   * janitor report starts out empty, then adds three overlapping regions.
   * @param tn Name of the table to create.
   */
  private void testOverlapCommon(final TableName tn) throws Exception {
    // The Table handle is only needed for loading data; close it as soon as that is done.
    try (Table t = TEST_UTIL.createMultiRegionTable(tn, HConstants.CATALOG_FAMILY)) {
      TEST_UTIL.loadTable(t, HConstants.CATALOG_FAMILY);
    }
    List<RegionInfo> ris = MetaTableAccessor.getTableRegions(TEST_UTIL.getConnection(), tn);
    assertTrue(ris.size() > 5);
    HMaster services = TEST_UTIL.getHBaseCluster().getMaster();
    services.getCatalogJanitor().scan();
    Report report = services.getCatalogJanitor().getLastReport();
    assertTrue(report.isEmpty());
    // Make three overlapping regions that together span the second through fifth regions.
    makeOverlap(services, ris.get(1), ris.get(3));
    makeOverlap(services, ris.get(2), ris.get(3));
    makeOverlap(services, ris.get(2), ris.get(4));
  }

  @Test
  public void testOverlap() throws Exception {
    TableName tn = TableName.valueOf(this.name.getMethodName());
    testOverlapCommon(tn);
    HMaster services = TEST_UTIL.getHBaseCluster().getMaster();
    HbckChore hbckChore = services.getHbckChore();

    CatalogJanitor cj = services.getCatalogJanitor();
    cj.scan();
    Report report = cj.getLastReport();
    assertEquals(6, report.getOverlaps().size());
    assertEquals(1,
      MetaFixer.calculateMerges(10, report.getOverlaps()).size());
    MetaFixer fixer = new MetaFixer(services);
    fixer.fixOverlaps(report);

    HBaseTestingUtility.await(10, () -> {
      try {
        if (cj.scan() > 0) {
          // It submits GC once, then it will immediately kick off another GC to test if
          // GCMultipleMergedRegionsProcedure is idempotent. If it is not, it will create
          // a hole.
          Map<RegionInfo, Result> mergedRegions = cj.getLastReport().mergedRegions;
          for (Map.Entry<RegionInfo, Result> e : mergedRegions.entrySet()) {
            List<RegionInfo> parents = MetaTableAccessor.getMergeRegions(e.getValue().rawCells());
            if (parents != null) {
              ProcedureExecutor<MasterProcedureEnv> pe = services.getMasterProcedureExecutor();
              pe.submitProcedure(new GCMultipleMergedRegionsProcedure(pe.getEnvironment(),
                e.getKey(), parents));
            }
          }
          return true;
        }
        return false;
      } catch (Exception e) {
        throw new RuntimeException(e);
      }
    });

    // Wait until all GCs settled down
    HBaseTestingUtility.await(10, () -> {
      return services.getMasterProcedureExecutor().getActiveProcIds().isEmpty();
    });

    // No orphan regions on FS
    hbckChore.choreForTesting();
    assertEquals(0, hbckChore.getOrphanRegionsOnFS().size());

    // No holes reported.
    cj.scan();
    final Report postReport = cj.getLastReport();
    assertTrue(postReport.isEmpty());
  }

  @Test
  public void testMultipleTableOverlaps() throws Exception {
    TableName t1 = TableName.valueOf("t1");
    TableName t2 = TableName.valueOf("t2");
    TEST_UTIL.createMultiRegionTable(t1, new byte[][] { HConstants.CATALOG_FAMILY });
    TEST_UTIL.createMultiRegionTable(t2, new byte[][] { HConstants.CATALOG_FAMILY });
    TEST_UTIL.waitTableAvailable(t2);

    HMaster services = TEST_UTIL.getHBaseCluster().getMaster();
    services.getCatalogJanitor().scan();
    Report report = services.getCatalogJanitor().getLastReport();
    assertTrue(report.isEmpty());

    // Make a simple overlap for t1
    List<RegionInfo> ris = MetaTableAccessor.getTableRegions(TEST_UTIL.getConnection(), t1);
    makeOverlap(services, ris.get(1), ris.get(2));
    // Make a simple overlap for t2
    ris = MetaTableAccessor.getTableRegions(TEST_UTIL.getConnection(), t2);
    makeOverlap(services, ris.get(1), ris.get(2));

    services.getCatalogJanitor().scan();
    report = services.getCatalogJanitor().getLastReport();
    assertEquals("Region overlaps count does not match.", 4, report.getOverlaps().size());

    MetaFixer fixer = new MetaFixer(services);
    List<Long> longs = fixer.fixOverlaps(report);
    long[] procIds = longs.stream().mapToLong(l -> l).toArray();
    ProcedureTestingUtility.waitProcedures(services.getMasterProcedureExecutor(), procIds);

    // After fix, verify no overlaps are left.
    services.getCatalogJanitor().scan();
    report = services.getCatalogJanitor().getLastReport();
    assertTrue("After fix there should not have been any overlaps.", report.isEmpty());
  }

  @Test
  public void testOverlapWithSmallMergeCount() throws Exception {
    TableName tn = TableName.valueOf(this.name.getMethodName());
    try {
      testOverlapCommon(tn);
      HMaster services = TEST_UTIL.getHBaseCluster().getMaster();
      CatalogJanitor cj = services.getCatalogJanitor();
      cj.scan();
      Report report = cj.getLastReport();
      assertEquals(6, report.getOverlaps().size());
      assertEquals(2,
        MetaFixer.calculateMerges(5, report.getOverlaps()).size());

      // The max merge count is set to 5 so overlap regions are divided into
      // two merge requests.
      TEST_UTIL.getHBaseCluster().getMaster().getConfiguration().setInt(
        "hbase.master.metafixer.max.merge.count", 5);

      // Get overlap regions
      HashSet<String> overlapRegions = new HashSet<>();
      for (Pair<RegionInfo, RegionInfo> pair : report.getOverlaps()) {
        overlapRegions.add(pair.getFirst().getRegionNameAsString());
        overlapRegions.add(pair.getSecond().getRegionNameAsString());
      }

      MetaFixer fixer = new MetaFixer(services);
      fixer.fixOverlaps(report);
      AssignmentManager am = services.getAssignmentManager();

      HBaseTestingUtility.await(200, () -> {
        try {
          cj.scan();
          final Report postReport = cj.getLastReport();
          RegionStates regionStates = am.getRegionStates();

          // Make sure that two merged regions are opened and GCs are done.
          if (postReport.getOverlaps().size() == 1) {
            Pair<RegionInfo, RegionInfo> pair = postReport.getOverlaps().get(0);
            if ((!overlapRegions.contains(pair.getFirst().getRegionNameAsString()) &&
              regionStates.getRegionState(pair.getFirst()).isOpened()) &&
              (!overlapRegions.contains(pair.getSecond().getRegionNameAsString()) &&
              regionStates.getRegionState(pair.getSecond()).isOpened())) {
              // Make sure GC is done.
              List<RegionInfo> firstParents = MetaTableAccessor.getMergeRegions(
                services.getConnection(), pair.getFirst().getRegionName());
              List<RegionInfo> secondParents = MetaTableAccessor.getMergeRegions(
                services.getConnection(), pair.getSecond().getRegionName());

              return (firstParents == null || firstParents.isEmpty()) &&
                (secondParents == null || secondParents.isEmpty());
            }
          }
          return false;
        } catch (Exception e) {
          throw new RuntimeException(e);
        }
      });

      // Second run of fixOverlap should fix all.
      report = cj.getLastReport();
      fixer.fixOverlaps(report);

      HBaseTestingUtility.await(20, () -> {
        try {
          // Make sure it GC only once.
          return (cj.scan() > 0);
        } catch (Exception e) {
          throw new RuntimeException(e);
        }
      });

      // No holes reported.
      cj.scan();
      final Report postReport = cj.getLastReport();
      assertTrue(postReport.isEmpty());

    } finally {
      TEST_UTIL.getHBaseCluster().getMaster().getConfiguration().unset(
        "hbase.master.metafixer.max.merge.count");

      TEST_UTIL.deleteTable(tn);
    }
  }

  /**
   * This test covers the case that one of merged parent regions is a merged child region that
   * has not been GCed but there is no reference files anymore. In this case, it will kick off
   * a GC procedure, but no merge will happen.
   */
  @Test
  public void testMergeWithMergedChildRegion() throws Exception {
    TableName tn = TableName.valueOf(this.name.getMethodName());
    TEST_UTIL.createMultiRegionTable(tn, HConstants.CATALOG_FAMILY);
    List<RegionInfo> ris = MetaTableAccessor.getTableRegions(TEST_UTIL.getConnection(), tn);
    assertTrue(ris.size() > 5);
    HMaster services = TEST_UTIL.getHBaseCluster().getMaster();
    CatalogJanitor cj = services.getCatalogJanitor();
    cj.scan();
    Report report = cj.getLastReport();
    assertTrue(report.isEmpty());
    RegionInfo overlapRegion = makeOverlap(services, ris.get(1), ris.get(2));

    cj.scan();
    report = cj.getLastReport();
    assertEquals(2, report.getOverlaps().size());

    // Mark it as a merged child region.
    RegionInfo fakedParentRegion = RegionInfoBuilder.newBuilder(tn).
      setStartKey(overlapRegion.getStartKey()).
      build();

    Put putOfMerged = MetaTableAccessor.makePutFromRegionInfo(overlapRegion,
      HConstants.LATEST_TIMESTAMP);
    String qualifier = String.format(HConstants.MERGE_QUALIFIER_PREFIX_STR + "%04d", 0);
    putOfMerged.add(CellBuilderFactory.create(CellBuilderType.SHALLOW_COPY).setRow(
      putOfMerged.getRow()).
      setFamily(HConstants.CATALOG_FAMILY).
      setQualifier(Bytes.toBytes(qualifier)).
      setTimestamp(putOfMerged.getTimestamp()).
      setType(Cell.Type.Put).
      setValue(RegionInfo.toByteArray(fakedParentRegion)).
      build());

    // Close the Table handle once the put has been written so it is not leaked.
    try (Table meta = MetaTableAccessor.getMetaHTable(TEST_UTIL.getConnection())) {
      meta.put(putOfMerged);
    }

    MetaFixer fixer = new MetaFixer(services);
    fixer.fixOverlaps(report);

    // Wait until all procedures settled down
    HBaseTestingUtility.await(200, () -> {
      return services.getMasterProcedureExecutor().getActiveProcIds().isEmpty();
    });

    // No merge is done, overlap is still there.
    cj.scan();
    report = cj.getLastReport();
    assertEquals(2, report.getOverlaps().size());

    fixer.fixOverlaps(report);

    // Wait until all procedures settled down
    HBaseTestingUtility.await(200, () -> {
      return services.getMasterProcedureExecutor().getActiveProcIds().isEmpty();
    });

    // Merge is done and no more overlaps
    cj.scan();
    report = cj.getLastReport();
    assertEquals(0, report.getOverlaps().size());
  }

  /**
   * Make it so a big overlap spans many Regions, some of which are non-contiguous. Make it so
   * we can fix this condition. HBASE-24247
   */
  @Test
  public void testOverlapWithMergeOfNonContiguous() throws Exception {
    TableName tn = TableName.valueOf(this.name.getMethodName());
    TEST_UTIL.createMultiRegionTable(tn, HConstants.CATALOG_FAMILY);
    List<RegionInfo> ris = MetaTableAccessor.getTableRegions(TEST_UTIL.getConnection(), tn);
    assertTrue(ris.size() > 5);
    MasterServices services = TEST_UTIL.getHBaseCluster().getMaster();
    services.getCatalogJanitor().scan();
    Report report = services.getCatalogJanitor().getLastReport();
    assertTrue(report.isEmpty());
    // Make one big overlap spanning the second through sixth regions.
    makeOverlap(services, ris.get(1), ris.get(5));
    // Now Delete a region under the overlap to manufacture non-contiguous sub regions.
    RegionInfo deletedRegion = ris.get(3);
    long pid = services.getAssignmentManager().unassign(deletedRegion);
    while (!services.getMasterProcedureExecutor().isFinished(pid)) {
      Threads.sleep(100);
    }
    GCRegionProcedure procedure =
      new GCRegionProcedure(services.getMasterProcedureExecutor().getEnvironment(), deletedRegion);
    pid = services.getMasterProcedureExecutor().submitProcedure(procedure);
    while (!services.getMasterProcedureExecutor().isFinished(pid)) {
      Threads.sleep(100);
    }
    services.getCatalogJanitor().scan();
    report = services.getCatalogJanitor().getLastReport();
    assertEquals(1, MetaFixer.calculateMerges(10, report.getOverlaps()).size());
    MetaFixer fixer = new MetaFixer(services);
    fixer.fixOverlaps(report);
    HBaseTestingUtility.await(10, () -> {
      try {
        services.getCatalogJanitor().scan();
        final Report postReport = services.getCatalogJanitor().getLastReport();
        return postReport.isEmpty();
      } catch (Exception e) {
        throw new RuntimeException(e);
      }
    });
  }
}