001/* 002 * Licensed to the Apache Software Foundation (ASF) under one 003 * or more contributor license agreements. See the NOTICE file 004 * distributed with this work for additional information 005 * regarding copyright ownership. The ASF licenses this file 006 * to you under the Apache License, Version 2.0 (the 007 * "License"); you may not use this file except in compliance 008 * with the License. You may obtain a copy of the License at 009 * 010 * http://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, software 013 * distributed under the License is distributed on an "AS IS" BASIS, 014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 015 * See the License for the specific language governing permissions and 016 * limitations under the License. 017 */ 018package org.apache.hadoop.hbase.master; 019 020import static org.apache.hbase.thirdparty.org.apache.commons.collections4.CollectionUtils.isNotEmpty; 021import static org.junit.Assert.assertEquals; 022import static org.junit.Assert.assertTrue; 023import java.io.IOException; 024import java.util.Collections; 025import java.util.HashSet; 026import java.util.List; 027import java.util.Map; 028import org.apache.hadoop.hbase.Cell; 029import org.apache.hadoop.hbase.CellBuilderFactory; 030import org.apache.hadoop.hbase.CellBuilderType; 031import org.apache.hadoop.hbase.HBaseClassTestRule; 032import org.apache.hadoop.hbase.HBaseTestingUtility; 033import org.apache.hadoop.hbase.HConstants; 034import org.apache.hadoop.hbase.MetaTableAccessor; 035import org.apache.hadoop.hbase.TableName; 036import org.apache.hadoop.hbase.client.Put; 037import org.apache.hadoop.hbase.client.RegionInfo; 038import org.apache.hadoop.hbase.client.RegionInfoBuilder; 039import org.apache.hadoop.hbase.client.Result; 040import org.apache.hadoop.hbase.client.Table; 041import org.apache.hadoop.hbase.master.assignment.AssignmentManager; 042import org.apache.hadoop.hbase.master.assignment.GCRegionProcedure; 043import org.apache.hadoop.hbase.master.assignment.GCMultipleMergedRegionsProcedure; 044import org.apache.hadoop.hbase.master.assignment.RegionStates; 045import org.apache.hadoop.hbase.master.procedure.MasterProcedureEnv; 046import org.apache.hadoop.hbase.procedure2.ProcedureExecutor; 047import org.apache.hadoop.hbase.testclassification.LargeTests; 048import org.apache.hadoop.hbase.testclassification.MasterTests; 049import org.apache.hadoop.hbase.util.Bytes; 050import org.apache.hadoop.hbase.util.Pair; 051import org.apache.hadoop.hbase.util.Threads; 052import org.junit.AfterClass; 053import org.junit.BeforeClass; 054import org.junit.ClassRule; 055import org.junit.Rule; 056import org.junit.Test; 057import org.junit.experimental.categories.Category; 058import org.junit.rules.TestName; 059 060@Category({MasterTests.class, LargeTests.class}) 061public class TestMetaFixer { 062 @ClassRule 063 public static final HBaseClassTestRule CLASS_RULE = 064 HBaseClassTestRule.forClass(TestMetaFixer.class); 065 @Rule 066 public TestName name = new TestName(); 067 068 private static final HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility(); 069 070 @BeforeClass 071 public static void setupBeforeClass() throws Exception { 072 TEST_UTIL.startMiniCluster(); 073 } 074 075 @AfterClass 076 public static void tearDownAfterClass() throws Exception { 077 TEST_UTIL.shutdownMiniCluster(); 078 } 079 080 private void deleteRegion(MasterServices services, RegionInfo ri) throws IOException { 081 MetaTableAccessor.deleteRegionInfo(TEST_UTIL.getConnection(), ri); 082 // Delete it from Master context too else it sticks around. 083 services.getAssignmentManager().getRegionStates().deleteRegion(ri); 084 } 085 086 private void testPlugsHolesWithReadReplicaInternal(final TableName tn, final int replicaCount) 087 throws Exception { 088 TEST_UTIL.createMultiRegionTable(tn, replicaCount, new byte[][] { HConstants.CATALOG_FAMILY }); 089 List<RegionInfo> ris = MetaTableAccessor.getTableRegions(TEST_UTIL.getConnection(), tn); 090 MasterServices services = TEST_UTIL.getHBaseCluster().getMaster(); 091 int initialSize = services.getAssignmentManager().getRegionStates().getRegionStates().size(); 092 services.getCatalogJanitor().scan(); 093 CatalogJanitor.Report report = services.getCatalogJanitor().getLastReport(); 094 assertTrue(report.isEmpty()); 095 int originalCount = ris.size(); 096 // Remove first, last and middle region. See if hole gets plugged. Table has 26 * replicaCount regions. 097 for (int i = 0; i < replicaCount; i ++) { 098 deleteRegion(services, ris.get(3 * replicaCount + i)); 099 deleteRegion(services, ris.get(i)); 100 deleteRegion(services, ris.get(ris.size() - 1 - i)); 101 } 102 assertEquals(initialSize - 3 * replicaCount, 103 services.getAssignmentManager().getRegionStates().getRegionStates().size()); 104 services.getCatalogJanitor().scan(); 105 report = services.getCatalogJanitor().getLastReport(); 106 assertEquals(report.toString(), 3, report.getHoles().size()); 107 MetaFixer fixer = new MetaFixer(services); 108 fixer.fixHoles(report); 109 services.getCatalogJanitor().scan(); 110 report = services.getCatalogJanitor().getLastReport(); 111 assertTrue(report.toString(), report.isEmpty()); 112 assertEquals(initialSize, 113 services.getAssignmentManager().getRegionStates().getRegionStates().size()); 114 115 // wait for RITs to settle -- those are the fixed regions being assigned -- or until the 116 // watchdog TestRule terminates the test. 117 HBaseTestingUtility.await(50, 118 () -> services.getMasterProcedureExecutor().getActiveProcIds().size() == 0); 119 120 ris = MetaTableAccessor.getTableRegions(TEST_UTIL.getConnection(), tn); 121 assertEquals(originalCount, ris.size()); 122 } 123 124 @Test 125 public void testPlugsHoles() throws Exception { 126 TableName tn = TableName.valueOf(this.name.getMethodName()); 127 testPlugsHolesWithReadReplicaInternal(tn, 1); 128 } 129 130 @Test 131 public void testPlugsHolesWithReadReplica() throws Exception { 132 TableName tn = TableName.valueOf(this.name.getMethodName()); 133 testPlugsHolesWithReadReplicaInternal(tn, 3); 134 } 135 136 /** 137 * Just make sure running fixMeta does right thing for the case 138 * of a single-region Table where the region gets dropped. 139 * There is nothing much we can do. We can't restore what 140 * we don't know about (at least from a read of hbase:meta). 141 */ 142 @Test 143 public void testOneRegionTable() throws IOException { 144 TableName tn = TableName.valueOf(this.name.getMethodName()); 145 TEST_UTIL.createTable(tn, HConstants.CATALOG_FAMILY); 146 List<RegionInfo> ris = MetaTableAccessor.getTableRegions(TEST_UTIL.getConnection(), tn); 147 MasterServices services = TEST_UTIL.getHBaseCluster().getMaster(); 148 services.getCatalogJanitor().scan(); 149 deleteRegion(services, ris.get(0)); 150 services.getCatalogJanitor().scan(); 151 CatalogJanitor.Report report = services.getCatalogJanitor().getLastReport(); 152 ris = MetaTableAccessor.getTableRegions(TEST_UTIL.getConnection(), tn); 153 assertTrue(ris.isEmpty()); 154 MetaFixer fixer = new MetaFixer(services); 155 fixer.fixHoles(report); 156 report = services.getCatalogJanitor().getLastReport(); 157 assertTrue(report.isEmpty()); 158 ris = MetaTableAccessor.getTableRegions(TEST_UTIL.getConnection(), tn); 159 assertEquals(0, ris.size()); 160 } 161 162 private static RegionInfo makeOverlap(MasterServices services, RegionInfo a, RegionInfo b) 163 throws IOException { 164 RegionInfo overlapRegion = RegionInfoBuilder.newBuilder(a.getTable()). 165 setStartKey(a.getStartKey()). 166 setEndKey(b.getEndKey()). 167 build(); 168 MetaTableAccessor.putsToMetaTable(services.getConnection(), 169 Collections.singletonList(MetaTableAccessor.makePutFromRegionInfo(overlapRegion, 170 System.currentTimeMillis()))); 171 // TODO: Add checks at assign time to PREVENT being able to assign over existing assign. 172 services.getAssignmentManager().assign(overlapRegion); 173 return overlapRegion; 174 } 175 176 private void testOverlapCommon(final TableName tn) throws Exception { 177 Table t = TEST_UTIL.createMultiRegionTable(tn, HConstants.CATALOG_FAMILY); 178 TEST_UTIL.loadTable(t, HConstants.CATALOG_FAMILY); 179 List<RegionInfo> ris = MetaTableAccessor.getTableRegions(TEST_UTIL.getConnection(), tn); 180 assertTrue(ris.size() > 5); 181 HMaster services = TEST_UTIL.getHBaseCluster().getMaster(); 182 services.getCatalogJanitor().scan(); 183 CatalogJanitor.Report report = services.getCatalogJanitor().getLastReport(); 184 assertTrue(report.isEmpty()); 185 // Make a simple overlap spanning second and third region. 186 makeOverlap(services, ris.get(1), ris.get(3)); 187 makeOverlap(services, ris.get(2), ris.get(3)); 188 makeOverlap(services, ris.get(2), ris.get(4)); 189 } 190 191 @Test 192 public void testOverlap() throws Exception { 193 TableName tn = TableName.valueOf(this.name.getMethodName()); 194 testOverlapCommon(tn); 195 HMaster services = TEST_UTIL.getHBaseCluster().getMaster(); 196 HbckChore hbckChore = services.getHbckChore(); 197 198 CatalogJanitor cj = services.getCatalogJanitor(); 199 cj.scan(); 200 CatalogJanitor.Report report = cj.getLastReport(); 201 assertEquals(6, report.getOverlaps().size()); 202 assertEquals(1, 203 MetaFixer.calculateMerges(10, report.getOverlaps()).size()); 204 MetaFixer fixer = new MetaFixer(services); 205 fixer.fixOverlaps(report); 206 207 HBaseTestingUtility. await(10, () -> { 208 try { 209 if (cj.scan() > 0) { 210 // It submits GC once, then it will immediately kick off another GC to test if 211 // GCMultipleMergedRegionsProcedure is idempotent. If it is not, it will create 212 // a hole. 213 Map<RegionInfo, Result> mergedRegions = cj.getLastReport().mergedRegions; 214 for (Map.Entry<RegionInfo, Result> e : mergedRegions.entrySet()) { 215 List<RegionInfo> parents = MetaTableAccessor.getMergeRegions(e.getValue().rawCells()); 216 if (parents != null) { 217 ProcedureExecutor<MasterProcedureEnv> pe = services.getMasterProcedureExecutor(); 218 pe.submitProcedure(new GCMultipleMergedRegionsProcedure(pe.getEnvironment(), 219 e.getKey(), parents)); 220 } 221 } 222 return true; 223 } 224 return false; 225 } catch (Exception e) { 226 throw new RuntimeException(e); 227 } 228 }); 229 230 // Wait until all GCs settled down 231 HBaseTestingUtility.await(10, () -> { 232 return services.getMasterProcedureExecutor().getActiveProcIds().isEmpty(); 233 }); 234 235 // No orphan regions on FS 236 hbckChore.chore(); 237 assertEquals(0, hbckChore.getOrphanRegionsOnFS().size()); 238 239 // No holes reported. 240 cj.scan(); 241 final CatalogJanitor.Report postReport = cj.getLastReport(); 242 assertTrue(postReport.isEmpty()); 243 } 244 245 @Test 246 public void testOverlapWithSmallMergeCount() throws Exception { 247 TableName tn = TableName.valueOf(this.name.getMethodName()); 248 try { 249 testOverlapCommon(tn); 250 HMaster services = TEST_UTIL.getHBaseCluster().getMaster(); 251 CatalogJanitor cj = services.getCatalogJanitor(); 252 cj.scan(); 253 CatalogJanitor.Report report = cj.getLastReport(); 254 assertEquals(6, report.getOverlaps().size()); 255 assertEquals(2, 256 MetaFixer.calculateMerges(5, report.getOverlaps()).size()); 257 258 // The max merge count is set to 5 so overlap regions are divided into 259 // two merge requests. 260 TEST_UTIL.getHBaseCluster().getMaster().getConfiguration().setInt( 261 "hbase.master.metafixer.max.merge.count", 5); 262 263 // Get overlap regions 264 HashSet<String> overlapRegions = new HashSet<>(); 265 for (Pair<RegionInfo, RegionInfo> pair : report.getOverlaps()) { 266 overlapRegions.add(pair.getFirst().getRegionNameAsString()); 267 overlapRegions.add(pair.getSecond().getRegionNameAsString()); 268 } 269 270 MetaFixer fixer = new MetaFixer(services); 271 fixer.fixOverlaps(report); 272 AssignmentManager am = services.getAssignmentManager(); 273 274 HBaseTestingUtility.await(200, () -> { 275 try { 276 cj.scan(); 277 final CatalogJanitor.Report postReport = cj.getLastReport(); 278 RegionStates regionStates = am.getRegionStates(); 279 280 // Make sure that two merged regions are opened and GCs are done. 281 if (postReport.getOverlaps().size() == 1) { 282 Pair<RegionInfo, RegionInfo> pair = postReport.getOverlaps().get(0); 283 if ((!overlapRegions.contains(pair.getFirst().getRegionNameAsString()) && 284 regionStates.getRegionState(pair.getFirst()).isOpened()) && 285 (!overlapRegions.contains(pair.getSecond().getRegionNameAsString()) && 286 regionStates.getRegionState(pair.getSecond()).isOpened())) { 287 // Make sure GC is done. 288 List<RegionInfo> firstParents = MetaTableAccessor.getMergeRegions( 289 services.getConnection(), pair.getFirst().getRegionName()); 290 List<RegionInfo> secondParents = MetaTableAccessor.getMergeRegions( 291 services.getConnection(), pair.getSecond().getRegionName()); 292 293 return (firstParents == null || firstParents.isEmpty()) && 294 (secondParents == null || secondParents.isEmpty()); 295 } 296 } 297 return false; 298 } catch (Exception e) { 299 throw new RuntimeException(e); 300 } 301 }); 302 303 // Second run of fixOverlap should fix all. 304 report = cj.getLastReport(); 305 fixer.fixOverlaps(report); 306 307 HBaseTestingUtility.await(20, () -> { 308 try { 309 // Make sure it GC only once. 310 return (cj.scan() > 0); 311 } catch (Exception e) { 312 throw new RuntimeException(e); 313 } 314 }); 315 316 // No holes reported. 317 cj.scan(); 318 final CatalogJanitor.Report postReport = cj.getLastReport(); 319 assertTrue(postReport.isEmpty()); 320 321 } finally { 322 TEST_UTIL.getHBaseCluster().getMaster().getConfiguration().unset( 323 "hbase.master.metafixer.max.merge.count"); 324 325 TEST_UTIL.deleteTable(tn); 326 } 327 } 328 329 /** 330 * This test covers the case that one of merged parent regions is a merged child region that 331 * has not been GCed but there is no reference files anymore. In this case, it will kick off 332 * a GC procedure, but no merge will happen. 333 */ 334 @Test 335 public void testMergeWithMergedChildRegion() throws Exception { 336 TableName tn = TableName.valueOf(this.name.getMethodName()); 337 Table t = TEST_UTIL.createMultiRegionTable(tn, HConstants.CATALOG_FAMILY); 338 List<RegionInfo> ris = MetaTableAccessor.getTableRegions(TEST_UTIL.getConnection(), tn); 339 assertTrue(ris.size() > 5); 340 HMaster services = TEST_UTIL.getHBaseCluster().getMaster(); 341 CatalogJanitor cj = services.getCatalogJanitor(); 342 cj.scan(); 343 CatalogJanitor.Report report = cj.getLastReport(); 344 assertTrue(report.isEmpty()); 345 RegionInfo overlapRegion = makeOverlap(services, ris.get(1), ris.get(2)); 346 347 cj.scan(); 348 report = cj.getLastReport(); 349 assertEquals(2, report.getOverlaps().size()); 350 351 // Mark it as a merged child region. 352 RegionInfo fakedParentRegion = RegionInfoBuilder.newBuilder(tn). 353 setStartKey(overlapRegion.getStartKey()). 354 build(); 355 356 Table meta = MetaTableAccessor.getMetaHTable(TEST_UTIL.getConnection()); 357 Put putOfMerged = MetaTableAccessor.makePutFromRegionInfo(overlapRegion, 358 HConstants.LATEST_TIMESTAMP); 359 String qualifier = String.format(HConstants.MERGE_QUALIFIER_PREFIX_STR + "%04d", 0); 360 putOfMerged.add(CellBuilderFactory.create(CellBuilderType.SHALLOW_COPY).setRow( 361 putOfMerged.getRow()). 362 setFamily(HConstants.CATALOG_FAMILY). 363 setQualifier(Bytes.toBytes(qualifier)). 364 setTimestamp(putOfMerged.getTimestamp()). 365 setType(Cell.Type.Put). 366 setValue(RegionInfo.toByteArray(fakedParentRegion)). 367 build()); 368 369 meta.put(putOfMerged); 370 371 MetaFixer fixer = new MetaFixer(services); 372 fixer.fixOverlaps(report); 373 374 // Wait until all procedures settled down 375 HBaseTestingUtility.await(200, () -> { 376 return services.getMasterProcedureExecutor().getActiveProcIds().isEmpty(); 377 }); 378 379 // No merge is done, overlap is still there. 380 cj.scan(); 381 report = cj.getLastReport(); 382 assertEquals(2, report.getOverlaps().size()); 383 384 fixer.fixOverlaps(report); 385 386 // Wait until all procedures settled down 387 HBaseTestingUtility.await(200, () -> { 388 return services.getMasterProcedureExecutor().getActiveProcIds().isEmpty(); 389 }); 390 391 // Merge is done and no more overlaps 392 cj.scan(); 393 report = cj.getLastReport(); 394 assertEquals(0, report.getOverlaps().size()); 395 } 396 397 /** 398 * Make it so a big overlap spans many Regions, some of which are non-contiguous. Make it so 399 * we can fix this condition. HBASE-24247 400 */ 401 @Test 402 public void testOverlapWithMergeOfNonContiguous() throws Exception { 403 TableName tn = TableName.valueOf(this.name.getMethodName()); 404 TEST_UTIL.createMultiRegionTable(tn, HConstants.CATALOG_FAMILY); 405 List<RegionInfo> ris = MetaTableAccessor.getTableRegions(TEST_UTIL.getConnection(), tn); 406 assertTrue(ris.size() > 5); 407 MasterServices services = TEST_UTIL.getHBaseCluster().getMaster(); 408 services.getCatalogJanitor().scan(); 409 CatalogJanitor.Report report = services.getCatalogJanitor().getLastReport(); 410 assertTrue(report.isEmpty()); 411 // Make a simple overlap spanning second and third region. 412 makeOverlap(services, ris.get(1), ris.get(5)); 413 // Now Delete a region under the overlap to manufacture non-contiguous sub regions. 414 RegionInfo deletedRegion = ris.get(3); 415 long pid = services.getAssignmentManager().unassign(deletedRegion); 416 while (!services.getMasterProcedureExecutor().isFinished(pid)) { 417 Threads.sleep(100); 418 } 419 GCRegionProcedure procedure = 420 new GCRegionProcedure(services.getMasterProcedureExecutor().getEnvironment(), ris.get(3)); 421 pid = services.getMasterProcedureExecutor().submitProcedure(procedure); 422 while (!services.getMasterProcedureExecutor().isFinished(pid)) { 423 Threads.sleep(100); 424 } 425 services.getCatalogJanitor().scan(); 426 report = services.getCatalogJanitor().getLastReport(); 427 assertEquals(1, MetaFixer.calculateMerges(10, report.getOverlaps()).size()); 428 MetaFixer fixer = new MetaFixer(services); 429 fixer.fixOverlaps(report); 430 HBaseTestingUtility.await(10, () -> { 431 try { 432 services.getCatalogJanitor().scan(); 433 final CatalogJanitor.Report postReport = services.getCatalogJanitor().getLastReport(); 434 return postReport.isEmpty(); 435 } catch (Exception e) { 436 throw new RuntimeException(e); 437 } 438 }); 439 } 440}