/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.master;

import static org.apache.hbase.thirdparty.org.apache.commons.collections4.CollectionUtils.isNotEmpty;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertTrue;
import java.io.IOException;
import java.util.Collections;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.CellBuilderFactory;
import org.apache.hadoop.hbase.CellBuilderType;
import org.apache.hadoop.hbase.HBaseClassTestRule;
import org.apache.hadoop.hbase.HBaseTestingUtility;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.MetaTableAccessor;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.RegionInfo;
import org.apache.hadoop.hbase.client.RegionInfoBuilder;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.Table;
import org.apache.hadoop.hbase.master.assignment.AssignmentManager;
import org.apache.hadoop.hbase.master.assignment.GCRegionProcedure;
import org.apache.hadoop.hbase.master.assignment.GCMultipleMergedRegionsProcedure;
import org.apache.hadoop.hbase.master.assignment.RegionStates;
import org.apache.hadoop.hbase.master.procedure.MasterProcedureEnv;
import org.apache.hadoop.hbase.procedure2.ProcedureExecutor;
import org.apache.hadoop.hbase.testclassification.LargeTests;
import org.apache.hadoop.hbase.testclassification.MasterTests;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.Pair;
import org.apache.hadoop.hbase.util.Threads;
import org.junit.AfterClass;
import org.junit.BeforeClass;
import org.junit.ClassRule;
import org.junit.Rule;
import org.junit.Test;
import org.junit.experimental.categories.Category;
import org.junit.rules.TestName;

/**
 * Exercises {@link MetaFixer} against a mini cluster: holes and overlaps are manufactured in
 * hbase:meta, reported by a {@link CatalogJanitor} scan, and then handed to
 * {@link MetaFixer#fixHoles} / {@link MetaFixer#fixOverlaps}.
 */
@Category({MasterTests.class, LargeTests.class})
public class TestMetaFixer {
  @ClassRule
  public static final HBaseClassTestRule CLASS_RULE =
    HBaseClassTestRule.forClass(TestMetaFixer.class);
  @Rule
  public TestName name = new TestName();

  private static final HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility();

  @BeforeClass
  public static void setupBeforeClass() throws Exception {
    TEST_UTIL.startMiniCluster();
  }

  @AfterClass
  public static void tearDownAfterClass() throws Exception {
    TEST_UTIL.shutdownMiniCluster();
  }

  /**
   * Deletes {@code ri} through {@link MetaTableAccessor#deleteRegionInfo} and also drops it from
   * the Master's in-memory {@link RegionStates}.
   * @param services the Master whose region states are updated
   * @param ri the region to delete
   * @throws IOException if the delete against hbase:meta fails
   */
  private void deleteRegion(MasterServices services, RegionInfo ri) throws IOException {
    MetaTableAccessor.deleteRegionInfo(TEST_UTIL.getConnection(), ri);
    // Delete it from Master context too else it sticks around.
    services.getAssignmentManager().getRegionStates().deleteRegion(ri);
  }

  /**
   * Deletes three regions of a multi-region table, checks that the CatalogJanitor reports three
   * holes, and verifies that {@link MetaFixer#fixHoles} restores the original region count.
   */
  @Test
  public void testPlugsHoles() throws Exception {
    TableName tn = TableName.valueOf(this.name.getMethodName());
    TEST_UTIL.createMultiRegionTable(tn, HConstants.CATALOG_FAMILY);
    List<RegionInfo> ris = MetaTableAccessor.getTableRegions(TEST_UTIL.getConnection(), tn);
    MasterServices services = TEST_UTIL.getHBaseCluster().getMaster();
    int initialSize = services.getAssignmentManager().getRegionStates().getRegionStates().size();
    services.getCatalogJanitor().scan();
    CatalogJanitor.Report report = services.getCatalogJanitor().getLastReport();
    assertTrue(report.isEmpty());
    int originalCount = ris.size();
    // Remove first, last and middle region. See if hole gets plugged. Table has 26 regions.
    deleteRegion(services, ris.get(ris.size() - 1));
    deleteRegion(services, ris.get(3));
    deleteRegion(services, ris.get(0));
    assertEquals(initialSize - 3,
      services.getAssignmentManager().getRegionStates().getRegionStates().size());
    services.getCatalogJanitor().scan();
    report = services.getCatalogJanitor().getLastReport();
    assertEquals(report.toString(), 3, report.getHoles().size());
    MetaFixer fixer = new MetaFixer(services);
    fixer.fixHoles(report);
    services.getCatalogJanitor().scan();
    report = services.getCatalogJanitor().getLastReport();
    assertTrue(report.toString(), report.isEmpty());
    assertEquals(initialSize,
      services.getAssignmentManager().getRegionStates().getRegionStates().size());

    // Wait for RITs to settle -- those are the fixed regions being assigned -- or until the
    // watchdog TestRule terminates the test. "Settled" means no active procedures and no
    // regions left in transition; waiting for RITs to be non-empty (as this test used to do)
    // can block forever when the assignments complete before the first poll.
    HBaseTestingUtility.await(50,
      () -> services.getMasterProcedureExecutor().getActiveProcIds().isEmpty() &&
        !isNotEmpty(services.getAssignmentManager().getRegionsInTransition()));

    ris = MetaTableAccessor.getTableRegions(TEST_UTIL.getConnection(), tn);
    assertEquals(originalCount, ris.size());
  }

  /**
   * Just make sure running fixMeta does right thing for the case
   * of a single-region Table where the region gets dropped.
   * There is nothing much we can do. We can't restore what
   * we don't know about (at least from a read of hbase:meta).
   */
  @Test
  public void testOneRegionTable() throws IOException {
    TableName tn = TableName.valueOf(this.name.getMethodName());
    TEST_UTIL.createTable(tn, HConstants.CATALOG_FAMILY);
    List<RegionInfo> ris = MetaTableAccessor.getTableRegions(TEST_UTIL.getConnection(), tn);
    MasterServices services = TEST_UTIL.getHBaseCluster().getMaster();
    services.getCatalogJanitor().scan();
    deleteRegion(services, ris.get(0));
    services.getCatalogJanitor().scan();
    CatalogJanitor.Report report = services.getCatalogJanitor().getLastReport();
    ris = MetaTableAccessor.getTableRegions(TEST_UTIL.getConnection(), tn);
    assertTrue(ris.isEmpty());
    MetaFixer fixer = new MetaFixer(services);
    fixer.fixHoles(report);
    // NOTE(review): no scan happens between fixHoles and this read, so this is the report
    // produced by the scan above -- confirm whether a re-scan was intended.
    report = services.getCatalogJanitor().getLastReport();
    assertTrue(report.isEmpty());
    ris = MetaTableAccessor.getTableRegions(TEST_UTIL.getConnection(), tn);
    assertEquals(0, ris.size());
  }

  /**
   * Manufactures an overlap: builds a region running from the start key of {@code a} to the end
   * key of {@code b}, writes it to hbase:meta and asks the AssignmentManager to assign it.
   * @param services the Master used for the meta write and the assign
   * @param a region supplying the table and the start key
   * @param b region supplying the end key
   * @return the newly created overlapping region
   * @throws IOException if the write to hbase:meta or the assign fails
   */
  private static RegionInfo makeOverlap(MasterServices services, RegionInfo a, RegionInfo b)
      throws IOException {
    RegionInfo overlapRegion = RegionInfoBuilder.newBuilder(a.getTable()).
      setStartKey(a.getStartKey()).
      setEndKey(b.getEndKey()).
      build();
    MetaTableAccessor.putsToMetaTable(services.getConnection(),
      Collections.singletonList(MetaTableAccessor.makePutFromRegionInfo(overlapRegion,
        System.currentTimeMillis())));
    // TODO: Add checks at assign time to PREVENT being able to assign over existing assign.
    services.getAssignmentManager().assign(overlapRegion);
    return overlapRegion;
  }

  /**
   * Shared setup for the overlap tests: creates and loads a multi-region table, checks that the
   * CatalogJanitor report starts out empty, then adds three overlapping regions.
   * @param tn name of the table to create
   */
  private void testOverlapCommon(final TableName tn) throws Exception {
    // The Table handle is only needed for loading data; close it as soon as that is done.
    try (Table t = TEST_UTIL.createMultiRegionTable(tn, HConstants.CATALOG_FAMILY)) {
      TEST_UTIL.loadTable(t, HConstants.CATALOG_FAMILY);
    }
    List<RegionInfo> ris = MetaTableAccessor.getTableRegions(TEST_UTIL.getConnection(), tn);
    assertTrue(ris.size() > 5);
    HMaster services = TEST_UTIL.getHBaseCluster().getMaster();
    services.getCatalogJanitor().scan();
    CatalogJanitor.Report report = services.getCatalogJanitor().getLastReport();
    assertTrue(report.isEmpty());
    // Make three overlapping regions laid over the regions at indexes 1 through 4.
    makeOverlap(services, ris.get(1), ris.get(3));
    makeOverlap(services, ris.get(2), ris.get(3));
    makeOverlap(services, ris.get(2), ris.get(4));
  }

  /**
   * Fixes the overlaps made by {@link #testOverlapCommon(TableName)} in one merge, re-submits
   * the merged-region GC a second time, and checks that no orphan regions or holes result.
   */
  @Test
  public void testOverlap() throws Exception {
    TableName tn = TableName.valueOf(this.name.getMethodName());
    testOverlapCommon(tn);
    HMaster services = TEST_UTIL.getHBaseCluster().getMaster();
    HbckChore hbckChore = services.getHbckChore();

    CatalogJanitor cj = services.getCatalogJanitor();
    cj.scan();
    CatalogJanitor.Report report = cj.getLastReport();
    assertEquals(6, report.getOverlaps().size());
    assertEquals(1,
      MetaFixer.calculateMerges(10, report.getOverlaps()).size());
    MetaFixer fixer = new MetaFixer(services);
    fixer.fixOverlaps(report);

    HBaseTestingUtility.await(10, () -> {
      try {
        if (cj.scan() > 0) {
          // It submits GC once, then it will immediately kick off another GC to test if
          // GCMultipleMergedRegionsProcedure is idempotent. If it is not, it will create
          // a hole.
          Map<RegionInfo, Result> mergedRegions = cj.getLastReport().mergedRegions;
          for (Map.Entry<RegionInfo, Result> e : mergedRegions.entrySet()) {
            List<RegionInfo> parents = MetaTableAccessor.getMergeRegions(e.getValue().rawCells());
            if (parents != null) {
              ProcedureExecutor<MasterProcedureEnv> pe = services.getMasterProcedureExecutor();
              pe.submitProcedure(new GCMultipleMergedRegionsProcedure(pe.getEnvironment(),
                e.getKey(), parents));
            }
          }
          return true;
        }
        return false;
      } catch (Exception e) {
        throw new RuntimeException(e);
      }
    });

    // Wait until all GCs settled down
    HBaseTestingUtility.await(10, () -> {
      return services.getMasterProcedureExecutor().getActiveProcIds().isEmpty();
    });

    // No orphan regions on FS
    hbckChore.chore();
    assertEquals(0, hbckChore.getOrphanRegionsOnFS().size());

    // No holes reported.
    cj.scan();
    final CatalogJanitor.Report postReport = cj.getLastReport();
    assertTrue(postReport.isEmpty());
  }

  /**
   * Same overlaps as {@link #testOverlap()} but with
   * {@code hbase.master.metafixer.max.merge.count} set to 5, so the fix takes two rounds of
   * {@link MetaFixer#fixOverlaps}. The setting is removed and the table deleted afterwards.
   */
  @Test
  public void testOverlapWithSmallMergeCount() throws Exception {
    TableName tn = TableName.valueOf(this.name.getMethodName());
    try {
      testOverlapCommon(tn);
      HMaster services = TEST_UTIL.getHBaseCluster().getMaster();
      CatalogJanitor cj = services.getCatalogJanitor();
      cj.scan();
      CatalogJanitor.Report report = cj.getLastReport();
      assertEquals(6, report.getOverlaps().size());
      assertEquals(2,
        MetaFixer.calculateMerges(5, report.getOverlaps()).size());

      // The max merge count is set to 5 so overlap regions are divided into
      // two merge requests.
      TEST_UTIL.getHBaseCluster().getMaster().getConfiguration().setInt(
        "hbase.master.metafixer.max.merge.count", 5);

      // Get overlap regions
      HashSet<String> overlapRegions = new HashSet<>();
      for (Pair<RegionInfo, RegionInfo> pair : report.getOverlaps()) {
        overlapRegions.add(pair.getFirst().getRegionNameAsString());
        overlapRegions.add(pair.getSecond().getRegionNameAsString());
      }

      MetaFixer fixer = new MetaFixer(services);
      fixer.fixOverlaps(report);
      AssignmentManager am = services.getAssignmentManager();

      HBaseTestingUtility.await(200, () -> {
        try {
          cj.scan();
          final CatalogJanitor.Report postReport = cj.getLastReport();
          RegionStates regionStates = am.getRegionStates();

          // Make sure that two merged regions are opened and GCs are done.
          if (postReport.getOverlaps().size() == 1) {
            Pair<RegionInfo, RegionInfo> pair = postReport.getOverlaps().get(0);
            if ((!overlapRegions.contains(pair.getFirst().getRegionNameAsString()) &&
              regionStates.getRegionState(pair.getFirst()).isOpened()) &&
              (!overlapRegions.contains(pair.getSecond().getRegionNameAsString()) &&
                regionStates.getRegionState(pair.getSecond()).isOpened())) {
              // Make sure GC is done.
              List<RegionInfo> firstParents = MetaTableAccessor.getMergeRegions(
                services.getConnection(), pair.getFirst().getRegionName());
              List<RegionInfo> secondParents = MetaTableAccessor.getMergeRegions(
                services.getConnection(), pair.getSecond().getRegionName());

              return (firstParents == null || firstParents.isEmpty()) &&
                (secondParents == null || secondParents.isEmpty());
            }
          }
          return false;
        } catch (Exception e) {
          throw new RuntimeException(e);
        }
      });

      // Second run of fixOverlap should fix all.
      report = cj.getLastReport();
      fixer.fixOverlaps(report);

      HBaseTestingUtility.await(20, () -> {
        try {
          // Make sure it GC only once.
          return (cj.scan() > 0);
        } catch (Exception e) {
          throw new RuntimeException(e);
        }
      });

      // No holes reported.
      cj.scan();
      final CatalogJanitor.Report postReport = cj.getLastReport();
      assertTrue(postReport.isEmpty());

    } finally {
      TEST_UTIL.getHBaseCluster().getMaster().getConfiguration().unset(
        "hbase.master.metafixer.max.merge.count");

      TEST_UTIL.deleteTable(tn);
    }
  }

  /**
   * This test covers the case that one of merged parent regions is a merged child region that
   * has not been GCed but there is no reference files anymore. In this case, it will kick off
   * a GC procedure, but no merge will happen.
   */
  @Test
  public void testMergeWithMergedChildRegion() throws Exception {
    TableName tn = TableName.valueOf(this.name.getMethodName());
    TEST_UTIL.createMultiRegionTable(tn, HConstants.CATALOG_FAMILY);
    List<RegionInfo> ris = MetaTableAccessor.getTableRegions(TEST_UTIL.getConnection(), tn);
    assertTrue(ris.size() > 5);
    HMaster services = TEST_UTIL.getHBaseCluster().getMaster();
    CatalogJanitor cj = services.getCatalogJanitor();
    cj.scan();
    CatalogJanitor.Report report = cj.getLastReport();
    assertTrue(report.isEmpty());
    RegionInfo overlapRegion = makeOverlap(services, ris.get(1), ris.get(2));

    cj.scan();
    report = cj.getLastReport();
    assertEquals(2, report.getOverlaps().size());

    // Mark it as a merged child region.
    RegionInfo fakedParentRegion = RegionInfoBuilder.newBuilder(tn).
      setStartKey(overlapRegion.getStartKey()).
      build();

    Put putOfMerged = MetaTableAccessor.makePutFromRegionInfo(overlapRegion,
      HConstants.LATEST_TIMESTAMP);
    String qualifier = String.format(HConstants.MERGE_QUALIFIER_PREFIX_STR + "%04d", 0);
    putOfMerged.add(CellBuilderFactory.create(CellBuilderType.SHALLOW_COPY).setRow(
      putOfMerged.getRow()).
      setFamily(HConstants.CATALOG_FAMILY).
      setQualifier(Bytes.toBytes(qualifier)).
      setTimestamp(putOfMerged.getTimestamp()).
      setType(Cell.Type.Put).
      setValue(RegionInfo.toByteArray(fakedParentRegion)).
      build());

    // Close the hbase:meta handle once the faked merge qualifier has been written.
    try (Table meta = MetaTableAccessor.getMetaHTable(TEST_UTIL.getConnection())) {
      meta.put(putOfMerged);
    }

    MetaFixer fixer = new MetaFixer(services);
    fixer.fixOverlaps(report);

    // Wait until all procedures settled down
    HBaseTestingUtility.await(200, () -> {
      return services.getMasterProcedureExecutor().getActiveProcIds().isEmpty();
    });

    // No merge is done, overlap is still there.
    cj.scan();
    report = cj.getLastReport();
    assertEquals(2, report.getOverlaps().size());

    fixer.fixOverlaps(report);

    // Wait until all procedures settled down
    HBaseTestingUtility.await(200, () -> {
      return services.getMasterProcedureExecutor().getActiveProcIds().isEmpty();
    });

    // Merge is done and no more overlaps
    cj.scan();
    report = cj.getLastReport();
    assertEquals(0, report.getOverlaps().size());
  }

  /**
   * Make it so a big overlap spans many Regions, some of which are non-contiguous. Make it so
   * we can fix this condition. HBASE-24247
   */
  @Test
  public void testOverlapWithMergeOfNonContiguous() throws Exception {
    TableName tn = TableName.valueOf(this.name.getMethodName());
    TEST_UTIL.createMultiRegionTable(tn, HConstants.CATALOG_FAMILY);
    List<RegionInfo> ris = MetaTableAccessor.getTableRegions(TEST_UTIL.getConnection(), tn);
    assertTrue(ris.size() > 5);
    MasterServices services = TEST_UTIL.getHBaseCluster().getMaster();
    services.getCatalogJanitor().scan();
    CatalogJanitor.Report report = services.getCatalogJanitor().getLastReport();
    assertTrue(report.isEmpty());
    // Make one big overlap running from the start of region 1 to the end of region 5.
    makeOverlap(services, ris.get(1), ris.get(5));
    // Now Delete a region under the overlap to manufacture non-contiguous sub regions.
    RegionInfo deletedRegion = ris.get(3);
    long pid = services.getAssignmentManager().unassign(deletedRegion);
    while (!services.getMasterProcedureExecutor().isFinished(pid)) {
      Threads.sleep(100);
    }
    GCRegionProcedure procedure = new GCRegionProcedure(
      services.getMasterProcedureExecutor().getEnvironment(), deletedRegion);
    pid = services.getMasterProcedureExecutor().submitProcedure(procedure);
    while (!services.getMasterProcedureExecutor().isFinished(pid)) {
      Threads.sleep(100);
    }
    services.getCatalogJanitor().scan();
    report = services.getCatalogJanitor().getLastReport();
    assertEquals(1, MetaFixer.calculateMerges(10, report.getOverlaps()).size());
    MetaFixer fixer = new MetaFixer(services);
    fixer.fixOverlaps(report);
    HBaseTestingUtility.await(10, () -> {
      try {
        services.getCatalogJanitor().scan();
        final CatalogJanitor.Report postReport = services.getCatalogJanitor().getLastReport();
        return postReport.isEmpty();
      } catch (Exception e) {
        throw new RuntimeException(e);
      }
    });
  }
}