001/*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018package org.apache.hadoop.hbase.master.janitor;
019
020import static org.junit.jupiter.api.Assertions.assertEquals;
021import static org.junit.jupiter.api.Assertions.assertNotNull;
022import static org.junit.jupiter.api.Assertions.assertTrue;
023
024import java.io.IOException;
025import java.util.Collections;
026import java.util.HashSet;
027import java.util.List;
028import java.util.Map;
029import org.apache.hadoop.hbase.CatalogFamilyFormat;
030import org.apache.hadoop.hbase.Cell;
031import org.apache.hadoop.hbase.CellBuilderFactory;
032import org.apache.hadoop.hbase.CellBuilderType;
033import org.apache.hadoop.hbase.HBaseTestingUtil;
034import org.apache.hadoop.hbase.HConstants;
035import org.apache.hadoop.hbase.MetaTableAccessor;
036import org.apache.hadoop.hbase.TableName;
037import org.apache.hadoop.hbase.client.Put;
038import org.apache.hadoop.hbase.client.RegionInfo;
039import org.apache.hadoop.hbase.client.RegionInfoBuilder;
040import org.apache.hadoop.hbase.client.Result;
041import org.apache.hadoop.hbase.client.Table;
042import org.apache.hadoop.hbase.master.HMaster;
043import org.apache.hadoop.hbase.master.MasterServices;
044import org.apache.hadoop.hbase.master.assignment.AssignmentManager;
045import org.apache.hadoop.hbase.master.assignment.GCMultipleMergedRegionsProcedure;
046import org.apache.hadoop.hbase.master.assignment.GCRegionProcedure;
047import org.apache.hadoop.hbase.master.assignment.RegionStateStore;
048import org.apache.hadoop.hbase.master.assignment.RegionStates;
049import org.apache.hadoop.hbase.master.hbck.HbckChore;
050import org.apache.hadoop.hbase.master.hbck.HbckReport;
051import org.apache.hadoop.hbase.master.procedure.MasterProcedureEnv;
052import org.apache.hadoop.hbase.procedure2.ProcedureExecutor;
053import org.apache.hadoop.hbase.procedure2.ProcedureTestingUtility;
054import org.apache.hadoop.hbase.testclassification.LargeTests;
055import org.apache.hadoop.hbase.testclassification.MasterTests;
056import org.apache.hadoop.hbase.util.Bytes;
057import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
058import org.apache.hadoop.hbase.util.Pair;
059import org.apache.hadoop.hbase.util.Threads;
060import org.junit.jupiter.api.AfterAll;
061import org.junit.jupiter.api.BeforeAll;
062import org.junit.jupiter.api.Tag;
063import org.junit.jupiter.api.Test;
064import org.junit.jupiter.api.TestInfo;
065
066@Tag(MasterTests.TAG)
067@Tag(LargeTests.TAG)
068public class TestMetaFixer {
069
070  private static final HBaseTestingUtil TEST_UTIL = new HBaseTestingUtil();
071
072  @BeforeAll
073  public static void setupBeforeClass() throws Exception {
074    TEST_UTIL.startMiniCluster();
075  }
076
077  @AfterAll
078  public static void tearDownAfterClass() throws Exception {
079    TEST_UTIL.shutdownMiniCluster();
080  }
081
082  private void deleteRegion(MasterServices services, RegionInfo ri) throws IOException {
083    services.getAssignmentManager().getRegionStateStore().deleteRegion(ri);
084    // Delete it from Master context too else it sticks around.
085    services.getAssignmentManager().getRegionStates().deleteRegion(ri);
086  }
087
088  private void testPlugsHolesWithReadReplicaInternal(final TableName tn, final int replicaCount)
089    throws Exception {
090    TEST_UTIL.createMultiRegionTable(tn, replicaCount, new byte[][] { HConstants.CATALOG_FAMILY });
091    List<RegionInfo> ris = MetaTableAccessor.getTableRegions(TEST_UTIL.getConnection(), tn);
092    MasterServices services = TEST_UTIL.getHBaseCluster().getMaster();
093    int initialSize = services.getAssignmentManager().getRegionStates().getRegionStates().size();
094    services.getCatalogJanitor().scan();
095    CatalogJanitorReport report = services.getCatalogJanitor().getLastReport();
096    assertTrue(report.isEmpty());
097    int originalCount = ris.size();
098    // Remove first, last and middle region. See if hole gets plugged. Table has 26 * replicaCount
099    // regions.
100    for (int i = 0; i < replicaCount; i++) {
101      deleteRegion(services, ris.get(3 * replicaCount + i));
102      deleteRegion(services, ris.get(i));
103      deleteRegion(services, ris.get(ris.size() - 1 - i));
104    }
105    assertEquals(initialSize - 3 * replicaCount,
106      services.getAssignmentManager().getRegionStates().getRegionStates().size());
107    services.getCatalogJanitor().scan();
108    report = services.getCatalogJanitor().getLastReport();
109    assertEquals(3, report.getHoles().size(), report.toString());
110    MetaFixer fixer = new MetaFixer(services);
111    fixer.fixHoles(report);
112    services.getCatalogJanitor().scan();
113    report = services.getCatalogJanitor().getLastReport();
114    assertTrue(report.isEmpty(), report.toString());
115    assertEquals(initialSize,
116      services.getAssignmentManager().getRegionStates().getRegionStates().size());
117
118    // wait for RITs to settle -- those are the fixed regions being assigned -- or until the
119    // watchdog TestRule terminates the test.
120    HBaseTestingUtil.await(50,
121      () -> services.getMasterProcedureExecutor().getActiveProcIds().size() == 0);
122
123    ris = MetaTableAccessor.getTableRegions(TEST_UTIL.getConnection(), tn);
124    assertEquals(originalCount, ris.size());
125  }
126
127  @Test
128  public void testPlugsHoles(TestInfo testInfo) throws Exception {
129    TableName tn = TableName.valueOf(testInfo.getTestMethod().get().getName());
130    testPlugsHolesWithReadReplicaInternal(tn, 1);
131  }
132
133  @Test
134  public void testPlugsHolesWithReadReplica(TestInfo testInfo) throws Exception {
135    TableName tn = TableName.valueOf(testInfo.getTestMethod().get().getName());
136    testPlugsHolesWithReadReplicaInternal(tn, 3);
137  }
138
139  /**
140   * Just make sure running fixMeta does right thing for the case of a single-region Table where the
141   * region gets dropped. There is nothing much we can do. We can't restore what we don't know about
142   * (at least from a read of hbase:meta).
143   */
144  @Test
145  public void testOneRegionTable(TestInfo testInfo) throws IOException {
146    TableName tn = TableName.valueOf(testInfo.getTestMethod().get().getName());
147    TEST_UTIL.createTable(tn, HConstants.CATALOG_FAMILY);
148    List<RegionInfo> ris = MetaTableAccessor.getTableRegions(TEST_UTIL.getConnection(), tn);
149    MasterServices services = TEST_UTIL.getHBaseCluster().getMaster();
150    services.getCatalogJanitor().scan();
151    deleteRegion(services, ris.get(0));
152    services.getCatalogJanitor().scan();
153    CatalogJanitorReport report = services.getCatalogJanitor().getLastReport();
154    ris = MetaTableAccessor.getTableRegions(TEST_UTIL.getConnection(), tn);
155    assertTrue(ris.isEmpty());
156    MetaFixer fixer = new MetaFixer(services);
157    fixer.fixHoles(report);
158    report = services.getCatalogJanitor().getLastReport();
159    assertTrue(report.isEmpty());
160    ris = MetaTableAccessor.getTableRegions(TEST_UTIL.getConnection(), tn);
161    assertEquals(0, ris.size());
162  }
163
164  private static RegionInfo makeOverlap(MasterServices services, RegionInfo a, RegionInfo b)
165    throws IOException {
166    RegionInfo overlapRegion = RegionInfoBuilder.newBuilder(a.getTable())
167      .setStartKey(a.getStartKey()).setEndKey(b.getEndKey()).build();
168    TEST_UTIL.createRegionDir(overlapRegion, services.getMasterFileSystem());
169    MetaTableAccessor.putsToMetaTable(services.getConnection(),
170      Collections.singletonList(MetaTableAccessor.makePutFromRegionInfo(overlapRegion,
171        EnvironmentEdgeManager.currentTime())));
172    // TODO: Add checks at assign time to PREVENT being able to assign over existing assign.
173    long assign = services.getAssignmentManager().assign(overlapRegion);
174    ProcedureTestingUtility.waitProcedures(services.getMasterProcedureExecutor(), assign);
175    return overlapRegion;
176  }
177
178  private void testOverlapCommon(final TableName tn) throws Exception {
179    Table t = TEST_UTIL.createMultiRegionTable(tn, HConstants.CATALOG_FAMILY);
180    TEST_UTIL.loadTable(t, HConstants.CATALOG_FAMILY);
181    List<RegionInfo> ris = MetaTableAccessor.getTableRegions(TEST_UTIL.getConnection(), tn);
182    assertTrue(ris.size() > 5);
183    HMaster services = TEST_UTIL.getHBaseCluster().getMaster();
184    services.getCatalogJanitor().scan();
185    CatalogJanitorReport report = services.getCatalogJanitor().getLastReport();
186    assertTrue(report.isEmpty());
187    // Make a simple overlap spanning second and third region.
188    makeOverlap(services, ris.get(1), ris.get(3));
189    makeOverlap(services, ris.get(2), ris.get(3));
190    makeOverlap(services, ris.get(2), ris.get(4));
191  }
192
193  @Test
194  public void testOverlap(TestInfo testInfo) throws Exception {
195    TableName tn = TableName.valueOf(testInfo.getTestMethod().get().getName());
196    testOverlapCommon(tn);
197    HMaster services = TEST_UTIL.getHBaseCluster().getMaster();
198    HbckChore hbckChore = services.getHbckChore();
199
200    CatalogJanitor cj = services.getCatalogJanitor();
201    cj.scan();
202    CatalogJanitorReport report = cj.getLastReport();
203    assertEquals(6, report.getOverlaps().size());
204    assertEquals(1, MetaFixer.calculateMerges(10, report.getOverlaps()).size());
205    MetaFixer fixer = new MetaFixer(services);
206    fixer.fixOverlaps(report);
207
208    HBaseTestingUtil.await(10, () -> {
209      try {
210        if (cj.scan() > 0) {
211          // It submits GC once, then it will immediately kick off another GC to test if
212          // GCMultipleMergedRegionsProcedure is idempotent. If it is not, it will create
213          // a hole.
214          Map<RegionInfo, Result> mergedRegions = cj.getLastReport().mergedRegions;
215          for (Map.Entry<RegionInfo, Result> e : mergedRegions.entrySet()) {
216            List<RegionInfo> parents = CatalogFamilyFormat.getMergeRegions(e.getValue().rawCells());
217            if (parents != null) {
218              ProcedureExecutor<MasterProcedureEnv> pe = services.getMasterProcedureExecutor();
219              pe.submitProcedure(
220                new GCMultipleMergedRegionsProcedure(pe.getEnvironment(), e.getKey(), parents));
221            }
222          }
223          return true;
224        }
225        return false;
226      } catch (Exception e) {
227        throw new RuntimeException(e);
228      }
229    });
230
231    // Wait until all GCs settled down
232    HBaseTestingUtil.await(10, () -> {
233      return services.getMasterProcedureExecutor().getActiveProcIds().isEmpty();
234    });
235
236    // No orphan regions on FS
237    hbckChore.choreForTesting();
238    HbckReport hbckReport = hbckChore.getLastReport();
239    assertNotNull(hbckReport);
240    assertEquals(0, hbckReport.getOrphanRegionsOnFS().size());
241
242    // No holes reported.
243    cj.scan();
244    final CatalogJanitorReport postReport = cj.getLastReport();
245    assertTrue(postReport.isEmpty());
246  }
247
248  @Test
249  public void testMultipleTableOverlaps() throws Exception {
250    TableName t1 = TableName.valueOf("t1");
251    TableName t2 = TableName.valueOf("t2");
252    TEST_UTIL.createMultiRegionTable(t1, new byte[][] { HConstants.CATALOG_FAMILY });
253    TEST_UTIL.createMultiRegionTable(t2, new byte[][] { HConstants.CATALOG_FAMILY });
254    TEST_UTIL.waitTableAvailable(t2);
255
256    HMaster services = TEST_UTIL.getHBaseCluster().getMaster();
257    services.getCatalogJanitor().scan();
258    CatalogJanitorReport report = services.getCatalogJanitor().getLastReport();
259    assertTrue(report.isEmpty());
260
261    // Make a simple overlap for t1
262    List<RegionInfo> ris = MetaTableAccessor.getTableRegions(TEST_UTIL.getConnection(), t1);
263    makeOverlap(services, ris.get(1), ris.get(2));
264    // Make a simple overlap for t2
265    ris = MetaTableAccessor.getTableRegions(TEST_UTIL.getConnection(), t2);
266    makeOverlap(services, ris.get(1), ris.get(2));
267
268    services.getCatalogJanitor().scan();
269    report = services.getCatalogJanitor().getLastReport();
270    assertEquals(4, report.getOverlaps().size(), "Region overlaps count does not match.");
271
272    MetaFixer fixer = new MetaFixer(services);
273    List<Long> longs = fixer.fixOverlaps(report);
274    long[] procIds = longs.stream().mapToLong(l -> l).toArray();
275    ProcedureTestingUtility.waitProcedures(services.getMasterProcedureExecutor(), procIds);
276
277    // After fix, verify no overlaps are left.
278    services.getCatalogJanitor().scan();
279    report = services.getCatalogJanitor().getLastReport();
280    assertTrue(report.isEmpty(), "After fix there should not have been any overlaps.");
281  }
282
283  @Test
284  public void testOverlapWithSmallMergeCount(TestInfo testInfo) throws Exception {
285    TableName tn = TableName.valueOf(testInfo.getTestMethod().get().getName());
286    try {
287      testOverlapCommon(tn);
288      HMaster services = TEST_UTIL.getHBaseCluster().getMaster();
289      CatalogJanitor cj = services.getCatalogJanitor();
290      cj.scan();
291      CatalogJanitorReport report = cj.getLastReport();
292      assertEquals(6, report.getOverlaps().size());
293      assertEquals(2, MetaFixer.calculateMerges(5, report.getOverlaps()).size());
294
295      // The max merge count is set to 5 so overlap regions are divided into
296      // two merge requests.
297      TEST_UTIL.getHBaseCluster().getMaster().getConfiguration()
298        .setInt("hbase.master.metafixer.max.merge.count", 5);
299
300      // Get overlap regions
301      HashSet<String> overlapRegions = new HashSet<>();
302      for (Pair<RegionInfo, RegionInfo> pair : report.getOverlaps()) {
303        overlapRegions.add(pair.getFirst().getRegionNameAsString());
304        overlapRegions.add(pair.getSecond().getRegionNameAsString());
305      }
306
307      MetaFixer fixer = new MetaFixer(services);
308      fixer.fixOverlaps(report);
309      AssignmentManager am = services.getAssignmentManager();
310
311      HBaseTestingUtil.await(200, () -> {
312        try {
313          cj.scan();
314          final CatalogJanitorReport postReport = cj.getLastReport();
315          RegionStates regionStates = am.getRegionStates();
316          RegionStateStore regionStateStore = am.getRegionStateStore();
317          // Make sure that two merged regions are opened and GCs are done.
318          if (postReport.getOverlaps().size() == 1) {
319            Pair<RegionInfo, RegionInfo> pair = postReport.getOverlaps().get(0);
320            if (
321              (!overlapRegions.contains(pair.getFirst().getRegionNameAsString())
322                && regionStates.getRegionState(pair.getFirst()).isOpened())
323                && (!overlapRegions.contains(pair.getSecond().getRegionNameAsString())
324                  && regionStates.getRegionState(pair.getSecond()).isOpened())
325            ) {
326              // Make sure GC is done.
327              List<RegionInfo> firstParents = regionStateStore.getMergeRegions(pair.getFirst());
328              List<RegionInfo> secondParents = regionStateStore.getMergeRegions(pair.getSecond());
329
330              return (firstParents == null || firstParents.isEmpty())
331                && (secondParents == null || secondParents.isEmpty());
332            }
333          }
334          return false;
335        } catch (Exception e) {
336          throw new RuntimeException(e);
337        }
338      });
339
340      // Second run of fixOverlap should fix all.
341      report = cj.getLastReport();
342      fixer.fixOverlaps(report);
343
344      HBaseTestingUtil.await(20, () -> {
345        try {
346          // Make sure it GC only once.
347          return (cj.scan() > 0);
348        } catch (Exception e) {
349          throw new RuntimeException(e);
350        }
351      });
352
353      // No holes reported.
354      cj.scan();
355      final CatalogJanitorReport postReport = cj.getLastReport();
356      assertTrue(postReport.isEmpty());
357
358    } finally {
359      TEST_UTIL.getHBaseCluster().getMaster().getConfiguration()
360        .unset("hbase.master.metafixer.max.merge.count");
361
362      TEST_UTIL.deleteTable(tn);
363    }
364  }
365
366  /**
367   * This test covers the case that one of merged parent regions is a merged child region that has
368   * not been GCed but there is no reference files anymore. In this case, it will kick off a GC
369   * procedure, but no merge will happen.
370   */
371  @Test
372  public void testMergeWithMergedChildRegion(TestInfo testInfo) throws Exception {
373    TableName tn = TableName.valueOf(testInfo.getTestMethod().get().getName());
374    TEST_UTIL.createMultiRegionTable(tn, HConstants.CATALOG_FAMILY);
375    List<RegionInfo> ris = MetaTableAccessor.getTableRegions(TEST_UTIL.getConnection(), tn);
376    assertTrue(ris.size() > 5);
377    HMaster services = TEST_UTIL.getHBaseCluster().getMaster();
378    CatalogJanitor cj = services.getCatalogJanitor();
379    cj.scan();
380    CatalogJanitorReport report = cj.getLastReport();
381    assertTrue(report.isEmpty());
382    RegionInfo overlapRegion = makeOverlap(services, ris.get(1), ris.get(2));
383
384    cj.scan();
385    report = cj.getLastReport();
386    assertEquals(2, report.getOverlaps().size());
387
388    // Mark it as a merged child region.
389    RegionInfo fakedParentRegion =
390      RegionInfoBuilder.newBuilder(tn).setStartKey(overlapRegion.getStartKey()).build();
391
392    Table meta = MetaTableAccessor.getMetaHTable(TEST_UTIL.getConnection());
393    Put putOfMerged =
394      MetaTableAccessor.makePutFromRegionInfo(overlapRegion, HConstants.LATEST_TIMESTAMP);
395    String qualifier = String.format(HConstants.MERGE_QUALIFIER_PREFIX_STR + "%04d", 0);
396    putOfMerged.add(CellBuilderFactory.create(CellBuilderType.SHALLOW_COPY)
397      .setRow(putOfMerged.getRow()).setFamily(HConstants.CATALOG_FAMILY)
398      .setQualifier(Bytes.toBytes(qualifier)).setTimestamp(putOfMerged.getTimestamp())
399      .setType(Cell.Type.Put).setValue(RegionInfo.toByteArray(fakedParentRegion)).build());
400
401    meta.put(putOfMerged);
402
403    MetaFixer fixer = new MetaFixer(services);
404    fixer.fixOverlaps(report);
405
406    // Wait until all procedures settled down
407    HBaseTestingUtil.await(200, () -> {
408      return services.getMasterProcedureExecutor().getActiveProcIds().isEmpty();
409    });
410
411    // No merge is done, overlap is still there.
412    cj.scan();
413    report = cj.getLastReport();
414    assertEquals(2, report.getOverlaps().size());
415
416    fixer.fixOverlaps(report);
417
418    // Wait until all procedures settled down
419    HBaseTestingUtil.await(200, () -> {
420      return services.getMasterProcedureExecutor().getActiveProcIds().isEmpty();
421    });
422
423    // Merge is done and no more overlaps
424    cj.scan();
425    report = cj.getLastReport();
426    assertEquals(0, report.getOverlaps().size());
427  }
428
429  /**
430   * Make it so a big overlap spans many Regions, some of which are non-contiguous. Make it so we
431   * can fix this condition. HBASE-24247
432   */
433  @Test
434  public void testOverlapWithMergeOfNonContiguous(TestInfo testInfo) throws Exception {
435    TableName tn = TableName.valueOf(testInfo.getTestMethod().get().getName());
436    TEST_UTIL.createMultiRegionTable(tn, HConstants.CATALOG_FAMILY);
437    List<RegionInfo> ris = MetaTableAccessor.getTableRegions(TEST_UTIL.getConnection(), tn);
438    assertTrue(ris.size() > 5);
439    MasterServices services = TEST_UTIL.getHBaseCluster().getMaster();
440    services.getCatalogJanitor().scan();
441    CatalogJanitorReport report = services.getCatalogJanitor().getLastReport();
442    assertTrue(report.isEmpty());
443    // Make a simple overlap spanning second and third region.
444    makeOverlap(services, ris.get(1), ris.get(5));
445    // Now Delete a region under the overlap to manufacture non-contiguous sub regions.
446    RegionInfo deletedRegion = ris.get(3);
447    long pid = services.getAssignmentManager().unassign(deletedRegion);
448    while (!services.getMasterProcedureExecutor().isFinished(pid)) {
449      Threads.sleep(100);
450    }
451    GCRegionProcedure procedure =
452      new GCRegionProcedure(services.getMasterProcedureExecutor().getEnvironment(), ris.get(3));
453    pid = services.getMasterProcedureExecutor().submitProcedure(procedure);
454    while (!services.getMasterProcedureExecutor().isFinished(pid)) {
455      Threads.sleep(100);
456    }
457    services.getCatalogJanitor().scan();
458    report = services.getCatalogJanitor().getLastReport();
459    assertEquals(1, MetaFixer.calculateMerges(10, report.getOverlaps()).size());
460    MetaFixer fixer = new MetaFixer(services);
461    fixer.fixOverlaps(report);
462    HBaseTestingUtil.await(10, () -> {
463      try {
464        services.getCatalogJanitor().scan();
465        final CatalogJanitorReport postReport = services.getCatalogJanitor().getLastReport();
466        return postReport.isEmpty();
467      } catch (Exception e) {
468        throw new RuntimeException(e);
469      }
470    });
471  }
472}