001/*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018package org.apache.hadoop.hbase.master.janitor;
019
020import static org.junit.Assert.assertEquals;
021import static org.junit.Assert.assertNotNull;
022import static org.junit.Assert.assertTrue;
023
024import java.io.IOException;
025import java.util.Collections;
026import java.util.HashSet;
027import java.util.List;
028import java.util.Map;
029import org.apache.hadoop.hbase.CatalogFamilyFormat;
030import org.apache.hadoop.hbase.Cell;
031import org.apache.hadoop.hbase.CellBuilderFactory;
032import org.apache.hadoop.hbase.CellBuilderType;
033import org.apache.hadoop.hbase.HBaseClassTestRule;
034import org.apache.hadoop.hbase.HBaseTestingUtil;
035import org.apache.hadoop.hbase.HConstants;
036import org.apache.hadoop.hbase.MetaTableAccessor;
037import org.apache.hadoop.hbase.TableName;
038import org.apache.hadoop.hbase.client.Put;
039import org.apache.hadoop.hbase.client.RegionInfo;
040import org.apache.hadoop.hbase.client.RegionInfoBuilder;
041import org.apache.hadoop.hbase.client.Result;
042import org.apache.hadoop.hbase.client.Table;
043import org.apache.hadoop.hbase.master.HMaster;
044import org.apache.hadoop.hbase.master.MasterServices;
045import org.apache.hadoop.hbase.master.assignment.AssignmentManager;
046import org.apache.hadoop.hbase.master.assignment.GCMultipleMergedRegionsProcedure;
047import org.apache.hadoop.hbase.master.assignment.GCRegionProcedure;
048import org.apache.hadoop.hbase.master.assignment.RegionStateStore;
049import org.apache.hadoop.hbase.master.assignment.RegionStates;
050import org.apache.hadoop.hbase.master.hbck.HbckChore;
051import org.apache.hadoop.hbase.master.hbck.HbckReport;
052import org.apache.hadoop.hbase.master.procedure.MasterProcedureEnv;
053import org.apache.hadoop.hbase.procedure2.ProcedureExecutor;
054import org.apache.hadoop.hbase.procedure2.ProcedureTestingUtility;
055import org.apache.hadoop.hbase.testclassification.LargeTests;
056import org.apache.hadoop.hbase.testclassification.MasterTests;
057import org.apache.hadoop.hbase.util.Bytes;
058import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
059import org.apache.hadoop.hbase.util.Pair;
060import org.apache.hadoop.hbase.util.Threads;
061import org.junit.AfterClass;
062import org.junit.BeforeClass;
063import org.junit.ClassRule;
064import org.junit.Rule;
065import org.junit.Test;
066import org.junit.experimental.categories.Category;
067import org.junit.rules.TestName;
068
069@Category({ MasterTests.class, LargeTests.class })
070public class TestMetaFixer {
071  @ClassRule
072  public static final HBaseClassTestRule CLASS_RULE =
073    HBaseClassTestRule.forClass(TestMetaFixer.class);
074  @Rule
075  public TestName name = new TestName();
076
077  private static final HBaseTestingUtil TEST_UTIL = new HBaseTestingUtil();
078
079  @BeforeClass
080  public static void setupBeforeClass() throws Exception {
081    TEST_UTIL.startMiniCluster();
082  }
083
084  @AfterClass
085  public static void tearDownAfterClass() throws Exception {
086    TEST_UTIL.shutdownMiniCluster();
087  }
088
089  private void deleteRegion(MasterServices services, RegionInfo ri) throws IOException {
090    services.getAssignmentManager().getRegionStateStore().deleteRegion(ri);
091    // Delete it from Master context too else it sticks around.
092    services.getAssignmentManager().getRegionStates().deleteRegion(ri);
093  }
094
095  private void testPlugsHolesWithReadReplicaInternal(final TableName tn, final int replicaCount)
096    throws Exception {
097    TEST_UTIL.createMultiRegionTable(tn, replicaCount, new byte[][] { HConstants.CATALOG_FAMILY });
098    List<RegionInfo> ris = MetaTableAccessor.getTableRegions(TEST_UTIL.getConnection(), tn);
099    MasterServices services = TEST_UTIL.getHBaseCluster().getMaster();
100    int initialSize = services.getAssignmentManager().getRegionStates().getRegionStates().size();
101    services.getCatalogJanitor().scan();
102    CatalogJanitorReport report = services.getCatalogJanitor().getLastReport();
103    assertTrue(report.isEmpty());
104    int originalCount = ris.size();
105    // Remove first, last and middle region. See if hole gets plugged. Table has 26 * replicaCount
106    // regions.
107    for (int i = 0; i < replicaCount; i++) {
108      deleteRegion(services, ris.get(3 * replicaCount + i));
109      deleteRegion(services, ris.get(i));
110      deleteRegion(services, ris.get(ris.size() - 1 - i));
111    }
112    assertEquals(initialSize - 3 * replicaCount,
113      services.getAssignmentManager().getRegionStates().getRegionStates().size());
114    services.getCatalogJanitor().scan();
115    report = services.getCatalogJanitor().getLastReport();
116    assertEquals(report.toString(), 3, report.getHoles().size());
117    MetaFixer fixer = new MetaFixer(services);
118    fixer.fixHoles(report);
119    services.getCatalogJanitor().scan();
120    report = services.getCatalogJanitor().getLastReport();
121    assertTrue(report.toString(), report.isEmpty());
122    assertEquals(initialSize,
123      services.getAssignmentManager().getRegionStates().getRegionStates().size());
124
125    // wait for RITs to settle -- those are the fixed regions being assigned -- or until the
126    // watchdog TestRule terminates the test.
127    HBaseTestingUtil.await(50,
128      () -> services.getMasterProcedureExecutor().getActiveProcIds().size() == 0);
129
130    ris = MetaTableAccessor.getTableRegions(TEST_UTIL.getConnection(), tn);
131    assertEquals(originalCount, ris.size());
132  }
133
134  @Test
135  public void testPlugsHoles() throws Exception {
136    TableName tn = TableName.valueOf(this.name.getMethodName());
137    testPlugsHolesWithReadReplicaInternal(tn, 1);
138  }
139
140  @Test
141  public void testPlugsHolesWithReadReplica() throws Exception {
142    TableName tn = TableName.valueOf(this.name.getMethodName());
143    testPlugsHolesWithReadReplicaInternal(tn, 3);
144  }
145
146  /**
147   * Just make sure running fixMeta does right thing for the case of a single-region Table where the
148   * region gets dropped. There is nothing much we can do. We can't restore what we don't know about
149   * (at least from a read of hbase:meta).
150   */
151  @Test
152  public void testOneRegionTable() throws IOException {
153    TableName tn = TableName.valueOf(this.name.getMethodName());
154    TEST_UTIL.createTable(tn, HConstants.CATALOG_FAMILY);
155    List<RegionInfo> ris = MetaTableAccessor.getTableRegions(TEST_UTIL.getConnection(), tn);
156    MasterServices services = TEST_UTIL.getHBaseCluster().getMaster();
157    services.getCatalogJanitor().scan();
158    deleteRegion(services, ris.get(0));
159    services.getCatalogJanitor().scan();
160    CatalogJanitorReport report = services.getCatalogJanitor().getLastReport();
161    ris = MetaTableAccessor.getTableRegions(TEST_UTIL.getConnection(), tn);
162    assertTrue(ris.isEmpty());
163    MetaFixer fixer = new MetaFixer(services);
164    fixer.fixHoles(report);
165    report = services.getCatalogJanitor().getLastReport();
166    assertTrue(report.isEmpty());
167    ris = MetaTableAccessor.getTableRegions(TEST_UTIL.getConnection(), tn);
168    assertEquals(0, ris.size());
169  }
170
171  private static RegionInfo makeOverlap(MasterServices services, RegionInfo a, RegionInfo b)
172    throws IOException {
173    RegionInfo overlapRegion = RegionInfoBuilder.newBuilder(a.getTable())
174      .setStartKey(a.getStartKey()).setEndKey(b.getEndKey()).build();
175    TEST_UTIL.createRegionDir(overlapRegion, services.getMasterFileSystem());
176    MetaTableAccessor.putsToMetaTable(services.getConnection(),
177      Collections.singletonList(MetaTableAccessor.makePutFromRegionInfo(overlapRegion,
178        EnvironmentEdgeManager.currentTime())));
179    // TODO: Add checks at assign time to PREVENT being able to assign over existing assign.
180    long assign = services.getAssignmentManager().assign(overlapRegion);
181    ProcedureTestingUtility.waitProcedures(services.getMasterProcedureExecutor(), assign);
182    return overlapRegion;
183  }
184
185  private void testOverlapCommon(final TableName tn) throws Exception {
186    Table t = TEST_UTIL.createMultiRegionTable(tn, HConstants.CATALOG_FAMILY);
187    TEST_UTIL.loadTable(t, HConstants.CATALOG_FAMILY);
188    List<RegionInfo> ris = MetaTableAccessor.getTableRegions(TEST_UTIL.getConnection(), tn);
189    assertTrue(ris.size() > 5);
190    HMaster services = TEST_UTIL.getHBaseCluster().getMaster();
191    services.getCatalogJanitor().scan();
192    CatalogJanitorReport report = services.getCatalogJanitor().getLastReport();
193    assertTrue(report.isEmpty());
194    // Make a simple overlap spanning second and third region.
195    makeOverlap(services, ris.get(1), ris.get(3));
196    makeOverlap(services, ris.get(2), ris.get(3));
197    makeOverlap(services, ris.get(2), ris.get(4));
198  }
199
200  @Test
201  public void testOverlap() throws Exception {
202    TableName tn = TableName.valueOf(this.name.getMethodName());
203    testOverlapCommon(tn);
204    HMaster services = TEST_UTIL.getHBaseCluster().getMaster();
205    HbckChore hbckChore = services.getHbckChore();
206
207    CatalogJanitor cj = services.getCatalogJanitor();
208    cj.scan();
209    CatalogJanitorReport report = cj.getLastReport();
210    assertEquals(6, report.getOverlaps().size());
211    assertEquals(1, MetaFixer.calculateMerges(10, report.getOverlaps()).size());
212    MetaFixer fixer = new MetaFixer(services);
213    fixer.fixOverlaps(report);
214
215    HBaseTestingUtil.await(10, () -> {
216      try {
217        if (cj.scan() > 0) {
218          // It submits GC once, then it will immediately kick off another GC to test if
219          // GCMultipleMergedRegionsProcedure is idempotent. If it is not, it will create
220          // a hole.
221          Map<RegionInfo, Result> mergedRegions = cj.getLastReport().mergedRegions;
222          for (Map.Entry<RegionInfo, Result> e : mergedRegions.entrySet()) {
223            List<RegionInfo> parents = CatalogFamilyFormat.getMergeRegions(e.getValue().rawCells());
224            if (parents != null) {
225              ProcedureExecutor<MasterProcedureEnv> pe = services.getMasterProcedureExecutor();
226              pe.submitProcedure(
227                new GCMultipleMergedRegionsProcedure(pe.getEnvironment(), e.getKey(), parents));
228            }
229          }
230          return true;
231        }
232        return false;
233      } catch (Exception e) {
234        throw new RuntimeException(e);
235      }
236    });
237
238    // Wait until all GCs settled down
239    HBaseTestingUtil.await(10, () -> {
240      return services.getMasterProcedureExecutor().getActiveProcIds().isEmpty();
241    });
242
243    // No orphan regions on FS
244    hbckChore.choreForTesting();
245    HbckReport hbckReport = hbckChore.getLastReport();
246    assertNotNull(hbckReport);
247    assertEquals(0, hbckReport.getOrphanRegionsOnFS().size());
248
249    // No holes reported.
250    cj.scan();
251    final CatalogJanitorReport postReport = cj.getLastReport();
252    assertTrue(postReport.isEmpty());
253  }
254
255  @Test
256  public void testMultipleTableOverlaps() throws Exception {
257    TableName t1 = TableName.valueOf("t1");
258    TableName t2 = TableName.valueOf("t2");
259    TEST_UTIL.createMultiRegionTable(t1, new byte[][] { HConstants.CATALOG_FAMILY });
260    TEST_UTIL.createMultiRegionTable(t2, new byte[][] { HConstants.CATALOG_FAMILY });
261    TEST_UTIL.waitTableAvailable(t2);
262
263    HMaster services = TEST_UTIL.getHBaseCluster().getMaster();
264    services.getCatalogJanitor().scan();
265    CatalogJanitorReport report = services.getCatalogJanitor().getLastReport();
266    assertTrue(report.isEmpty());
267
268    // Make a simple overlap for t1
269    List<RegionInfo> ris = MetaTableAccessor.getTableRegions(TEST_UTIL.getConnection(), t1);
270    makeOverlap(services, ris.get(1), ris.get(2));
271    // Make a simple overlap for t2
272    ris = MetaTableAccessor.getTableRegions(TEST_UTIL.getConnection(), t2);
273    makeOverlap(services, ris.get(1), ris.get(2));
274
275    services.getCatalogJanitor().scan();
276    report = services.getCatalogJanitor().getLastReport();
277    assertEquals("Region overlaps count does not match.", 4, report.getOverlaps().size());
278
279    MetaFixer fixer = new MetaFixer(services);
280    List<Long> longs = fixer.fixOverlaps(report);
281    long[] procIds = longs.stream().mapToLong(l -> l).toArray();
282    ProcedureTestingUtility.waitProcedures(services.getMasterProcedureExecutor(), procIds);
283
284    // After fix, verify no overlaps are left.
285    services.getCatalogJanitor().scan();
286    report = services.getCatalogJanitor().getLastReport();
287    assertTrue("After fix there should not have been any overlaps.", report.isEmpty());
288  }
289
290  @Test
291  public void testOverlapWithSmallMergeCount() throws Exception {
292    TableName tn = TableName.valueOf(this.name.getMethodName());
293    try {
294      testOverlapCommon(tn);
295      HMaster services = TEST_UTIL.getHBaseCluster().getMaster();
296      CatalogJanitor cj = services.getCatalogJanitor();
297      cj.scan();
298      CatalogJanitorReport report = cj.getLastReport();
299      assertEquals(6, report.getOverlaps().size());
300      assertEquals(2, MetaFixer.calculateMerges(5, report.getOverlaps()).size());
301
302      // The max merge count is set to 5 so overlap regions are divided into
303      // two merge requests.
304      TEST_UTIL.getHBaseCluster().getMaster().getConfiguration()
305        .setInt("hbase.master.metafixer.max.merge.count", 5);
306
307      // Get overlap regions
308      HashSet<String> overlapRegions = new HashSet<>();
309      for (Pair<RegionInfo, RegionInfo> pair : report.getOverlaps()) {
310        overlapRegions.add(pair.getFirst().getRegionNameAsString());
311        overlapRegions.add(pair.getSecond().getRegionNameAsString());
312      }
313
314      MetaFixer fixer = new MetaFixer(services);
315      fixer.fixOverlaps(report);
316      AssignmentManager am = services.getAssignmentManager();
317
318      HBaseTestingUtil.await(200, () -> {
319        try {
320          cj.scan();
321          final CatalogJanitorReport postReport = cj.getLastReport();
322          RegionStates regionStates = am.getRegionStates();
323          RegionStateStore regionStateStore = am.getRegionStateStore();
324          // Make sure that two merged regions are opened and GCs are done.
325          if (postReport.getOverlaps().size() == 1) {
326            Pair<RegionInfo, RegionInfo> pair = postReport.getOverlaps().get(0);
327            if (
328              (!overlapRegions.contains(pair.getFirst().getRegionNameAsString())
329                && regionStates.getRegionState(pair.getFirst()).isOpened())
330                && (!overlapRegions.contains(pair.getSecond().getRegionNameAsString())
331                  && regionStates.getRegionState(pair.getSecond()).isOpened())
332            ) {
333              // Make sure GC is done.
334              List<RegionInfo> firstParents = regionStateStore.getMergeRegions(pair.getFirst());
335              List<RegionInfo> secondParents = regionStateStore.getMergeRegions(pair.getSecond());
336
337              return (firstParents == null || firstParents.isEmpty())
338                && (secondParents == null || secondParents.isEmpty());
339            }
340          }
341          return false;
342        } catch (Exception e) {
343          throw new RuntimeException(e);
344        }
345      });
346
347      // Second run of fixOverlap should fix all.
348      report = cj.getLastReport();
349      fixer.fixOverlaps(report);
350
351      HBaseTestingUtil.await(20, () -> {
352        try {
353          // Make sure it GC only once.
354          return (cj.scan() > 0);
355        } catch (Exception e) {
356          throw new RuntimeException(e);
357        }
358      });
359
360      // No holes reported.
361      cj.scan();
362      final CatalogJanitorReport postReport = cj.getLastReport();
363      assertTrue(postReport.isEmpty());
364
365    } finally {
366      TEST_UTIL.getHBaseCluster().getMaster().getConfiguration()
367        .unset("hbase.master.metafixer.max.merge.count");
368
369      TEST_UTIL.deleteTable(tn);
370    }
371  }
372
373  /**
374   * This test covers the case that one of merged parent regions is a merged child region that has
375   * not been GCed but there is no reference files anymore. In this case, it will kick off a GC
376   * procedure, but no merge will happen.
377   */
378  @Test
379  public void testMergeWithMergedChildRegion() throws Exception {
380    TableName tn = TableName.valueOf(this.name.getMethodName());
381    TEST_UTIL.createMultiRegionTable(tn, HConstants.CATALOG_FAMILY);
382    List<RegionInfo> ris = MetaTableAccessor.getTableRegions(TEST_UTIL.getConnection(), tn);
383    assertTrue(ris.size() > 5);
384    HMaster services = TEST_UTIL.getHBaseCluster().getMaster();
385    CatalogJanitor cj = services.getCatalogJanitor();
386    cj.scan();
387    CatalogJanitorReport report = cj.getLastReport();
388    assertTrue(report.isEmpty());
389    RegionInfo overlapRegion = makeOverlap(services, ris.get(1), ris.get(2));
390
391    cj.scan();
392    report = cj.getLastReport();
393    assertEquals(2, report.getOverlaps().size());
394
395    // Mark it as a merged child region.
396    RegionInfo fakedParentRegion =
397      RegionInfoBuilder.newBuilder(tn).setStartKey(overlapRegion.getStartKey()).build();
398
399    Table meta = MetaTableAccessor.getMetaHTable(TEST_UTIL.getConnection());
400    Put putOfMerged =
401      MetaTableAccessor.makePutFromRegionInfo(overlapRegion, HConstants.LATEST_TIMESTAMP);
402    String qualifier = String.format(HConstants.MERGE_QUALIFIER_PREFIX_STR + "%04d", 0);
403    putOfMerged.add(CellBuilderFactory.create(CellBuilderType.SHALLOW_COPY)
404      .setRow(putOfMerged.getRow()).setFamily(HConstants.CATALOG_FAMILY)
405      .setQualifier(Bytes.toBytes(qualifier)).setTimestamp(putOfMerged.getTimestamp())
406      .setType(Cell.Type.Put).setValue(RegionInfo.toByteArray(fakedParentRegion)).build());
407
408    meta.put(putOfMerged);
409
410    MetaFixer fixer = new MetaFixer(services);
411    fixer.fixOverlaps(report);
412
413    // Wait until all procedures settled down
414    HBaseTestingUtil.await(200, () -> {
415      return services.getMasterProcedureExecutor().getActiveProcIds().isEmpty();
416    });
417
418    // No merge is done, overlap is still there.
419    cj.scan();
420    report = cj.getLastReport();
421    assertEquals(2, report.getOverlaps().size());
422
423    fixer.fixOverlaps(report);
424
425    // Wait until all procedures settled down
426    HBaseTestingUtil.await(200, () -> {
427      return services.getMasterProcedureExecutor().getActiveProcIds().isEmpty();
428    });
429
430    // Merge is done and no more overlaps
431    cj.scan();
432    report = cj.getLastReport();
433    assertEquals(0, report.getOverlaps().size());
434  }
435
436  /**
437   * Make it so a big overlap spans many Regions, some of which are non-contiguous. Make it so we
438   * can fix this condition. HBASE-24247
439   */
440  @Test
441  public void testOverlapWithMergeOfNonContiguous() throws Exception {
442    TableName tn = TableName.valueOf(this.name.getMethodName());
443    TEST_UTIL.createMultiRegionTable(tn, HConstants.CATALOG_FAMILY);
444    List<RegionInfo> ris = MetaTableAccessor.getTableRegions(TEST_UTIL.getConnection(), tn);
445    assertTrue(ris.size() > 5);
446    MasterServices services = TEST_UTIL.getHBaseCluster().getMaster();
447    services.getCatalogJanitor().scan();
448    CatalogJanitorReport report = services.getCatalogJanitor().getLastReport();
449    assertTrue(report.isEmpty());
450    // Make a simple overlap spanning second and third region.
451    makeOverlap(services, ris.get(1), ris.get(5));
452    // Now Delete a region under the overlap to manufacture non-contiguous sub regions.
453    RegionInfo deletedRegion = ris.get(3);
454    long pid = services.getAssignmentManager().unassign(deletedRegion);
455    while (!services.getMasterProcedureExecutor().isFinished(pid)) {
456      Threads.sleep(100);
457    }
458    GCRegionProcedure procedure =
459      new GCRegionProcedure(services.getMasterProcedureExecutor().getEnvironment(), ris.get(3));
460    pid = services.getMasterProcedureExecutor().submitProcedure(procedure);
461    while (!services.getMasterProcedureExecutor().isFinished(pid)) {
462      Threads.sleep(100);
463    }
464    services.getCatalogJanitor().scan();
465    report = services.getCatalogJanitor().getLastReport();
466    assertEquals(1, MetaFixer.calculateMerges(10, report.getOverlaps()).size());
467    MetaFixer fixer = new MetaFixer(services);
468    fixer.fixOverlaps(report);
469    HBaseTestingUtil.await(10, () -> {
470      try {
471        services.getCatalogJanitor().scan();
472        final CatalogJanitorReport postReport = services.getCatalogJanitor().getLastReport();
473        return postReport.isEmpty();
474      } catch (Exception e) {
475        throw new RuntimeException(e);
476      }
477    });
478  }
479}