001/*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018package org.apache.hadoop.hbase.master.janitor;
019
020import static org.apache.hbase.thirdparty.org.apache.commons.collections4.CollectionUtils.isNotEmpty;
021import static org.junit.Assert.assertEquals;
022import static org.junit.Assert.assertTrue;
023import java.io.IOException;
024import java.util.Collections;
025import java.util.HashSet;
026import java.util.List;
027import java.util.Map;
028import org.apache.hadoop.hbase.Cell;
029import org.apache.hadoop.hbase.CellBuilderFactory;
030import org.apache.hadoop.hbase.CellBuilderType;
031import org.apache.hadoop.hbase.HBaseClassTestRule;
032import org.apache.hadoop.hbase.HBaseTestingUtility;
033import org.apache.hadoop.hbase.HConstants;
034import org.apache.hadoop.hbase.MetaTableAccessor;
035import org.apache.hadoop.hbase.TableName;
036import org.apache.hadoop.hbase.client.Put;
037import org.apache.hadoop.hbase.client.RegionInfo;
038import org.apache.hadoop.hbase.client.RegionInfoBuilder;
039import org.apache.hadoop.hbase.client.Result;
040import org.apache.hadoop.hbase.client.Table;
041import org.apache.hadoop.hbase.master.HMaster;
042import org.apache.hadoop.hbase.master.HbckChore;
043import org.apache.hadoop.hbase.master.MasterServices;
044import org.apache.hadoop.hbase.master.assignment.AssignmentManager;
045import org.apache.hadoop.hbase.master.assignment.GCRegionProcedure;
046import org.apache.hadoop.hbase.master.assignment.GCMultipleMergedRegionsProcedure;
047import org.apache.hadoop.hbase.master.assignment.RegionStates;
048import org.apache.hadoop.hbase.master.procedure.MasterProcedureEnv;
049import org.apache.hadoop.hbase.procedure2.ProcedureExecutor;
050import org.apache.hadoop.hbase.procedure2.ProcedureTestingUtility;
051import org.apache.hadoop.hbase.testclassification.LargeTests;
052import org.apache.hadoop.hbase.testclassification.MasterTests;
053import org.apache.hadoop.hbase.util.Bytes;
054import org.apache.hadoop.hbase.util.Pair;
055import org.apache.hadoop.hbase.util.Threads;
056import org.junit.AfterClass;
057import org.junit.BeforeClass;
058import org.junit.ClassRule;
059import org.junit.Rule;
060import org.junit.Test;
061import org.junit.experimental.categories.Category;
062import org.junit.rules.TestName;
063
064@Category({MasterTests.class, LargeTests.class})
065public class TestMetaFixer {
066  @ClassRule
067  public static final HBaseClassTestRule CLASS_RULE =
068      HBaseClassTestRule.forClass(TestMetaFixer.class);
069  @Rule
070  public TestName name = new TestName();
071
072  private static final HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility();
073
074  @BeforeClass
075  public static void setupBeforeClass() throws Exception {
076    TEST_UTIL.startMiniCluster();
077  }
078
079  @AfterClass
080  public static void tearDownAfterClass() throws Exception {
081    TEST_UTIL.shutdownMiniCluster();
082  }
083
084  private void deleteRegion(MasterServices services, RegionInfo ri) throws IOException {
085    MetaTableAccessor.deleteRegionInfo(TEST_UTIL.getConnection(), ri);
086    // Delete it from Master context too else it sticks around.
087    services.getAssignmentManager().getRegionStates().deleteRegion(ri);
088  }
089
090  private void testPlugsHolesWithReadReplicaInternal(final TableName tn, final int replicaCount)
091    throws Exception {
092    TEST_UTIL.createMultiRegionTable(tn, replicaCount, new byte[][] { HConstants.CATALOG_FAMILY });
093    List<RegionInfo> ris = MetaTableAccessor.getTableRegions(TEST_UTIL.getConnection(), tn);
094    MasterServices services = TEST_UTIL.getHBaseCluster().getMaster();
095    int initialSize = services.getAssignmentManager().getRegionStates().getRegionStates().size();
096    services.getCatalogJanitor().scan();
097    Report report = services.getCatalogJanitor().getLastReport();
098    assertTrue(report.isEmpty());
099    int originalCount = ris.size();
100    // Remove first, last and middle region. See if hole gets plugged. Table has 26 * replicaCount regions.
101    for (int i = 0; i < replicaCount; i ++) {
102      deleteRegion(services, ris.get(3 * replicaCount + i));
103      deleteRegion(services, ris.get(i));
104      deleteRegion(services, ris.get(ris.size() - 1 - i));
105    }
106    assertEquals(initialSize - 3 * replicaCount,
107      services.getAssignmentManager().getRegionStates().getRegionStates().size());
108    services.getCatalogJanitor().scan();
109    report = services.getCatalogJanitor().getLastReport();
110    assertEquals(report.toString(), 3, report.getHoles().size());
111    MetaFixer fixer = new MetaFixer(services);
112    fixer.fixHoles(report);
113    services.getCatalogJanitor().scan();
114    report = services.getCatalogJanitor().getLastReport();
115    assertTrue(report.toString(), report.isEmpty());
116    assertEquals(initialSize,
117      services.getAssignmentManager().getRegionStates().getRegionStates().size());
118
119    // wait for RITs to settle -- those are the fixed regions being assigned -- or until the
120    // watchdog TestRule terminates the test.
121    HBaseTestingUtility.await(50,
122      () -> services.getMasterProcedureExecutor().getActiveProcIds().size() == 0);
123
124    ris = MetaTableAccessor.getTableRegions(TEST_UTIL.getConnection(), tn);
125    assertEquals(originalCount, ris.size());
126  }
127
128  @Test
129  public void testPlugsHoles() throws Exception {
130    TableName tn = TableName.valueOf(this.name.getMethodName());
131    testPlugsHolesWithReadReplicaInternal(tn, 1);
132  }
133
134  @Test
135  public void testPlugsHolesWithReadReplica() throws Exception {
136    TableName tn = TableName.valueOf(this.name.getMethodName());
137    testPlugsHolesWithReadReplicaInternal(tn, 3);
138  }
139
140  /**
141   * Just make sure running fixMeta does right thing for the case
142   * of a single-region Table where the region gets dropped.
143   * There is nothing much we can do. We can't restore what
144   * we don't know about (at least from a read of hbase:meta).
145   */
146  @Test
147  public void testOneRegionTable() throws IOException {
148    TableName tn = TableName.valueOf(this.name.getMethodName());
149    TEST_UTIL.createTable(tn, HConstants.CATALOG_FAMILY);
150    List<RegionInfo> ris = MetaTableAccessor.getTableRegions(TEST_UTIL.getConnection(), tn);
151    MasterServices services = TEST_UTIL.getHBaseCluster().getMaster();
152    services.getCatalogJanitor().scan();
153    deleteRegion(services, ris.get(0));
154    services.getCatalogJanitor().scan();
155    Report report = services.getCatalogJanitor().getLastReport();
156    ris = MetaTableAccessor.getTableRegions(TEST_UTIL.getConnection(), tn);
157    assertTrue(ris.isEmpty());
158    MetaFixer fixer = new MetaFixer(services);
159    fixer.fixHoles(report);
160    report = services.getCatalogJanitor().getLastReport();
161    assertTrue(report.isEmpty());
162    ris = MetaTableAccessor.getTableRegions(TEST_UTIL.getConnection(), tn);
163    assertEquals(0, ris.size());
164  }
165
166  private static RegionInfo makeOverlap(MasterServices services, RegionInfo a, RegionInfo b)
167      throws IOException {
168    RegionInfo overlapRegion = RegionInfoBuilder.newBuilder(a.getTable()).
169        setStartKey(a.getStartKey()).
170        setEndKey(b.getEndKey()).
171        build();
172    MetaTableAccessor.putsToMetaTable(services.getConnection(),
173        Collections.singletonList(MetaTableAccessor.makePutFromRegionInfo(overlapRegion,
174            System.currentTimeMillis())));
175    // TODO: Add checks at assign time to PREVENT being able to assign over existing assign.
176    long assign = services.getAssignmentManager().assign(overlapRegion);
177    ProcedureTestingUtility.waitProcedures(services.getMasterProcedureExecutor(), assign);
178    return overlapRegion;
179  }
180
181  private void testOverlapCommon(final TableName tn) throws Exception {
182    Table t = TEST_UTIL.createMultiRegionTable(tn, HConstants.CATALOG_FAMILY);
183    TEST_UTIL.loadTable(t, HConstants.CATALOG_FAMILY);
184    List<RegionInfo> ris = MetaTableAccessor.getTableRegions(TEST_UTIL.getConnection(), tn);
185    assertTrue(ris.size() > 5);
186    HMaster services = TEST_UTIL.getHBaseCluster().getMaster();
187    services.getCatalogJanitor().scan();
188    Report report = services.getCatalogJanitor().getLastReport();
189    assertTrue(report.isEmpty());
190    // Make a simple overlap spanning second and third region.
191    makeOverlap(services, ris.get(1), ris.get(3));
192    makeOverlap(services, ris.get(2), ris.get(3));
193    makeOverlap(services, ris.get(2), ris.get(4));
194  }
195
196  @Test
197  public void testOverlap() throws Exception {
198    TableName tn = TableName.valueOf(this.name.getMethodName());
199    testOverlapCommon(tn);
200    HMaster services = TEST_UTIL.getHBaseCluster().getMaster();
201    HbckChore hbckChore = services.getHbckChore();
202
203    CatalogJanitor cj = services.getCatalogJanitor();
204    cj.scan();
205    Report report = cj.getLastReport();
206    assertEquals(6, report.getOverlaps().size());
207    assertEquals(1,
208      MetaFixer.calculateMerges(10, report.getOverlaps()).size());
209    MetaFixer fixer = new MetaFixer(services);
210    fixer.fixOverlaps(report);
211
212    HBaseTestingUtility. await(10, () -> {
213      try {
214        if (cj.scan() > 0) {
215          // It submits GC once, then it will immediately kick off another GC to test if
216          // GCMultipleMergedRegionsProcedure is idempotent. If it is not, it will create
217          // a hole.
218          Map<RegionInfo, Result> mergedRegions = cj.getLastReport().mergedRegions;
219          for (Map.Entry<RegionInfo, Result> e : mergedRegions.entrySet()) {
220            List<RegionInfo> parents = MetaTableAccessor.getMergeRegions(e.getValue().rawCells());
221            if (parents != null) {
222              ProcedureExecutor<MasterProcedureEnv> pe = services.getMasterProcedureExecutor();
223              pe.submitProcedure(new GCMultipleMergedRegionsProcedure(pe.getEnvironment(),
224                e.getKey(), parents));
225            }
226          }
227          return true;
228        }
229        return false;
230      } catch (Exception e) {
231        throw new RuntimeException(e);
232      }
233    });
234
235    // Wait until all GCs settled down
236    HBaseTestingUtility.await(10, () -> {
237      return services.getMasterProcedureExecutor().getActiveProcIds().isEmpty();
238    });
239
240    // No orphan regions on FS
241    hbckChore.choreForTesting();
242    assertEquals(0, hbckChore.getOrphanRegionsOnFS().size());
243
244    // No holes reported.
245    cj.scan();
246    final Report postReport = cj.getLastReport();
247    assertTrue(postReport.isEmpty());
248  }
249
250  @Test
251  public void testMultipleTableOverlaps() throws Exception {
252    TableName t1 = TableName.valueOf("t1");
253    TableName t2 = TableName.valueOf("t2");
254    TEST_UTIL.createMultiRegionTable(t1, new byte[][] { HConstants.CATALOG_FAMILY });
255    TEST_UTIL.createMultiRegionTable(t2, new byte[][] { HConstants.CATALOG_FAMILY });
256    TEST_UTIL.waitTableAvailable(t2);
257
258    HMaster services = TEST_UTIL.getHBaseCluster().getMaster();
259    services.getCatalogJanitor().scan();
260    Report report = services.getCatalogJanitor().getLastReport();
261    assertTrue(report.isEmpty());
262
263    // Make a simple overlap for t1
264    List<RegionInfo> ris = MetaTableAccessor.getTableRegions(TEST_UTIL.getConnection(), t1);
265    makeOverlap(services, ris.get(1), ris.get(2));
266    // Make a simple overlap for t2
267    ris = MetaTableAccessor.getTableRegions(TEST_UTIL.getConnection(), t2);
268    makeOverlap(services, ris.get(1), ris.get(2));
269
270    services.getCatalogJanitor().scan();
271    report = services.getCatalogJanitor().getLastReport();
272    assertEquals("Region overlaps count does not match.", 4, report.getOverlaps().size());
273
274    MetaFixer fixer = new MetaFixer(services);
275    List<Long> longs = fixer.fixOverlaps(report);
276    long[] procIds = longs.stream().mapToLong(l -> l).toArray();
277    ProcedureTestingUtility.waitProcedures(services.getMasterProcedureExecutor(), procIds);
278
279    // After fix, verify no overlaps are left.
280    services.getCatalogJanitor().scan();
281    report = services.getCatalogJanitor().getLastReport();
282    assertTrue("After fix there should not have been any overlaps.", report.isEmpty());
283  }
284
285  @Test
286  public void testOverlapWithSmallMergeCount() throws Exception {
287    TableName tn = TableName.valueOf(this.name.getMethodName());
288    try {
289      testOverlapCommon(tn);
290      HMaster services = TEST_UTIL.getHBaseCluster().getMaster();
291      CatalogJanitor cj = services.getCatalogJanitor();
292      cj.scan();
293      Report report = cj.getLastReport();
294      assertEquals(6, report.getOverlaps().size());
295      assertEquals(2,
296        MetaFixer.calculateMerges(5, report.getOverlaps()).size());
297
298      // The max merge count is set to 5 so overlap regions are divided into
299      // two merge requests.
300      TEST_UTIL.getHBaseCluster().getMaster().getConfiguration().setInt(
301        "hbase.master.metafixer.max.merge.count", 5);
302
303      // Get overlap regions
304      HashSet<String> overlapRegions = new HashSet<>();
305      for (Pair<RegionInfo, RegionInfo> pair : report.getOverlaps()) {
306        overlapRegions.add(pair.getFirst().getRegionNameAsString());
307        overlapRegions.add(pair.getSecond().getRegionNameAsString());
308      }
309
310      MetaFixer fixer = new MetaFixer(services);
311      fixer.fixOverlaps(report);
312      AssignmentManager am = services.getAssignmentManager();
313
314      HBaseTestingUtility.await(200, () -> {
315        try {
316          cj.scan();
317          final Report postReport = cj.getLastReport();
318          RegionStates regionStates = am.getRegionStates();
319
320          // Make sure that two merged regions are opened and GCs are done.
321          if (postReport.getOverlaps().size() == 1) {
322            Pair<RegionInfo, RegionInfo> pair = postReport.getOverlaps().get(0);
323            if ((!overlapRegions.contains(pair.getFirst().getRegionNameAsString()) &&
324              regionStates.getRegionState(pair.getFirst()).isOpened()) &&
325              (!overlapRegions.contains(pair.getSecond().getRegionNameAsString()) &&
326              regionStates.getRegionState(pair.getSecond()).isOpened())) {
327              // Make sure GC is done.
328              List<RegionInfo> firstParents = MetaTableAccessor.getMergeRegions(
329                services.getConnection(), pair.getFirst().getRegionName());
330              List<RegionInfo> secondParents = MetaTableAccessor.getMergeRegions(
331                services.getConnection(), pair.getSecond().getRegionName());
332
333              return (firstParents == null || firstParents.isEmpty()) &&
334                (secondParents == null || secondParents.isEmpty());
335            }
336          }
337          return false;
338        } catch (Exception e) {
339          throw new RuntimeException(e);
340        }
341      });
342
343      // Second run of fixOverlap should fix all.
344      report = cj.getLastReport();
345      fixer.fixOverlaps(report);
346
347      HBaseTestingUtility.await(20, () -> {
348        try {
349          // Make sure it GC only once.
350          return (cj.scan() > 0);
351        } catch (Exception e) {
352          throw new RuntimeException(e);
353        }
354      });
355
356      // No holes reported.
357      cj.scan();
358      final Report postReport = cj.getLastReport();
359      assertTrue(postReport.isEmpty());
360
361    } finally {
362      TEST_UTIL.getHBaseCluster().getMaster().getConfiguration().unset(
363        "hbase.master.metafixer.max.merge.count");
364
365      TEST_UTIL.deleteTable(tn);
366    }
367  }
368
369  /**
370   * This test covers the case that one of merged parent regions is a merged child region that
371   * has not been GCed but there is no reference files anymore. In this case, it will kick off
372   * a GC procedure, but no merge will happen.
373   */
374  @Test
375  public void testMergeWithMergedChildRegion() throws Exception {
376    TableName tn = TableName.valueOf(this.name.getMethodName());
377    TEST_UTIL.createMultiRegionTable(tn, HConstants.CATALOG_FAMILY);
378    List<RegionInfo> ris = MetaTableAccessor.getTableRegions(TEST_UTIL.getConnection(), tn);
379    assertTrue(ris.size() > 5);
380    HMaster services = TEST_UTIL.getHBaseCluster().getMaster();
381    CatalogJanitor cj = services.getCatalogJanitor();
382    cj.scan();
383    Report report = cj.getLastReport();
384    assertTrue(report.isEmpty());
385    RegionInfo overlapRegion = makeOverlap(services, ris.get(1), ris.get(2));
386
387    cj.scan();
388    report = cj.getLastReport();
389    assertEquals(2, report.getOverlaps().size());
390
391    // Mark it as a merged child region.
392    RegionInfo fakedParentRegion = RegionInfoBuilder.newBuilder(tn).
393      setStartKey(overlapRegion.getStartKey()).
394      build();
395
396    Table meta = MetaTableAccessor.getMetaHTable(TEST_UTIL.getConnection());
397    Put putOfMerged = MetaTableAccessor.makePutFromRegionInfo(overlapRegion,
398      HConstants.LATEST_TIMESTAMP);
399    String qualifier = String.format(HConstants.MERGE_QUALIFIER_PREFIX_STR + "%04d", 0);
400    putOfMerged.add(CellBuilderFactory.create(CellBuilderType.SHALLOW_COPY).setRow(
401      putOfMerged.getRow()).
402      setFamily(HConstants.CATALOG_FAMILY).
403      setQualifier(Bytes.toBytes(qualifier)).
404      setTimestamp(putOfMerged.getTimestamp()).
405      setType(Cell.Type.Put).
406      setValue(RegionInfo.toByteArray(fakedParentRegion)).
407      build());
408
409    meta.put(putOfMerged);
410
411    MetaFixer fixer = new MetaFixer(services);
412    fixer.fixOverlaps(report);
413
414    // Wait until all procedures settled down
415    HBaseTestingUtility.await(200, () -> {
416      return services.getMasterProcedureExecutor().getActiveProcIds().isEmpty();
417    });
418
419    // No merge is done, overlap is still there.
420    cj.scan();
421    report = cj.getLastReport();
422    assertEquals(2, report.getOverlaps().size());
423
424    fixer.fixOverlaps(report);
425
426    // Wait until all procedures settled down
427    HBaseTestingUtility.await(200, () -> {
428      return services.getMasterProcedureExecutor().getActiveProcIds().isEmpty();
429    });
430
431    // Merge is done and no more overlaps
432    cj.scan();
433    report = cj.getLastReport();
434    assertEquals(0, report.getOverlaps().size());
435  }
436
437  /**
438   * Make it so a big overlap spans many Regions, some of which are non-contiguous. Make it so
439   * we can fix this condition. HBASE-24247
440   */
441  @Test
442  public void testOverlapWithMergeOfNonContiguous() throws Exception {
443    TableName tn = TableName.valueOf(this.name.getMethodName());
444    TEST_UTIL.createMultiRegionTable(tn, HConstants.CATALOG_FAMILY);
445    List<RegionInfo> ris = MetaTableAccessor.getTableRegions(TEST_UTIL.getConnection(), tn);
446    assertTrue(ris.size() > 5);
447    MasterServices services = TEST_UTIL.getHBaseCluster().getMaster();
448    services.getCatalogJanitor().scan();
449    Report report = services.getCatalogJanitor().getLastReport();
450    assertTrue(report.isEmpty());
451    // Make a simple overlap spanning second and third region.
452    makeOverlap(services, ris.get(1), ris.get(5));
453    // Now Delete a region under the overlap to manufacture non-contiguous sub regions.
454    RegionInfo deletedRegion = ris.get(3);
455    long pid = services.getAssignmentManager().unassign(deletedRegion);
456    while (!services.getMasterProcedureExecutor().isFinished(pid)) {
457      Threads.sleep(100);
458    }
459    GCRegionProcedure procedure =
460      new GCRegionProcedure(services.getMasterProcedureExecutor().getEnvironment(), ris.get(3));
461    pid = services.getMasterProcedureExecutor().submitProcedure(procedure);
462    while (!services.getMasterProcedureExecutor().isFinished(pid)) {
463      Threads.sleep(100);
464    }
465    services.getCatalogJanitor().scan();
466    report = services.getCatalogJanitor().getLastReport();
467    assertEquals(1, MetaFixer.calculateMerges(10, report.getOverlaps()).size());
468    MetaFixer fixer = new MetaFixer(services);
469    fixer.fixOverlaps(report);
470    HBaseTestingUtility.await(10, () -> {
471      try {
472        services.getCatalogJanitor().scan();
473        final Report postReport = services.getCatalogJanitor().getLastReport();
474        return postReport.isEmpty();
475      } catch (Exception e) {
476        throw new RuntimeException(e);
477      }
478    });
479  }
480}