001/*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018package org.apache.hadoop.hbase.master;
019
020import static org.apache.hbase.thirdparty.org.apache.commons.collections4.CollectionUtils.isNotEmpty;
021import static org.junit.Assert.assertEquals;
022import static org.junit.Assert.assertTrue;
023import java.io.IOException;
024import java.util.Collections;
025import java.util.HashSet;
026import java.util.List;
027import java.util.Map;
028import org.apache.hadoop.hbase.Cell;
029import org.apache.hadoop.hbase.CellBuilderFactory;
030import org.apache.hadoop.hbase.CellBuilderType;
031import org.apache.hadoop.hbase.HBaseClassTestRule;
032import org.apache.hadoop.hbase.HBaseTestingUtility;
033import org.apache.hadoop.hbase.HConstants;
034import org.apache.hadoop.hbase.MetaTableAccessor;
035import org.apache.hadoop.hbase.TableName;
036import org.apache.hadoop.hbase.client.Put;
037import org.apache.hadoop.hbase.client.RegionInfo;
038import org.apache.hadoop.hbase.client.RegionInfoBuilder;
039import org.apache.hadoop.hbase.client.Result;
040import org.apache.hadoop.hbase.client.Table;
041import org.apache.hadoop.hbase.master.assignment.AssignmentManager;
042import org.apache.hadoop.hbase.master.assignment.GCRegionProcedure;
043import org.apache.hadoop.hbase.master.assignment.GCMultipleMergedRegionsProcedure;
044import org.apache.hadoop.hbase.master.assignment.RegionStates;
045import org.apache.hadoop.hbase.master.procedure.MasterProcedureEnv;
046import org.apache.hadoop.hbase.procedure2.ProcedureExecutor;
047import org.apache.hadoop.hbase.testclassification.LargeTests;
048import org.apache.hadoop.hbase.testclassification.MasterTests;
049import org.apache.hadoop.hbase.util.Bytes;
050import org.apache.hadoop.hbase.util.Pair;
051import org.apache.hadoop.hbase.util.Threads;
052import org.junit.AfterClass;
053import org.junit.BeforeClass;
054import org.junit.ClassRule;
055import org.junit.Rule;
056import org.junit.Test;
057import org.junit.experimental.categories.Category;
058import org.junit.rules.TestName;
059
060@Category({MasterTests.class, LargeTests.class})
061public class TestMetaFixer {
062  @ClassRule
063  public static final HBaseClassTestRule CLASS_RULE =
064      HBaseClassTestRule.forClass(TestMetaFixer.class);
065  @Rule
066  public TestName name = new TestName();
067
068  private static final HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility();
069
070  @BeforeClass
071  public static void setupBeforeClass() throws Exception {
072    TEST_UTIL.startMiniCluster();
073  }
074
075  @AfterClass
076  public static void tearDownAfterClass() throws Exception {
077    TEST_UTIL.shutdownMiniCluster();
078  }
079
080  private void deleteRegion(MasterServices services, RegionInfo ri) throws IOException {
081    MetaTableAccessor.deleteRegionInfo(TEST_UTIL.getConnection(), ri);
082    // Delete it from Master context too else it sticks around.
083    services.getAssignmentManager().getRegionStates().deleteRegion(ri);
084  }
085
086  private void testPlugsHolesWithReadReplicaInternal(final TableName tn, final int replicaCount)
087    throws Exception {
088    TEST_UTIL.createMultiRegionTable(tn, replicaCount, new byte[][] { HConstants.CATALOG_FAMILY });
089    List<RegionInfo> ris = MetaTableAccessor.getTableRegions(TEST_UTIL.getConnection(), tn);
090    MasterServices services = TEST_UTIL.getHBaseCluster().getMaster();
091    int initialSize = services.getAssignmentManager().getRegionStates().getRegionStates().size();
092    services.getCatalogJanitor().scan();
093    CatalogJanitor.Report report = services.getCatalogJanitor().getLastReport();
094    assertTrue(report.isEmpty());
095    int originalCount = ris.size();
096    // Remove first, last and middle region. See if hole gets plugged. Table has 26 * replicaCount regions.
097    for (int i = 0; i < replicaCount; i ++) {
098      deleteRegion(services, ris.get(3 * replicaCount + i));
099      deleteRegion(services, ris.get(i));
100      deleteRegion(services, ris.get(ris.size() - 1 - i));
101    }
102    assertEquals(initialSize - 3 * replicaCount,
103      services.getAssignmentManager().getRegionStates().getRegionStates().size());
104    services.getCatalogJanitor().scan();
105    report = services.getCatalogJanitor().getLastReport();
106    assertEquals(report.toString(), 3, report.getHoles().size());
107    MetaFixer fixer = new MetaFixer(services);
108    fixer.fixHoles(report);
109    services.getCatalogJanitor().scan();
110    report = services.getCatalogJanitor().getLastReport();
111    assertTrue(report.toString(), report.isEmpty());
112    assertEquals(initialSize,
113      services.getAssignmentManager().getRegionStates().getRegionStates().size());
114
115    // wait for RITs to settle -- those are the fixed regions being assigned -- or until the
116    // watchdog TestRule terminates the test.
117    HBaseTestingUtility.await(50,
118      () -> services.getMasterProcedureExecutor().getActiveProcIds().size() == 0);
119
120    ris = MetaTableAccessor.getTableRegions(TEST_UTIL.getConnection(), tn);
121    assertEquals(originalCount, ris.size());
122  }
123
124  @Test
125  public void testPlugsHoles() throws Exception {
126    TableName tn = TableName.valueOf(this.name.getMethodName());
127    testPlugsHolesWithReadReplicaInternal(tn, 1);
128  }
129
130  @Test
131  public void testPlugsHolesWithReadReplica() throws Exception {
132    TableName tn = TableName.valueOf(this.name.getMethodName());
133    testPlugsHolesWithReadReplicaInternal(tn, 3);
134  }
135
136  /**
137   * Just make sure running fixMeta does right thing for the case
138   * of a single-region Table where the region gets dropped.
139   * There is nothing much we can do. We can't restore what
140   * we don't know about (at least from a read of hbase:meta).
141   */
142  @Test
143  public void testOneRegionTable() throws IOException {
144    TableName tn = TableName.valueOf(this.name.getMethodName());
145    TEST_UTIL.createTable(tn, HConstants.CATALOG_FAMILY);
146    List<RegionInfo> ris = MetaTableAccessor.getTableRegions(TEST_UTIL.getConnection(), tn);
147    MasterServices services = TEST_UTIL.getHBaseCluster().getMaster();
148    services.getCatalogJanitor().scan();
149    deleteRegion(services, ris.get(0));
150    services.getCatalogJanitor().scan();
151    CatalogJanitor.Report report = services.getCatalogJanitor().getLastReport();
152    ris = MetaTableAccessor.getTableRegions(TEST_UTIL.getConnection(), tn);
153    assertTrue(ris.isEmpty());
154    MetaFixer fixer = new MetaFixer(services);
155    fixer.fixHoles(report);
156    report = services.getCatalogJanitor().getLastReport();
157    assertTrue(report.isEmpty());
158    ris = MetaTableAccessor.getTableRegions(TEST_UTIL.getConnection(), tn);
159    assertEquals(0, ris.size());
160  }
161
162  private static RegionInfo makeOverlap(MasterServices services, RegionInfo a, RegionInfo b)
163      throws IOException {
164    RegionInfo overlapRegion = RegionInfoBuilder.newBuilder(a.getTable()).
165        setStartKey(a.getStartKey()).
166        setEndKey(b.getEndKey()).
167        build();
168    MetaTableAccessor.putsToMetaTable(services.getConnection(),
169        Collections.singletonList(MetaTableAccessor.makePutFromRegionInfo(overlapRegion,
170            System.currentTimeMillis())));
171    // TODO: Add checks at assign time to PREVENT being able to assign over existing assign.
172    services.getAssignmentManager().assign(overlapRegion);
173    return overlapRegion;
174  }
175
176  private void testOverlapCommon(final TableName tn) throws Exception {
177    Table t = TEST_UTIL.createMultiRegionTable(tn, HConstants.CATALOG_FAMILY);
178    TEST_UTIL.loadTable(t, HConstants.CATALOG_FAMILY);
179    List<RegionInfo> ris = MetaTableAccessor.getTableRegions(TEST_UTIL.getConnection(), tn);
180    assertTrue(ris.size() > 5);
181    HMaster services = TEST_UTIL.getHBaseCluster().getMaster();
182    services.getCatalogJanitor().scan();
183    CatalogJanitor.Report report = services.getCatalogJanitor().getLastReport();
184    assertTrue(report.isEmpty());
185    // Make a simple overlap spanning second and third region.
186    makeOverlap(services, ris.get(1), ris.get(3));
187    makeOverlap(services, ris.get(2), ris.get(3));
188    makeOverlap(services, ris.get(2), ris.get(4));
189  }
190
191  @Test
192  public void testOverlap() throws Exception {
193    TableName tn = TableName.valueOf(this.name.getMethodName());
194    testOverlapCommon(tn);
195    HMaster services = TEST_UTIL.getHBaseCluster().getMaster();
196    HbckChore hbckChore = services.getHbckChore();
197
198    CatalogJanitor cj = services.getCatalogJanitor();
199    cj.scan();
200    CatalogJanitor.Report report = cj.getLastReport();
201    assertEquals(6, report.getOverlaps().size());
202    assertEquals(1,
203      MetaFixer.calculateMerges(10, report.getOverlaps()).size());
204    MetaFixer fixer = new MetaFixer(services);
205    fixer.fixOverlaps(report);
206
207    HBaseTestingUtility. await(10, () -> {
208      try {
209        if (cj.scan() > 0) {
210          // It submits GC once, then it will immediately kick off another GC to test if
211          // GCMultipleMergedRegionsProcedure is idempotent. If it is not, it will create
212          // a hole.
213          Map<RegionInfo, Result> mergedRegions = cj.getLastReport().mergedRegions;
214          for (Map.Entry<RegionInfo, Result> e : mergedRegions.entrySet()) {
215            List<RegionInfo> parents = MetaTableAccessor.getMergeRegions(e.getValue().rawCells());
216            if (parents != null) {
217              ProcedureExecutor<MasterProcedureEnv> pe = services.getMasterProcedureExecutor();
218              pe.submitProcedure(new GCMultipleMergedRegionsProcedure(pe.getEnvironment(),
219                e.getKey(), parents));
220            }
221          }
222          return true;
223        }
224        return false;
225      } catch (Exception e) {
226        throw new RuntimeException(e);
227      }
228    });
229
230    // Wait until all GCs settled down
231    HBaseTestingUtility.await(10, () -> {
232      return services.getMasterProcedureExecutor().getActiveProcIds().isEmpty();
233    });
234
235    // No orphan regions on FS
236    hbckChore.chore();
237    assertEquals(0, hbckChore.getOrphanRegionsOnFS().size());
238
239    // No holes reported.
240    cj.scan();
241    final CatalogJanitor.Report postReport = cj.getLastReport();
242    assertTrue(postReport.isEmpty());
243  }
244
245  @Test
246  public void testOverlapWithSmallMergeCount() throws Exception {
247    TableName tn = TableName.valueOf(this.name.getMethodName());
248    try {
249      testOverlapCommon(tn);
250      HMaster services = TEST_UTIL.getHBaseCluster().getMaster();
251      CatalogJanitor cj = services.getCatalogJanitor();
252      cj.scan();
253      CatalogJanitor.Report report = cj.getLastReport();
254      assertEquals(6, report.getOverlaps().size());
255      assertEquals(2,
256        MetaFixer.calculateMerges(5, report.getOverlaps()).size());
257
258      // The max merge count is set to 5 so overlap regions are divided into
259      // two merge requests.
260      TEST_UTIL.getHBaseCluster().getMaster().getConfiguration().setInt(
261        "hbase.master.metafixer.max.merge.count", 5);
262
263      // Get overlap regions
264      HashSet<String> overlapRegions = new HashSet<>();
265      for (Pair<RegionInfo, RegionInfo> pair : report.getOverlaps()) {
266        overlapRegions.add(pair.getFirst().getRegionNameAsString());
267        overlapRegions.add(pair.getSecond().getRegionNameAsString());
268      }
269
270      MetaFixer fixer = new MetaFixer(services);
271      fixer.fixOverlaps(report);
272      AssignmentManager am = services.getAssignmentManager();
273
274      HBaseTestingUtility.await(200, () -> {
275        try {
276          cj.scan();
277          final CatalogJanitor.Report postReport = cj.getLastReport();
278          RegionStates regionStates = am.getRegionStates();
279
280          // Make sure that two merged regions are opened and GCs are done.
281          if (postReport.getOverlaps().size() == 1) {
282            Pair<RegionInfo, RegionInfo> pair = postReport.getOverlaps().get(0);
283            if ((!overlapRegions.contains(pair.getFirst().getRegionNameAsString()) &&
284              regionStates.getRegionState(pair.getFirst()).isOpened()) &&
285              (!overlapRegions.contains(pair.getSecond().getRegionNameAsString()) &&
286              regionStates.getRegionState(pair.getSecond()).isOpened())) {
287              // Make sure GC is done.
288              List<RegionInfo> firstParents = MetaTableAccessor.getMergeRegions(
289                services.getConnection(), pair.getFirst().getRegionName());
290              List<RegionInfo> secondParents = MetaTableAccessor.getMergeRegions(
291                services.getConnection(), pair.getSecond().getRegionName());
292
293              return (firstParents == null || firstParents.isEmpty()) &&
294                (secondParents == null || secondParents.isEmpty());
295            }
296          }
297          return false;
298        } catch (Exception e) {
299          throw new RuntimeException(e);
300        }
301      });
302
303      // Second run of fixOverlap should fix all.
304      report = cj.getLastReport();
305      fixer.fixOverlaps(report);
306
307      HBaseTestingUtility.await(20, () -> {
308        try {
309          // Make sure it GC only once.
310          return (cj.scan() > 0);
311        } catch (Exception e) {
312          throw new RuntimeException(e);
313        }
314      });
315
316      // No holes reported.
317      cj.scan();
318      final CatalogJanitor.Report postReport = cj.getLastReport();
319      assertTrue(postReport.isEmpty());
320
321    } finally {
322      TEST_UTIL.getHBaseCluster().getMaster().getConfiguration().unset(
323        "hbase.master.metafixer.max.merge.count");
324
325      TEST_UTIL.deleteTable(tn);
326    }
327  }
328
329  /**
330   * This test covers the case that one of merged parent regions is a merged child region that
331   * has not been GCed but there is no reference files anymore. In this case, it will kick off
332   * a GC procedure, but no merge will happen.
333   */
334  @Test
335  public void testMergeWithMergedChildRegion() throws Exception {
336    TableName tn = TableName.valueOf(this.name.getMethodName());
337    Table t = TEST_UTIL.createMultiRegionTable(tn, HConstants.CATALOG_FAMILY);
338    List<RegionInfo> ris = MetaTableAccessor.getTableRegions(TEST_UTIL.getConnection(), tn);
339    assertTrue(ris.size() > 5);
340    HMaster services = TEST_UTIL.getHBaseCluster().getMaster();
341    CatalogJanitor cj = services.getCatalogJanitor();
342    cj.scan();
343    CatalogJanitor.Report report = cj.getLastReport();
344    assertTrue(report.isEmpty());
345    RegionInfo overlapRegion = makeOverlap(services, ris.get(1), ris.get(2));
346
347    cj.scan();
348    report = cj.getLastReport();
349    assertEquals(2, report.getOverlaps().size());
350
351    // Mark it as a merged child region.
352    RegionInfo fakedParentRegion = RegionInfoBuilder.newBuilder(tn).
353      setStartKey(overlapRegion.getStartKey()).
354      build();
355
356    Table meta = MetaTableAccessor.getMetaHTable(TEST_UTIL.getConnection());
357    Put putOfMerged = MetaTableAccessor.makePutFromRegionInfo(overlapRegion,
358      HConstants.LATEST_TIMESTAMP);
359    String qualifier = String.format(HConstants.MERGE_QUALIFIER_PREFIX_STR + "%04d", 0);
360    putOfMerged.add(CellBuilderFactory.create(CellBuilderType.SHALLOW_COPY).setRow(
361      putOfMerged.getRow()).
362      setFamily(HConstants.CATALOG_FAMILY).
363      setQualifier(Bytes.toBytes(qualifier)).
364      setTimestamp(putOfMerged.getTimestamp()).
365      setType(Cell.Type.Put).
366      setValue(RegionInfo.toByteArray(fakedParentRegion)).
367      build());
368
369    meta.put(putOfMerged);
370
371    MetaFixer fixer = new MetaFixer(services);
372    fixer.fixOverlaps(report);
373
374    // Wait until all procedures settled down
375    HBaseTestingUtility.await(200, () -> {
376      return services.getMasterProcedureExecutor().getActiveProcIds().isEmpty();
377    });
378
379    // No merge is done, overlap is still there.
380    cj.scan();
381    report = cj.getLastReport();
382    assertEquals(2, report.getOverlaps().size());
383
384    fixer.fixOverlaps(report);
385
386    // Wait until all procedures settled down
387    HBaseTestingUtility.await(200, () -> {
388      return services.getMasterProcedureExecutor().getActiveProcIds().isEmpty();
389    });
390
391    // Merge is done and no more overlaps
392    cj.scan();
393    report = cj.getLastReport();
394    assertEquals(0, report.getOverlaps().size());
395  }
396
397  /**
398   * Make it so a big overlap spans many Regions, some of which are non-contiguous. Make it so
399   * we can fix this condition. HBASE-24247
400   */
401  @Test
402  public void testOverlapWithMergeOfNonContiguous() throws Exception {
403    TableName tn = TableName.valueOf(this.name.getMethodName());
404    TEST_UTIL.createMultiRegionTable(tn, HConstants.CATALOG_FAMILY);
405    List<RegionInfo> ris = MetaTableAccessor.getTableRegions(TEST_UTIL.getConnection(), tn);
406    assertTrue(ris.size() > 5);
407    MasterServices services = TEST_UTIL.getHBaseCluster().getMaster();
408    services.getCatalogJanitor().scan();
409    CatalogJanitor.Report report = services.getCatalogJanitor().getLastReport();
410    assertTrue(report.isEmpty());
411    // Make a simple overlap spanning second and third region.
412    makeOverlap(services, ris.get(1), ris.get(5));
413    // Now Delete a region under the overlap to manufacture non-contiguous sub regions.
414    RegionInfo deletedRegion = ris.get(3);
415    long pid = services.getAssignmentManager().unassign(deletedRegion);
416    while (!services.getMasterProcedureExecutor().isFinished(pid)) {
417      Threads.sleep(100);
418    }
419    GCRegionProcedure procedure =
420      new GCRegionProcedure(services.getMasterProcedureExecutor().getEnvironment(), ris.get(3));
421    pid = services.getMasterProcedureExecutor().submitProcedure(procedure);
422    while (!services.getMasterProcedureExecutor().isFinished(pid)) {
423      Threads.sleep(100);
424    }
425    services.getCatalogJanitor().scan();
426    report = services.getCatalogJanitor().getLastReport();
427    assertEquals(1, MetaFixer.calculateMerges(10, report.getOverlaps()).size());
428    MetaFixer fixer = new MetaFixer(services);
429    fixer.fixOverlaps(report);
430    HBaseTestingUtility.await(10, () -> {
431      try {
432        services.getCatalogJanitor().scan();
433        final CatalogJanitor.Report postReport = services.getCatalogJanitor().getLastReport();
434        return postReport.isEmpty();
435      } catch (Exception e) {
436        throw new RuntimeException(e);
437      }
438    });
439  }
440}