001/*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018package org.apache.hadoop.hbase.master;
019
020import static org.apache.hbase.thirdparty.org.apache.commons.collections4.CollectionUtils.isNotEmpty;
021import static org.junit.Assert.assertEquals;
022import static org.junit.Assert.assertTrue;
023import java.io.IOException;
024import java.util.Collections;
025import java.util.HashSet;
026import java.util.List;
027import java.util.Map;
028import org.apache.hadoop.hbase.Cell;
029import org.apache.hadoop.hbase.CellBuilderFactory;
030import org.apache.hadoop.hbase.CellBuilderType;
031import org.apache.hadoop.hbase.HBaseClassTestRule;
032import org.apache.hadoop.hbase.HBaseTestingUtility;
033import org.apache.hadoop.hbase.HConstants;
034import org.apache.hadoop.hbase.MetaTableAccessor;
035import org.apache.hadoop.hbase.TableName;
036import org.apache.hadoop.hbase.client.Put;
037import org.apache.hadoop.hbase.client.RegionInfo;
038import org.apache.hadoop.hbase.client.RegionInfoBuilder;
039import org.apache.hadoop.hbase.client.Result;
040import org.apache.hadoop.hbase.client.Table;
041import org.apache.hadoop.hbase.master.assignment.AssignmentManager;
042import org.apache.hadoop.hbase.master.assignment.GCRegionProcedure;
043import org.apache.hadoop.hbase.master.assignment.GCMultipleMergedRegionsProcedure;
044import org.apache.hadoop.hbase.master.assignment.RegionStates;
045import org.apache.hadoop.hbase.master.procedure.MasterProcedureEnv;
046import org.apache.hadoop.hbase.procedure2.ProcedureExecutor;
047import org.apache.hadoop.hbase.testclassification.LargeTests;
048import org.apache.hadoop.hbase.testclassification.MasterTests;
049import org.apache.hadoop.hbase.util.Bytes;
050import org.apache.hadoop.hbase.util.Pair;
051import org.apache.hadoop.hbase.util.Threads;
052import org.junit.AfterClass;
053import org.junit.BeforeClass;
054import org.junit.ClassRule;
055import org.junit.Rule;
056import org.junit.Test;
057import org.junit.experimental.categories.Category;
058import org.junit.rules.TestName;
059
060@Category({MasterTests.class, LargeTests.class})
061public class TestMetaFixer {
062  @ClassRule
063  public static final HBaseClassTestRule CLASS_RULE =
064      HBaseClassTestRule.forClass(TestMetaFixer.class);
065  @Rule
066  public TestName name = new TestName();
067
068  private static final HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility();
069
070  @BeforeClass
071  public static void setupBeforeClass() throws Exception {
072    TEST_UTIL.startMiniCluster();
073  }
074
075  @AfterClass
076  public static void tearDownAfterClass() throws Exception {
077    TEST_UTIL.shutdownMiniCluster();
078  }
079
080  private void deleteRegion(MasterServices services, RegionInfo ri) throws IOException {
081    MetaTableAccessor.deleteRegionInfo(TEST_UTIL.getConnection(), ri);
082    // Delete it from Master context too else it sticks around.
083    services.getAssignmentManager().getRegionStates().deleteRegion(ri);
084  }
085
086  @Test
087  public void testPlugsHoles() throws Exception {
088    TableName tn = TableName.valueOf(this.name.getMethodName());
089    TEST_UTIL.createMultiRegionTable(tn, HConstants.CATALOG_FAMILY);
090    List<RegionInfo> ris = MetaTableAccessor.getTableRegions(TEST_UTIL.getConnection(), tn);
091    MasterServices services = TEST_UTIL.getHBaseCluster().getMaster();
092    int initialSize = services.getAssignmentManager().getRegionStates().getRegionStates().size();
093    services.getCatalogJanitor().scan();
094    CatalogJanitor.Report report = services.getCatalogJanitor().getLastReport();
095    assertTrue(report.isEmpty());
096    int originalCount = ris.size();
097    // Remove first, last and middle region. See if hole gets plugged. Table has 26 regions.
098    deleteRegion(services, ris.get(ris.size() -1));
099    deleteRegion(services, ris.get(3));
100    deleteRegion(services, ris.get(0));
101    assertEquals(initialSize - 3,
102        services.getAssignmentManager().getRegionStates().getRegionStates().size());
103    services.getCatalogJanitor().scan();
104    report = services.getCatalogJanitor().getLastReport();
105    assertEquals(report.toString(), 3, report.getHoles().size());
106    MetaFixer fixer = new MetaFixer(services);
107    fixer.fixHoles(report);
108    services.getCatalogJanitor().scan();
109    report = services.getCatalogJanitor().getLastReport();
110    assertTrue(report.toString(), report.isEmpty());
111    assertEquals(initialSize,
112        services.getAssignmentManager().getRegionStates().getRegionStates().size());
113
114    // wait for RITs to settle -- those are the fixed regions being assigned -- or until the
115    // watchdog TestRule terminates the test.
116    HBaseTestingUtility.await(50,
117      () -> isNotEmpty(services.getAssignmentManager().getRegionsInTransition()));
118
119    ris = MetaTableAccessor.getTableRegions(TEST_UTIL.getConnection(), tn);
120    assertEquals(originalCount, ris.size());
121  }
122
123  /**
124   * Just make sure running fixMeta does right thing for the case
125   * of a single-region Table where the region gets dropped.
126   * There is nothing much we can do. We can't restore what
127   * we don't know about (at least from a read of hbase:meta).
128   */
129  @Test
130  public void testOneRegionTable() throws IOException {
131    TableName tn = TableName.valueOf(this.name.getMethodName());
132    TEST_UTIL.createTable(tn, HConstants.CATALOG_FAMILY);
133    List<RegionInfo> ris = MetaTableAccessor.getTableRegions(TEST_UTIL.getConnection(), tn);
134    MasterServices services = TEST_UTIL.getHBaseCluster().getMaster();
135    services.getCatalogJanitor().scan();
136    deleteRegion(services, ris.get(0));
137    services.getCatalogJanitor().scan();
138    CatalogJanitor.Report report = services.getCatalogJanitor().getLastReport();
139    ris = MetaTableAccessor.getTableRegions(TEST_UTIL.getConnection(), tn);
140    assertTrue(ris.isEmpty());
141    MetaFixer fixer = new MetaFixer(services);
142    fixer.fixHoles(report);
143    report = services.getCatalogJanitor().getLastReport();
144    assertTrue(report.isEmpty());
145    ris = MetaTableAccessor.getTableRegions(TEST_UTIL.getConnection(), tn);
146    assertEquals(0, ris.size());
147  }
148
149  private static RegionInfo makeOverlap(MasterServices services, RegionInfo a, RegionInfo b)
150      throws IOException {
151    RegionInfo overlapRegion = RegionInfoBuilder.newBuilder(a.getTable()).
152        setStartKey(a.getStartKey()).
153        setEndKey(b.getEndKey()).
154        build();
155    MetaTableAccessor.putsToMetaTable(services.getConnection(),
156        Collections.singletonList(MetaTableAccessor.makePutFromRegionInfo(overlapRegion,
157            System.currentTimeMillis())));
158    // TODO: Add checks at assign time to PREVENT being able to assign over existing assign.
159    services.getAssignmentManager().assign(overlapRegion);
160    return overlapRegion;
161  }
162
163  private void testOverlapCommon(final TableName tn) throws Exception {
164    Table t = TEST_UTIL.createMultiRegionTable(tn, HConstants.CATALOG_FAMILY);
165    TEST_UTIL.loadTable(t, HConstants.CATALOG_FAMILY);
166    List<RegionInfo> ris = MetaTableAccessor.getTableRegions(TEST_UTIL.getConnection(), tn);
167    assertTrue(ris.size() > 5);
168    HMaster services = TEST_UTIL.getHBaseCluster().getMaster();
169    services.getCatalogJanitor().scan();
170    CatalogJanitor.Report report = services.getCatalogJanitor().getLastReport();
171    assertTrue(report.isEmpty());
172    // Make a simple overlap spanning second and third region.
173    makeOverlap(services, ris.get(1), ris.get(3));
174    makeOverlap(services, ris.get(2), ris.get(3));
175    makeOverlap(services, ris.get(2), ris.get(4));
176  }
177
178  @Test
179  public void testOverlap() throws Exception {
180    TableName tn = TableName.valueOf(this.name.getMethodName());
181    testOverlapCommon(tn);
182    HMaster services = TEST_UTIL.getHBaseCluster().getMaster();
183    HbckChore hbckChore = services.getHbckChore();
184
185    CatalogJanitor cj = services.getCatalogJanitor();
186    cj.scan();
187    CatalogJanitor.Report report = cj.getLastReport();
188    assertEquals(6, report.getOverlaps().size());
189    assertEquals(1,
190      MetaFixer.calculateMerges(10, report.getOverlaps()).size());
191    MetaFixer fixer = new MetaFixer(services);
192    fixer.fixOverlaps(report);
193
194    HBaseTestingUtility. await(10, () -> {
195      try {
196        if (cj.scan() > 0) {
197          // It submits GC once, then it will immediately kick off another GC to test if
198          // GCMultipleMergedRegionsProcedure is idempotent. If it is not, it will create
199          // a hole.
200          Map<RegionInfo, Result> mergedRegions = cj.getLastReport().mergedRegions;
201          for (Map.Entry<RegionInfo, Result> e : mergedRegions.entrySet()) {
202            List<RegionInfo> parents = MetaTableAccessor.getMergeRegions(e.getValue().rawCells());
203            if (parents != null) {
204              ProcedureExecutor<MasterProcedureEnv> pe = services.getMasterProcedureExecutor();
205              pe.submitProcedure(new GCMultipleMergedRegionsProcedure(pe.getEnvironment(),
206                e.getKey(), parents));
207            }
208          }
209          return true;
210        }
211        return false;
212      } catch (Exception e) {
213        throw new RuntimeException(e);
214      }
215    });
216
217    // Wait until all GCs settled down
218    HBaseTestingUtility.await(10, () -> {
219      return services.getMasterProcedureExecutor().getActiveProcIds().isEmpty();
220    });
221
222    // No orphan regions on FS
223    hbckChore.chore();
224    assertEquals(0, hbckChore.getOrphanRegionsOnFS().size());
225
226    // No holes reported.
227    cj.scan();
228    final CatalogJanitor.Report postReport = cj.getLastReport();
229    assertTrue(postReport.isEmpty());
230  }
231
232  @Test
233  public void testOverlapWithSmallMergeCount() throws Exception {
234    TableName tn = TableName.valueOf(this.name.getMethodName());
235    try {
236      testOverlapCommon(tn);
237      HMaster services = TEST_UTIL.getHBaseCluster().getMaster();
238      CatalogJanitor cj = services.getCatalogJanitor();
239      cj.scan();
240      CatalogJanitor.Report report = cj.getLastReport();
241      assertEquals(6, report.getOverlaps().size());
242      assertEquals(2,
243        MetaFixer.calculateMerges(5, report.getOverlaps()).size());
244
245      // The max merge count is set to 5 so overlap regions are divided into
246      // two merge requests.
247      TEST_UTIL.getHBaseCluster().getMaster().getConfiguration().setInt(
248        "hbase.master.metafixer.max.merge.count", 5);
249
250      // Get overlap regions
251      HashSet<String> overlapRegions = new HashSet<>();
252      for (Pair<RegionInfo, RegionInfo> pair : report.getOverlaps()) {
253        overlapRegions.add(pair.getFirst().getRegionNameAsString());
254        overlapRegions.add(pair.getSecond().getRegionNameAsString());
255      }
256
257      MetaFixer fixer = new MetaFixer(services);
258      fixer.fixOverlaps(report);
259      AssignmentManager am = services.getAssignmentManager();
260
261      HBaseTestingUtility.await(200, () -> {
262        try {
263          cj.scan();
264          final CatalogJanitor.Report postReport = cj.getLastReport();
265          RegionStates regionStates = am.getRegionStates();
266
267          // Make sure that two merged regions are opened and GCs are done.
268          if (postReport.getOverlaps().size() == 1) {
269            Pair<RegionInfo, RegionInfo> pair = postReport.getOverlaps().get(0);
270            if ((!overlapRegions.contains(pair.getFirst().getRegionNameAsString()) &&
271              regionStates.getRegionState(pair.getFirst()).isOpened()) &&
272              (!overlapRegions.contains(pair.getSecond().getRegionNameAsString()) &&
273              regionStates.getRegionState(pair.getSecond()).isOpened())) {
274              // Make sure GC is done.
275              List<RegionInfo> firstParents = MetaTableAccessor.getMergeRegions(
276                services.getConnection(), pair.getFirst().getRegionName());
277              List<RegionInfo> secondParents = MetaTableAccessor.getMergeRegions(
278                services.getConnection(), pair.getSecond().getRegionName());
279
280              return (firstParents == null || firstParents.isEmpty()) &&
281                (secondParents == null || secondParents.isEmpty());
282            }
283          }
284          return false;
285        } catch (Exception e) {
286          throw new RuntimeException(e);
287        }
288      });
289
290      // Second run of fixOverlap should fix all.
291      report = cj.getLastReport();
292      fixer.fixOverlaps(report);
293
294      HBaseTestingUtility.await(20, () -> {
295        try {
296          // Make sure it GC only once.
297          return (cj.scan() > 0);
298        } catch (Exception e) {
299          throw new RuntimeException(e);
300        }
301      });
302
303      // No holes reported.
304      cj.scan();
305      final CatalogJanitor.Report postReport = cj.getLastReport();
306      assertTrue(postReport.isEmpty());
307
308    } finally {
309      TEST_UTIL.getHBaseCluster().getMaster().getConfiguration().unset(
310        "hbase.master.metafixer.max.merge.count");
311
312      TEST_UTIL.deleteTable(tn);
313    }
314  }
315
316  /**
317   * This test covers the case that one of merged parent regions is a merged child region that
318   * has not been GCed but there is no reference files anymore. In this case, it will kick off
319   * a GC procedure, but no merge will happen.
320   */
321  @Test
322  public void testMergeWithMergedChildRegion() throws Exception {
323    TableName tn = TableName.valueOf(this.name.getMethodName());
324    Table t = TEST_UTIL.createMultiRegionTable(tn, HConstants.CATALOG_FAMILY);
325    List<RegionInfo> ris = MetaTableAccessor.getTableRegions(TEST_UTIL.getConnection(), tn);
326    assertTrue(ris.size() > 5);
327    HMaster services = TEST_UTIL.getHBaseCluster().getMaster();
328    CatalogJanitor cj = services.getCatalogJanitor();
329    cj.scan();
330    CatalogJanitor.Report report = cj.getLastReport();
331    assertTrue(report.isEmpty());
332    RegionInfo overlapRegion = makeOverlap(services, ris.get(1), ris.get(2));
333
334    cj.scan();
335    report = cj.getLastReport();
336    assertEquals(2, report.getOverlaps().size());
337
338    // Mark it as a merged child region.
339    RegionInfo fakedParentRegion = RegionInfoBuilder.newBuilder(tn).
340      setStartKey(overlapRegion.getStartKey()).
341      build();
342
343    Table meta = MetaTableAccessor.getMetaHTable(TEST_UTIL.getConnection());
344    Put putOfMerged = MetaTableAccessor.makePutFromRegionInfo(overlapRegion,
345      HConstants.LATEST_TIMESTAMP);
346    String qualifier = String.format(HConstants.MERGE_QUALIFIER_PREFIX_STR + "%04d", 0);
347    putOfMerged.add(CellBuilderFactory.create(CellBuilderType.SHALLOW_COPY).setRow(
348      putOfMerged.getRow()).
349      setFamily(HConstants.CATALOG_FAMILY).
350      setQualifier(Bytes.toBytes(qualifier)).
351      setTimestamp(putOfMerged.getTimestamp()).
352      setType(Cell.Type.Put).
353      setValue(RegionInfo.toByteArray(fakedParentRegion)).
354      build());
355
356    meta.put(putOfMerged);
357
358    MetaFixer fixer = new MetaFixer(services);
359    fixer.fixOverlaps(report);
360
361    // Wait until all procedures settled down
362    HBaseTestingUtility.await(200, () -> {
363      return services.getMasterProcedureExecutor().getActiveProcIds().isEmpty();
364    });
365
366    // No merge is done, overlap is still there.
367    cj.scan();
368    report = cj.getLastReport();
369    assertEquals(2, report.getOverlaps().size());
370
371    fixer.fixOverlaps(report);
372
373    // Wait until all procedures settled down
374    HBaseTestingUtility.await(200, () -> {
375      return services.getMasterProcedureExecutor().getActiveProcIds().isEmpty();
376    });
377
378    // Merge is done and no more overlaps
379    cj.scan();
380    report = cj.getLastReport();
381    assertEquals(0, report.getOverlaps().size());
382  }
383
384  /**
385   * Make it so a big overlap spans many Regions, some of which are non-contiguous. Make it so
386   * we can fix this condition. HBASE-24247
387   */
388  @Test
389  public void testOverlapWithMergeOfNonContiguous() throws Exception {
390    TableName tn = TableName.valueOf(this.name.getMethodName());
391    TEST_UTIL.createMultiRegionTable(tn, HConstants.CATALOG_FAMILY);
392    List<RegionInfo> ris = MetaTableAccessor.getTableRegions(TEST_UTIL.getConnection(), tn);
393    assertTrue(ris.size() > 5);
394    MasterServices services = TEST_UTIL.getHBaseCluster().getMaster();
395    services.getCatalogJanitor().scan();
396    CatalogJanitor.Report report = services.getCatalogJanitor().getLastReport();
397    assertTrue(report.isEmpty());
398    // Make a simple overlap spanning second and third region.
399    makeOverlap(services, ris.get(1), ris.get(5));
400    // Now Delete a region under the overlap to manufacture non-contiguous sub regions.
401    RegionInfo deletedRegion = ris.get(3);
402    long pid = services.getAssignmentManager().unassign(deletedRegion);
403    while (!services.getMasterProcedureExecutor().isFinished(pid)) {
404      Threads.sleep(100);
405    }
406    GCRegionProcedure procedure =
407      new GCRegionProcedure(services.getMasterProcedureExecutor().getEnvironment(), ris.get(3));
408    pid = services.getMasterProcedureExecutor().submitProcedure(procedure);
409    while (!services.getMasterProcedureExecutor().isFinished(pid)) {
410      Threads.sleep(100);
411    }
412    services.getCatalogJanitor().scan();
413    report = services.getCatalogJanitor().getLastReport();
414    assertEquals(1, MetaFixer.calculateMerges(10, report.getOverlaps()).size());
415    MetaFixer fixer = new MetaFixer(services);
416    fixer.fixOverlaps(report);
417    HBaseTestingUtility.await(10, () -> {
418      try {
419        services.getCatalogJanitor().scan();
420        final CatalogJanitor.Report postReport = services.getCatalogJanitor().getLastReport();
421        return postReport.isEmpty();
422      } catch (Exception e) {
423        throw new RuntimeException(e);
424      }
425    });
426  }
427}