001/*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018package org.apache.hadoop.hbase.master.assignment;
019
020import static org.apache.hadoop.hbase.procedure2.ProcedureTestingUtility.assertProcFailed;
021import static org.junit.jupiter.api.Assertions.assertEquals;
022import static org.junit.jupiter.api.Assertions.assertTrue;
023
024import java.io.IOException;
025import java.util.ArrayList;
026import java.util.List;
027import org.apache.hadoop.conf.Configuration;
028import org.apache.hadoop.hbase.HBaseTestingUtil;
029import org.apache.hadoop.hbase.HConstants;
030import org.apache.hadoop.hbase.MetaTableAccessor;
031import org.apache.hadoop.hbase.TableName;
032import org.apache.hadoop.hbase.client.Admin;
033import org.apache.hadoop.hbase.client.ColumnFamilyDescriptorBuilder;
034import org.apache.hadoop.hbase.client.Put;
035import org.apache.hadoop.hbase.client.RegionInfo;
036import org.apache.hadoop.hbase.client.SnapshotDescription;
037import org.apache.hadoop.hbase.client.SnapshotType;
038import org.apache.hadoop.hbase.client.Table;
039import org.apache.hadoop.hbase.client.TableDescriptor;
040import org.apache.hadoop.hbase.client.TableDescriptorBuilder;
041import org.apache.hadoop.hbase.master.procedure.MasterProcedureConstants;
042import org.apache.hadoop.hbase.master.procedure.MasterProcedureEnv;
043import org.apache.hadoop.hbase.master.procedure.MasterProcedureTestingUtility;
044import org.apache.hadoop.hbase.master.procedure.ModifyTableProcedure;
045import org.apache.hadoop.hbase.master.procedure.TestSnapshotProcedure;
046import org.apache.hadoop.hbase.procedure2.ProcedureExecutor;
047import org.apache.hadoop.hbase.procedure2.ProcedureMetrics;
048import org.apache.hadoop.hbase.procedure2.ProcedureTestingUtility;
049import org.apache.hadoop.hbase.regionserver.HRegion;
050import org.apache.hadoop.hbase.snapshot.SnapshotDescriptionUtils;
051import org.apache.hadoop.hbase.testclassification.LargeTests;
052import org.apache.hadoop.hbase.testclassification.MasterTests;
053import org.apache.hadoop.hbase.util.Bytes;
054import org.apache.hadoop.hbase.util.Threads;
055import org.junit.jupiter.api.AfterAll;
056import org.junit.jupiter.api.AfterEach;
057import org.junit.jupiter.api.BeforeAll;
058import org.junit.jupiter.api.BeforeEach;
059import org.junit.jupiter.api.Tag;
060import org.junit.jupiter.api.Test;
061import org.junit.jupiter.api.TestInfo;
062import org.slf4j.Logger;
063import org.slf4j.LoggerFactory;
064
065import org.apache.hadoop.hbase.shaded.protobuf.ProtobufUtil;
066import org.apache.hadoop.hbase.shaded.protobuf.generated.SnapshotProtos;
067
068@Tag(MasterTests.TAG)
069@Tag(LargeTests.TAG)
070public class TestMergeTableRegionsProcedure {
071
072  private static final Logger LOG = LoggerFactory.getLogger(TestMergeTableRegionsProcedure.class);
073  private String testMethodName;
074
075  private static final HBaseTestingUtil UTIL = new HBaseTestingUtil();
076
077  private static final int initialRegionCount = 4;
078  private final static byte[] FAMILY = Bytes.toBytes("FAMILY");
079  private static Admin admin;
080
081  private ProcedureMetrics mergeProcMetrics;
082  private ProcedureMetrics assignProcMetrics;
083  private ProcedureMetrics unassignProcMetrics;
084  private long mergeSubmittedCount = 0;
085  private long mergeFailedCount = 0;
086  private long assignSubmittedCount = 0;
087  private long assignFailedCount = 0;
088  private long unassignSubmittedCount = 0;
089  private long unassignFailedCount = 0;
090
091  private static void setupConf(Configuration conf) {
092    // Reduce the maximum attempts to speed up the test
093    conf.setInt("hbase.assignment.maximum.attempts", 3);
094    conf.setInt("hbase.master.maximum.ping.server.attempts", 3);
095    conf.setInt("hbase.master.ping.server.retry.sleep.interval", 1);
096    conf.setInt(MasterProcedureConstants.MASTER_PROCEDURE_THREADS, 1);
097    conf.set("hbase.coprocessor.region.classes",
098      RegionServerHostingReplicaSlowOpenCoprocessor.class.getName());
099  }
100
101  @BeforeAll
102  public static void setupCluster() throws Exception {
103    setupConf(UTIL.getConfiguration());
104    UTIL.startMiniCluster(1);
105    admin = UTIL.getAdmin();
106  }
107
108  @AfterAll
109  public static void cleanupTest() throws Exception {
110    UTIL.shutdownMiniCluster();
111  }
112
113  @BeforeEach
114  public void setup() throws Exception {
115    resetProcExecutorTestingKillFlag();
116    MasterProcedureTestingUtility.generateNonceGroup(UTIL.getHBaseCluster().getMaster());
117    MasterProcedureTestingUtility.generateNonce(UTIL.getHBaseCluster().getMaster());
118    // Turn off balancer so it doesn't cut in and mess up our placements.
119    admin.balancerSwitch(false, true);
120    // Turn off the meta scanner so it don't remove parent on us.
121    UTIL.getHBaseCluster().getMaster().setCatalogJanitorEnabled(false);
122    resetProcExecutorTestingKillFlag();
123    AssignmentManager am = UTIL.getHBaseCluster().getMaster().getAssignmentManager();
124    mergeProcMetrics = am.getAssignmentManagerMetrics().getMergeProcMetrics();
125    assignProcMetrics = am.getAssignmentManagerMetrics().getAssignProcMetrics();
126    unassignProcMetrics = am.getAssignmentManagerMetrics().getUnassignProcMetrics();
127  }
128
129  @BeforeEach
130  public void setTestMethod(TestInfo testInfo) {
131    testMethodName = testInfo.getTestMethod().get().getName();
132  }
133
134  @AfterEach
135  public void tearDown() throws Exception {
136    resetProcExecutorTestingKillFlag();
137    for (TableDescriptor htd : admin.listTableDescriptors()) {
138      LOG.info("Tear down, remove table=" + htd.getTableName());
139      UTIL.deleteTable(htd.getTableName());
140    }
141  }
142
143  private void resetProcExecutorTestingKillFlag() {
144    final ProcedureExecutor<MasterProcedureEnv> procExec = getMasterProcedureExecutor();
145    ProcedureTestingUtility.setKillAndToggleBeforeStoreUpdate(procExec, false);
146    assertTrue(procExec.isRunning(), "expected executor to be running");
147  }
148
149  private int loadARowPerRegion(final Table t, List<RegionInfo> ris) throws IOException {
150    List<Put> puts = new ArrayList<>();
151    for (RegionInfo ri : ris) {
152      Put put = new Put(ri.getStartKey() == null || ri.getStartKey().length == 0
153        ? new byte[] { 'a' }
154        : ri.getStartKey());
155      put.addColumn(HConstants.CATALOG_FAMILY, HConstants.CATALOG_FAMILY,
156        HConstants.CATALOG_FAMILY);
157      puts.add(put);
158    }
159    t.put(puts);
160    return puts.size();
161  }
162
163  /**
164   * This tests two region merges
165   */
166  @Test
167  public void testMergeTwoRegions() throws Exception {
168    final TableName tableName = TableName.valueOf(testMethodName);
169    UTIL.createTable(tableName, new byte[][] { HConstants.CATALOG_FAMILY }, new byte[][] {
170      new byte[] { 'b' }, new byte[] { 'c' }, new byte[] { 'd' }, new byte[] { 'e' } });
171    testMerge(tableName, 2);
172  }
173
174  private void testMerge(TableName tableName, int mergeCount) throws IOException {
175    List<RegionInfo> ris = MetaTableAccessor.getTableRegions(UTIL.getConnection(), tableName);
176    int originalRegionCount = ris.size();
177    assertTrue(originalRegionCount > mergeCount);
178    RegionInfo[] regionsToMerge = ris.subList(0, mergeCount).toArray(new RegionInfo[] {});
179    int countOfRowsLoaded = 0;
180    try (Table table = UTIL.getConnection().getTable(tableName)) {
181      countOfRowsLoaded = loadARowPerRegion(table, ris);
182    }
183    assertEquals(countOfRowsLoaded, UTIL.countRows(tableName));
184
185    // collect AM metrics before test
186    collectAssignmentManagerMetrics();
187    final ProcedureExecutor<MasterProcedureEnv> procExec = getMasterProcedureExecutor();
188    MergeTableRegionsProcedure proc =
189      new MergeTableRegionsProcedure(procExec.getEnvironment(), regionsToMerge, true);
190    long procId = procExec.submitProcedure(proc);
191    ProcedureTestingUtility.waitProcedure(procExec, procId);
192    ProcedureTestingUtility.assertProcNotFailed(procExec, procId);
193    MetaTableAccessor.fullScanMetaAndPrint(UTIL.getConnection());
194    assertEquals(originalRegionCount - mergeCount + 1,
195      MetaTableAccessor.getTableRegions(UTIL.getConnection(), tableName).size());
196
197    assertEquals(mergeSubmittedCount + 1, mergeProcMetrics.getSubmittedCounter().getCount());
198    assertEquals(mergeFailedCount, mergeProcMetrics.getFailedCounter().getCount());
199    assertEquals(assignSubmittedCount + 1, assignProcMetrics.getSubmittedCounter().getCount());
200    assertEquals(assignFailedCount, assignProcMetrics.getFailedCounter().getCount());
201    assertEquals(unassignSubmittedCount + mergeCount,
202      unassignProcMetrics.getSubmittedCounter().getCount());
203    assertEquals(unassignFailedCount, unassignProcMetrics.getFailedCounter().getCount());
204
205    // Need to get the references cleaned out. Close of region will move them
206    // to archive so disable and reopen just to get rid of references to later
207    // when the catalogjanitor runs, it can do merged region cleanup.
208    admin.disableTable(tableName);
209    admin.enableTable(tableName);
210
211    // Can I purge the merged regions from hbase:meta? Check that all went
212    // well by looking at the merged row up in hbase:meta. It should have no
213    // more mention of the merged regions; they are purged as last step in
214    // the merged regions cleanup.
215    UTIL.getHBaseCluster().getMaster().setCatalogJanitorEnabled(true);
216    UTIL.getHBaseCluster().getMaster().getCatalogJanitor().triggerNow();
217    RegionInfo mergedRegion = proc.getMergedRegion();
218    RegionStateStore regionStateStore =
219      UTIL.getMiniHBaseCluster().getMaster().getAssignmentManager().getRegionStateStore();
220    while (ris != null && ris.get(0) != null && ris.get(1) != null) {
221      ris = regionStateStore.getMergeRegions(mergedRegion);
222      LOG.info("{} {}", Bytes.toStringBinary(mergedRegion.getRegionName()), ris);
223      Threads.sleep(1000);
224    }
225    assertEquals(countOfRowsLoaded, UTIL.countRows(tableName));
226  }
227
228  /**
229   * This tests ten region merges in one go.
230   */
231  @Test
232  public void testMergeTenRegions() throws Exception {
233    final TableName tableName = TableName.valueOf(testMethodName);
234    final ProcedureExecutor<MasterProcedureEnv> procExec = getMasterProcedureExecutor();
235    UTIL.createMultiRegionTable(tableName, HConstants.CATALOG_FAMILY);
236    testMerge(tableName, 10);
237  }
238
239  /**
240   * This tests two concurrent region merges
241   */
242  @Test
243  public void testMergeRegionsConcurrently() throws Exception {
244    final TableName tableName = TableName.valueOf("testMergeRegionsConcurrently");
245    final ProcedureExecutor<MasterProcedureEnv> procExec = getMasterProcedureExecutor();
246
247    List<RegionInfo> tableRegions = createTable(tableName);
248
249    RegionInfo[] regionsToMerge1 = new RegionInfo[2];
250    RegionInfo[] regionsToMerge2 = new RegionInfo[2];
251    regionsToMerge1[0] = tableRegions.get(0);
252    regionsToMerge1[1] = tableRegions.get(1);
253    regionsToMerge2[0] = tableRegions.get(2);
254    regionsToMerge2[1] = tableRegions.get(3);
255
256    // collect AM metrics before test
257    collectAssignmentManagerMetrics();
258
259    long procId1 = procExec.submitProcedure(
260      new MergeTableRegionsProcedure(procExec.getEnvironment(), regionsToMerge1, true));
261    long procId2 = procExec.submitProcedure(
262      new MergeTableRegionsProcedure(procExec.getEnvironment(), regionsToMerge2, true));
263    ProcedureTestingUtility.waitProcedure(procExec, procId1);
264    ProcedureTestingUtility.waitProcedure(procExec, procId2);
265    ProcedureTestingUtility.assertProcNotFailed(procExec, procId1);
266    ProcedureTestingUtility.assertProcNotFailed(procExec, procId2);
267    assertRegionCount(tableName, initialRegionCount - 2);
268
269    assertEquals(mergeSubmittedCount + 2, mergeProcMetrics.getSubmittedCounter().getCount());
270    assertEquals(mergeFailedCount, mergeProcMetrics.getFailedCounter().getCount());
271    assertEquals(assignSubmittedCount + 2, assignProcMetrics.getSubmittedCounter().getCount());
272    assertEquals(assignFailedCount, assignProcMetrics.getFailedCounter().getCount());
273    assertEquals(unassignSubmittedCount + 4, unassignProcMetrics.getSubmittedCounter().getCount());
274    assertEquals(unassignFailedCount, unassignProcMetrics.getFailedCounter().getCount());
275  }
276
277  @Test
278  public void testRecoveryAndDoubleExecution() throws Exception {
279    final TableName tableName = TableName.valueOf("testRecoveryAndDoubleExecution");
280    final ProcedureExecutor<MasterProcedureEnv> procExec = getMasterProcedureExecutor();
281
282    List<RegionInfo> tableRegions = createTable(tableName);
283
284    ProcedureTestingUtility.waitNoProcedureRunning(procExec);
285    ProcedureTestingUtility.setKillIfHasParent(procExec, false);
286    ProcedureTestingUtility.setKillAndToggleBeforeStoreUpdate(procExec, true);
287
288    RegionInfo[] regionsToMerge = new RegionInfo[2];
289    regionsToMerge[0] = tableRegions.get(0);
290    regionsToMerge[1] = tableRegions.get(1);
291
292    long procId = procExec.submitProcedure(
293      new MergeTableRegionsProcedure(procExec.getEnvironment(), regionsToMerge, true));
294
295    // Restart the executor and execute the step twice
296    MasterProcedureTestingUtility.testRecoveryAndDoubleExecution(procExec, procId);
297    ProcedureTestingUtility.assertProcNotFailed(procExec, procId);
298
299    assertRegionCount(tableName, initialRegionCount - 1);
300  }
301
302  @Test
303  public void testRollbackAndDoubleExecution() throws Exception {
304    final TableName tableName = TableName.valueOf("testRollbackAndDoubleExecution");
305    final ProcedureExecutor<MasterProcedureEnv> procExec = getMasterProcedureExecutor();
306
307    List<RegionInfo> tableRegions = createTable(tableName);
308
309    ProcedureTestingUtility.waitNoProcedureRunning(procExec);
310    ProcedureTestingUtility.setKillAndToggleBeforeStoreUpdate(procExec, true);
311
312    RegionInfo[] regionsToMerge = new RegionInfo[2];
313    regionsToMerge[0] = tableRegions.get(0);
314    regionsToMerge[1] = tableRegions.get(1);
315
316    long procId = procExec.submitProcedure(
317      new MergeTableRegionsProcedure(procExec.getEnvironment(), regionsToMerge, true));
318
319    // Failing before MERGE_TABLE_REGIONS_UPDATE_META we should trigger the rollback
320    // NOTE: the 8 (number of MERGE_TABLE_REGIONS_UPDATE_META step) is
321    // hardcoded, so you have to look at this test at least once when you add a new step.
322    int lastStep = 8;
323    MasterProcedureTestingUtility.testRollbackAndDoubleExecution(procExec, procId, lastStep, true);
324    assertEquals(initialRegionCount, UTIL.getAdmin().getRegions(tableName).size());
325    UTIL.waitUntilAllRegionsAssigned(tableName);
326    List<HRegion> regions = UTIL.getMiniHBaseCluster().getRegions(tableName);
327    assertEquals(initialRegionCount, regions.size());
328  }
329
330  @Test
331  public void testMergeWithoutPONR() throws Exception {
332    final TableName tableName = TableName.valueOf("testMergeWithoutPONR");
333    final ProcedureExecutor<MasterProcedureEnv> procExec = getMasterProcedureExecutor();
334
335    List<RegionInfo> tableRegions = createTable(tableName);
336
337    ProcedureTestingUtility.waitNoProcedureRunning(procExec);
338    ProcedureTestingUtility.setKillAndToggleBeforeStoreUpdate(procExec, true);
339
340    RegionInfo[] regionsToMerge = new RegionInfo[2];
341    regionsToMerge[0] = tableRegions.get(0);
342    regionsToMerge[1] = tableRegions.get(1);
343
344    long procId = procExec.submitProcedure(
345      new MergeTableRegionsProcedure(procExec.getEnvironment(), regionsToMerge, true));
346
347    // Execute until step 9 of split procedure
348    // NOTE: step 9 is after step MERGE_TABLE_REGIONS_UPDATE_META
349    MasterProcedureTestingUtility.testRecoveryAndDoubleExecution(procExec, procId, 9, false);
350
351    // Unset Toggle Kill and make ProcExec work correctly
352    ProcedureTestingUtility.setKillAndToggleBeforeStoreUpdate(procExec, false);
353    MasterProcedureTestingUtility.restartMasterProcedureExecutor(procExec);
354    ProcedureTestingUtility.waitProcedure(procExec, procId);
355
356    assertRegionCount(tableName, initialRegionCount - 1);
357  }
358
359  @Test
360  public void testMergingRegionWhileTakingSnapshot() throws Exception {
361    final TableName tableName = TableName.valueOf("testMergingRegionWhileTakingSnapshot");
362    final ProcedureExecutor<MasterProcedureEnv> procExec = getMasterProcedureExecutor();
363
364    List<RegionInfo> tableRegions = createTable(tableName);
365
366    ProcedureTestingUtility.waitNoProcedureRunning(procExec);
367
368    SnapshotDescription snapshot =
369      new SnapshotDescription("SnapshotProcedureTest", tableName, SnapshotType.FLUSH);
370    SnapshotProtos.SnapshotDescription snapshotProto =
371      ProtobufUtil.createHBaseProtosSnapshotDesc(snapshot);
372    snapshotProto = SnapshotDescriptionUtils.validate(snapshotProto,
373      UTIL.getHBaseCluster().getMaster().getConfiguration());
374    long snapshotProcId = procExec.submitProcedure(
375      new TestSnapshotProcedure.DelaySnapshotProcedure(procExec.getEnvironment(), snapshotProto));
376    UTIL.getHBaseCluster().getMaster().getSnapshotManager().registerSnapshotProcedure(snapshotProto,
377      snapshotProcId);
378
379    RegionInfo[] regionsToMerge = new RegionInfo[2];
380    regionsToMerge[0] = tableRegions.get(0);
381    regionsToMerge[1] = tableRegions.get(1);
382
383    long mergeProcId = procExec.submitProcedure(
384      new MergeTableRegionsProcedure(procExec.getEnvironment(), regionsToMerge, true));
385
386    ProcedureTestingUtility
387      .waitProcedure(UTIL.getHBaseCluster().getMaster().getMasterProcedureExecutor(), mergeProcId);
388    ProcedureTestingUtility.waitProcedure(
389      UTIL.getHBaseCluster().getMaster().getMasterProcedureExecutor(), snapshotProcId);
390
391    assertProcFailed(procExec, mergeProcId);
392    assertEquals(initialRegionCount, UTIL.getAdmin().getRegions(tableName).size());
393  }
394
395  @Test
396  public void testMergeDetectsModifyTableProcedure() throws Exception {
397    final TableName tableName = TableName.valueOf(testMethodName);
398    final ProcedureExecutor<MasterProcedureEnv> procExec = getMasterProcedureExecutor();
399
400    List<RegionInfo> regions = createTable(tableName);
401
402    RegionServerHostingReplicaSlowOpenCoprocessor.slowDownReplicaOpen = true;
403    TableDescriptor td = TableDescriptorBuilder.newBuilder(admin.getDescriptor(tableName))
404      .setRegionReplication(2).build();
405    long modifyProcId =
406      procExec.submitProcedure(new ModifyTableProcedure(procExec.getEnvironment(), td));
407
408    // Merge regions of the table, the MergeTableRegionsProcedure will fail because there is a
409    // ModifyTableProcedure in progress
410    MergeTableRegionsProcedure mergeProcedure = new MergeTableRegionsProcedure(
411      procExec.getEnvironment(), regions.toArray(new RegionInfo[0]), false);
412    long mergeProcId = procExec.submitProcedure(mergeProcedure);
413    ProcedureTestingUtility.waitProcedure(procExec, mergeProcId);
414    ProcedureTestingUtility.assertProcFailed(procExec, mergeProcId);
415
416    RegionServerHostingReplicaSlowOpenCoprocessor.slowDownReplicaOpen = false;
417    ProcedureTestingUtility.waitProcedure(procExec, modifyProcId);
418    ProcedureTestingUtility.assertProcNotFailed(procExec, modifyProcId);
419  }
420
421  private List<RegionInfo> createTable(final TableName tableName) throws Exception {
422    TableDescriptor desc = TableDescriptorBuilder.newBuilder(tableName)
423      .setColumnFamily(ColumnFamilyDescriptorBuilder.of(FAMILY)).build();
424    byte[][] splitRows = new byte[initialRegionCount - 1][];
425    for (int i = 0; i < splitRows.length; ++i) {
426      splitRows[i] = Bytes.toBytes(String.format("%d", i));
427    }
428    admin.createTable(desc, splitRows);
429    return assertRegionCount(tableName, initialRegionCount);
430  }
431
432  public List<RegionInfo> assertRegionCount(final TableName tableName, final int nregions)
433    throws Exception {
434    UTIL.waitUntilNoRegionsInTransition();
435    List<RegionInfo> tableRegions = admin.getRegions(tableName);
436    assertEquals(nregions, tableRegions.size());
437    return tableRegions;
438  }
439
440  private ProcedureExecutor<MasterProcedureEnv> getMasterProcedureExecutor() {
441    return UTIL.getHBaseCluster().getMaster().getMasterProcedureExecutor();
442  }
443
444  private void collectAssignmentManagerMetrics() {
445    mergeSubmittedCount = mergeProcMetrics.getSubmittedCounter().getCount();
446    mergeFailedCount = mergeProcMetrics.getFailedCounter().getCount();
447
448    assignSubmittedCount = assignProcMetrics.getSubmittedCounter().getCount();
449    assignFailedCount = assignProcMetrics.getFailedCounter().getCount();
450    unassignSubmittedCount = unassignProcMetrics.getSubmittedCounter().getCount();
451    unassignFailedCount = unassignProcMetrics.getFailedCounter().getCount();
452  }
453}