001/*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018package org.apache.hadoop.hbase.master.assignment;
019
020import static org.apache.hadoop.hbase.procedure2.ProcedureTestingUtility.assertProcFailed;
021import static org.junit.Assert.assertEquals;
022import static org.junit.Assert.assertTrue;
023
024import java.io.IOException;
025import java.util.ArrayList;
026import java.util.List;
027import org.apache.hadoop.conf.Configuration;
028import org.apache.hadoop.hbase.HBaseClassTestRule;
029import org.apache.hadoop.hbase.HBaseTestingUtil;
030import org.apache.hadoop.hbase.HConstants;
031import org.apache.hadoop.hbase.MetaTableAccessor;
032import org.apache.hadoop.hbase.TableName;
033import org.apache.hadoop.hbase.client.Admin;
034import org.apache.hadoop.hbase.client.ColumnFamilyDescriptorBuilder;
035import org.apache.hadoop.hbase.client.Put;
036import org.apache.hadoop.hbase.client.RegionInfo;
037import org.apache.hadoop.hbase.client.SnapshotDescription;
038import org.apache.hadoop.hbase.client.SnapshotType;
039import org.apache.hadoop.hbase.client.Table;
040import org.apache.hadoop.hbase.client.TableDescriptor;
041import org.apache.hadoop.hbase.client.TableDescriptorBuilder;
042import org.apache.hadoop.hbase.master.procedure.MasterProcedureConstants;
043import org.apache.hadoop.hbase.master.procedure.MasterProcedureEnv;
044import org.apache.hadoop.hbase.master.procedure.MasterProcedureTestingUtility;
045import org.apache.hadoop.hbase.master.procedure.ModifyTableProcedure;
046import org.apache.hadoop.hbase.master.procedure.TestSnapshotProcedure;
047import org.apache.hadoop.hbase.procedure2.ProcedureExecutor;
048import org.apache.hadoop.hbase.procedure2.ProcedureMetrics;
049import org.apache.hadoop.hbase.procedure2.ProcedureTestingUtility;
050import org.apache.hadoop.hbase.regionserver.HRegion;
051import org.apache.hadoop.hbase.snapshot.SnapshotDescriptionUtils;
052import org.apache.hadoop.hbase.testclassification.LargeTests;
053import org.apache.hadoop.hbase.testclassification.MasterTests;
054import org.apache.hadoop.hbase.util.Bytes;
055import org.apache.hadoop.hbase.util.Threads;
056import org.junit.After;
057import org.junit.AfterClass;
058import org.junit.Before;
059import org.junit.BeforeClass;
060import org.junit.ClassRule;
061import org.junit.Rule;
062import org.junit.Test;
063import org.junit.experimental.categories.Category;
064import org.junit.rules.TestName;
065import org.slf4j.Logger;
066import org.slf4j.LoggerFactory;
067
068import org.apache.hadoop.hbase.shaded.protobuf.ProtobufUtil;
069import org.apache.hadoop.hbase.shaded.protobuf.generated.SnapshotProtos;
070
071@Category({ MasterTests.class, LargeTests.class })
072public class TestMergeTableRegionsProcedure {
073
074  @ClassRule
075  public static final HBaseClassTestRule CLASS_RULE =
076    HBaseClassTestRule.forClass(TestMergeTableRegionsProcedure.class);
077
078  private static final Logger LOG = LoggerFactory.getLogger(TestMergeTableRegionsProcedure.class);
079  @Rule
080  public final TestName name = new TestName();
081
082  private static final HBaseTestingUtil UTIL = new HBaseTestingUtil();
083
084  private static final int initialRegionCount = 4;
085  private final static byte[] FAMILY = Bytes.toBytes("FAMILY");
086  private static Admin admin;
087
088  private ProcedureMetrics mergeProcMetrics;
089  private ProcedureMetrics assignProcMetrics;
090  private ProcedureMetrics unassignProcMetrics;
091  private long mergeSubmittedCount = 0;
092  private long mergeFailedCount = 0;
093  private long assignSubmittedCount = 0;
094  private long assignFailedCount = 0;
095  private long unassignSubmittedCount = 0;
096  private long unassignFailedCount = 0;
097
098  private static void setupConf(Configuration conf) {
099    // Reduce the maximum attempts to speed up the test
100    conf.setInt("hbase.assignment.maximum.attempts", 3);
101    conf.setInt("hbase.master.maximum.ping.server.attempts", 3);
102    conf.setInt("hbase.master.ping.server.retry.sleep.interval", 1);
103    conf.setInt(MasterProcedureConstants.MASTER_PROCEDURE_THREADS, 1);
104    conf.set("hbase.coprocessor.region.classes",
105      RegionServerHostingReplicaSlowOpenCoprocessor.class.getName());
106  }
107
108  @BeforeClass
109  public static void setupCluster() throws Exception {
110    setupConf(UTIL.getConfiguration());
111    UTIL.startMiniCluster(1);
112    admin = UTIL.getAdmin();
113  }
114
115  @AfterClass
116  public static void cleanupTest() throws Exception {
117    UTIL.shutdownMiniCluster();
118  }
119
120  @Before
121  public void setup() throws Exception {
122    resetProcExecutorTestingKillFlag();
123    MasterProcedureTestingUtility.generateNonceGroup(UTIL.getHBaseCluster().getMaster());
124    MasterProcedureTestingUtility.generateNonce(UTIL.getHBaseCluster().getMaster());
125    // Turn off balancer so it doesn't cut in and mess up our placements.
126    admin.balancerSwitch(false, true);
127    // Turn off the meta scanner so it don't remove parent on us.
128    UTIL.getHBaseCluster().getMaster().setCatalogJanitorEnabled(false);
129    resetProcExecutorTestingKillFlag();
130    AssignmentManager am = UTIL.getHBaseCluster().getMaster().getAssignmentManager();
131    mergeProcMetrics = am.getAssignmentManagerMetrics().getMergeProcMetrics();
132    assignProcMetrics = am.getAssignmentManagerMetrics().getAssignProcMetrics();
133    unassignProcMetrics = am.getAssignmentManagerMetrics().getUnassignProcMetrics();
134  }
135
136  @After
137  public void tearDown() throws Exception {
138    resetProcExecutorTestingKillFlag();
139    for (TableDescriptor htd : admin.listTableDescriptors()) {
140      LOG.info("Tear down, remove table=" + htd.getTableName());
141      UTIL.deleteTable(htd.getTableName());
142    }
143  }
144
145  private void resetProcExecutorTestingKillFlag() {
146    final ProcedureExecutor<MasterProcedureEnv> procExec = getMasterProcedureExecutor();
147    ProcedureTestingUtility.setKillAndToggleBeforeStoreUpdate(procExec, false);
148    assertTrue("expected executor to be running", procExec.isRunning());
149  }
150
151  private int loadARowPerRegion(final Table t, List<RegionInfo> ris) throws IOException {
152    List<Put> puts = new ArrayList<>();
153    for (RegionInfo ri : ris) {
154      Put put = new Put(ri.getStartKey() == null || ri.getStartKey().length == 0
155        ? new byte[] { 'a' }
156        : ri.getStartKey());
157      put.addColumn(HConstants.CATALOG_FAMILY, HConstants.CATALOG_FAMILY,
158        HConstants.CATALOG_FAMILY);
159      puts.add(put);
160    }
161    t.put(puts);
162    return puts.size();
163  }
164
165  /**
166   * This tests two region merges
167   */
168  @Test
169  public void testMergeTwoRegions() throws Exception {
170    final TableName tableName = TableName.valueOf(this.name.getMethodName());
171    UTIL.createTable(tableName, new byte[][] { HConstants.CATALOG_FAMILY }, new byte[][] {
172      new byte[] { 'b' }, new byte[] { 'c' }, new byte[] { 'd' }, new byte[] { 'e' } });
173    testMerge(tableName, 2);
174  }
175
176  private void testMerge(TableName tableName, int mergeCount) throws IOException {
177    List<RegionInfo> ris = MetaTableAccessor.getTableRegions(UTIL.getConnection(), tableName);
178    int originalRegionCount = ris.size();
179    assertTrue(originalRegionCount > mergeCount);
180    RegionInfo[] regionsToMerge = ris.subList(0, mergeCount).toArray(new RegionInfo[] {});
181    int countOfRowsLoaded = 0;
182    try (Table table = UTIL.getConnection().getTable(tableName)) {
183      countOfRowsLoaded = loadARowPerRegion(table, ris);
184    }
185    assertEquals(countOfRowsLoaded, UTIL.countRows(tableName));
186
187    // collect AM metrics before test
188    collectAssignmentManagerMetrics();
189    final ProcedureExecutor<MasterProcedureEnv> procExec = getMasterProcedureExecutor();
190    MergeTableRegionsProcedure proc =
191      new MergeTableRegionsProcedure(procExec.getEnvironment(), regionsToMerge, true);
192    long procId = procExec.submitProcedure(proc);
193    ProcedureTestingUtility.waitProcedure(procExec, procId);
194    ProcedureTestingUtility.assertProcNotFailed(procExec, procId);
195    MetaTableAccessor.fullScanMetaAndPrint(UTIL.getConnection());
196    assertEquals(originalRegionCount - mergeCount + 1,
197      MetaTableAccessor.getTableRegions(UTIL.getConnection(), tableName).size());
198
199    assertEquals(mergeSubmittedCount + 1, mergeProcMetrics.getSubmittedCounter().getCount());
200    assertEquals(mergeFailedCount, mergeProcMetrics.getFailedCounter().getCount());
201    assertEquals(assignSubmittedCount + 1, assignProcMetrics.getSubmittedCounter().getCount());
202    assertEquals(assignFailedCount, assignProcMetrics.getFailedCounter().getCount());
203    assertEquals(unassignSubmittedCount + mergeCount,
204      unassignProcMetrics.getSubmittedCounter().getCount());
205    assertEquals(unassignFailedCount, unassignProcMetrics.getFailedCounter().getCount());
206
207    // Need to get the references cleaned out. Close of region will move them
208    // to archive so disable and reopen just to get rid of references to later
209    // when the catalogjanitor runs, it can do merged region cleanup.
210    admin.disableTable(tableName);
211    admin.enableTable(tableName);
212
213    // Can I purge the merged regions from hbase:meta? Check that all went
214    // well by looking at the merged row up in hbase:meta. It should have no
215    // more mention of the merged regions; they are purged as last step in
216    // the merged regions cleanup.
217    UTIL.getHBaseCluster().getMaster().setCatalogJanitorEnabled(true);
218    UTIL.getHBaseCluster().getMaster().getCatalogJanitor().triggerNow();
219    RegionInfo mergedRegion = proc.getMergedRegion();
220    RegionStateStore regionStateStore =
221      UTIL.getMiniHBaseCluster().getMaster().getAssignmentManager().getRegionStateStore();
222    while (ris != null && ris.get(0) != null && ris.get(1) != null) {
223      ris = regionStateStore.getMergeRegions(mergedRegion);
224      LOG.info("{} {}", Bytes.toStringBinary(mergedRegion.getRegionName()), ris);
225      Threads.sleep(1000);
226    }
227    assertEquals(countOfRowsLoaded, UTIL.countRows(tableName));
228  }
229
230  /**
231   * This tests ten region merges in one go.
232   */
233  @Test
234  public void testMergeTenRegions() throws Exception {
235    final TableName tableName = TableName.valueOf(this.name.getMethodName());
236    final ProcedureExecutor<MasterProcedureEnv> procExec = getMasterProcedureExecutor();
237    UTIL.createMultiRegionTable(tableName, HConstants.CATALOG_FAMILY);
238    testMerge(tableName, 10);
239  }
240
241  /**
242   * This tests two concurrent region merges
243   */
244  @Test
245  public void testMergeRegionsConcurrently() throws Exception {
246    final TableName tableName = TableName.valueOf("testMergeRegionsConcurrently");
247    final ProcedureExecutor<MasterProcedureEnv> procExec = getMasterProcedureExecutor();
248
249    List<RegionInfo> tableRegions = createTable(tableName);
250
251    RegionInfo[] regionsToMerge1 = new RegionInfo[2];
252    RegionInfo[] regionsToMerge2 = new RegionInfo[2];
253    regionsToMerge1[0] = tableRegions.get(0);
254    regionsToMerge1[1] = tableRegions.get(1);
255    regionsToMerge2[0] = tableRegions.get(2);
256    regionsToMerge2[1] = tableRegions.get(3);
257
258    // collect AM metrics before test
259    collectAssignmentManagerMetrics();
260
261    long procId1 = procExec.submitProcedure(
262      new MergeTableRegionsProcedure(procExec.getEnvironment(), regionsToMerge1, true));
263    long procId2 = procExec.submitProcedure(
264      new MergeTableRegionsProcedure(procExec.getEnvironment(), regionsToMerge2, true));
265    ProcedureTestingUtility.waitProcedure(procExec, procId1);
266    ProcedureTestingUtility.waitProcedure(procExec, procId2);
267    ProcedureTestingUtility.assertProcNotFailed(procExec, procId1);
268    ProcedureTestingUtility.assertProcNotFailed(procExec, procId2);
269    assertRegionCount(tableName, initialRegionCount - 2);
270
271    assertEquals(mergeSubmittedCount + 2, mergeProcMetrics.getSubmittedCounter().getCount());
272    assertEquals(mergeFailedCount, mergeProcMetrics.getFailedCounter().getCount());
273    assertEquals(assignSubmittedCount + 2, assignProcMetrics.getSubmittedCounter().getCount());
274    assertEquals(assignFailedCount, assignProcMetrics.getFailedCounter().getCount());
275    assertEquals(unassignSubmittedCount + 4, unassignProcMetrics.getSubmittedCounter().getCount());
276    assertEquals(unassignFailedCount, unassignProcMetrics.getFailedCounter().getCount());
277  }
278
279  @Test
280  public void testRecoveryAndDoubleExecution() throws Exception {
281    final TableName tableName = TableName.valueOf("testRecoveryAndDoubleExecution");
282    final ProcedureExecutor<MasterProcedureEnv> procExec = getMasterProcedureExecutor();
283
284    List<RegionInfo> tableRegions = createTable(tableName);
285
286    ProcedureTestingUtility.waitNoProcedureRunning(procExec);
287    ProcedureTestingUtility.setKillIfHasParent(procExec, false);
288    ProcedureTestingUtility.setKillAndToggleBeforeStoreUpdate(procExec, true);
289
290    RegionInfo[] regionsToMerge = new RegionInfo[2];
291    regionsToMerge[0] = tableRegions.get(0);
292    regionsToMerge[1] = tableRegions.get(1);
293
294    long procId = procExec.submitProcedure(
295      new MergeTableRegionsProcedure(procExec.getEnvironment(), regionsToMerge, true));
296
297    // Restart the executor and execute the step twice
298    MasterProcedureTestingUtility.testRecoveryAndDoubleExecution(procExec, procId);
299    ProcedureTestingUtility.assertProcNotFailed(procExec, procId);
300
301    assertRegionCount(tableName, initialRegionCount - 1);
302  }
303
304  @Test
305  public void testRollbackAndDoubleExecution() throws Exception {
306    final TableName tableName = TableName.valueOf("testRollbackAndDoubleExecution");
307    final ProcedureExecutor<MasterProcedureEnv> procExec = getMasterProcedureExecutor();
308
309    List<RegionInfo> tableRegions = createTable(tableName);
310
311    ProcedureTestingUtility.waitNoProcedureRunning(procExec);
312    ProcedureTestingUtility.setKillAndToggleBeforeStoreUpdate(procExec, true);
313
314    RegionInfo[] regionsToMerge = new RegionInfo[2];
315    regionsToMerge[0] = tableRegions.get(0);
316    regionsToMerge[1] = tableRegions.get(1);
317
318    long procId = procExec.submitProcedure(
319      new MergeTableRegionsProcedure(procExec.getEnvironment(), regionsToMerge, true));
320
321    // Failing before MERGE_TABLE_REGIONS_UPDATE_META we should trigger the rollback
322    // NOTE: the 8 (number of MERGE_TABLE_REGIONS_UPDATE_META step) is
323    // hardcoded, so you have to look at this test at least once when you add a new step.
324    int lastStep = 8;
325    MasterProcedureTestingUtility.testRollbackAndDoubleExecution(procExec, procId, lastStep, true);
326    assertEquals(initialRegionCount, UTIL.getAdmin().getRegions(tableName).size());
327    UTIL.waitUntilAllRegionsAssigned(tableName);
328    List<HRegion> regions = UTIL.getMiniHBaseCluster().getRegions(tableName);
329    assertEquals(initialRegionCount, regions.size());
330  }
331
332  @Test
333  public void testMergeWithoutPONR() throws Exception {
334    final TableName tableName = TableName.valueOf("testMergeWithoutPONR");
335    final ProcedureExecutor<MasterProcedureEnv> procExec = getMasterProcedureExecutor();
336
337    List<RegionInfo> tableRegions = createTable(tableName);
338
339    ProcedureTestingUtility.waitNoProcedureRunning(procExec);
340    ProcedureTestingUtility.setKillAndToggleBeforeStoreUpdate(procExec, true);
341
342    RegionInfo[] regionsToMerge = new RegionInfo[2];
343    regionsToMerge[0] = tableRegions.get(0);
344    regionsToMerge[1] = tableRegions.get(1);
345
346    long procId = procExec.submitProcedure(
347      new MergeTableRegionsProcedure(procExec.getEnvironment(), regionsToMerge, true));
348
349    // Execute until step 9 of split procedure
350    // NOTE: step 9 is after step MERGE_TABLE_REGIONS_UPDATE_META
351    MasterProcedureTestingUtility.testRecoveryAndDoubleExecution(procExec, procId, 9, false);
352
353    // Unset Toggle Kill and make ProcExec work correctly
354    ProcedureTestingUtility.setKillAndToggleBeforeStoreUpdate(procExec, false);
355    MasterProcedureTestingUtility.restartMasterProcedureExecutor(procExec);
356    ProcedureTestingUtility.waitProcedure(procExec, procId);
357
358    assertRegionCount(tableName, initialRegionCount - 1);
359  }
360
361  @Test
362  public void testMergingRegionWhileTakingSnapshot() throws Exception {
363    final TableName tableName = TableName.valueOf("testMergingRegionWhileTakingSnapshot");
364    final ProcedureExecutor<MasterProcedureEnv> procExec = getMasterProcedureExecutor();
365
366    List<RegionInfo> tableRegions = createTable(tableName);
367
368    ProcedureTestingUtility.waitNoProcedureRunning(procExec);
369
370    SnapshotDescription snapshot =
371      new SnapshotDescription("SnapshotProcedureTest", tableName, SnapshotType.FLUSH);
372    SnapshotProtos.SnapshotDescription snapshotProto =
373      ProtobufUtil.createHBaseProtosSnapshotDesc(snapshot);
374    snapshotProto = SnapshotDescriptionUtils.validate(snapshotProto,
375      UTIL.getHBaseCluster().getMaster().getConfiguration());
376    long snapshotProcId = procExec.submitProcedure(
377      new TestSnapshotProcedure.DelaySnapshotProcedure(procExec.getEnvironment(), snapshotProto));
378    UTIL.getHBaseCluster().getMaster().getSnapshotManager().registerSnapshotProcedure(snapshotProto,
379      snapshotProcId);
380
381    RegionInfo[] regionsToMerge = new RegionInfo[2];
382    regionsToMerge[0] = tableRegions.get(0);
383    regionsToMerge[1] = tableRegions.get(1);
384
385    long mergeProcId = procExec.submitProcedure(
386      new MergeTableRegionsProcedure(procExec.getEnvironment(), regionsToMerge, true));
387
388    ProcedureTestingUtility
389      .waitProcedure(UTIL.getHBaseCluster().getMaster().getMasterProcedureExecutor(), mergeProcId);
390    ProcedureTestingUtility.waitProcedure(
391      UTIL.getHBaseCluster().getMaster().getMasterProcedureExecutor(), snapshotProcId);
392
393    assertProcFailed(procExec, mergeProcId);
394    assertEquals(initialRegionCount, UTIL.getAdmin().getRegions(tableName).size());
395  }
396
397  @Test
398  public void testMergeDetectsModifyTableProcedure() throws Exception {
399    final TableName tableName = TableName.valueOf(name.getMethodName());
400    final ProcedureExecutor<MasterProcedureEnv> procExec = getMasterProcedureExecutor();
401
402    List<RegionInfo> regions = createTable(tableName);
403
404    RegionServerHostingReplicaSlowOpenCoprocessor.slowDownReplicaOpen = true;
405    TableDescriptor td = TableDescriptorBuilder.newBuilder(admin.getDescriptor(tableName))
406      .setRegionReplication(2).build();
407    long modifyProcId =
408      procExec.submitProcedure(new ModifyTableProcedure(procExec.getEnvironment(), td));
409
410    // Merge regions of the table, the MergeTableRegionsProcedure will fail because there is a
411    // ModifyTableProcedure in progress
412    MergeTableRegionsProcedure mergeProcedure = new MergeTableRegionsProcedure(
413      procExec.getEnvironment(), regions.toArray(new RegionInfo[0]), false);
414    long mergeProcId = procExec.submitProcedure(mergeProcedure);
415    ProcedureTestingUtility.waitProcedure(procExec, mergeProcId);
416    ProcedureTestingUtility.assertProcFailed(procExec, mergeProcId);
417
418    RegionServerHostingReplicaSlowOpenCoprocessor.slowDownReplicaOpen = false;
419    ProcedureTestingUtility.waitProcedure(procExec, modifyProcId);
420    ProcedureTestingUtility.assertProcNotFailed(procExec, modifyProcId);
421  }
422
423  private List<RegionInfo> createTable(final TableName tableName) throws Exception {
424    TableDescriptor desc = TableDescriptorBuilder.newBuilder(tableName)
425      .setColumnFamily(ColumnFamilyDescriptorBuilder.of(FAMILY)).build();
426    byte[][] splitRows = new byte[initialRegionCount - 1][];
427    for (int i = 0; i < splitRows.length; ++i) {
428      splitRows[i] = Bytes.toBytes(String.format("%d", i));
429    }
430    admin.createTable(desc, splitRows);
431    return assertRegionCount(tableName, initialRegionCount);
432  }
433
434  public List<RegionInfo> assertRegionCount(final TableName tableName, final int nregions)
435    throws Exception {
436    UTIL.waitUntilNoRegionsInTransition();
437    List<RegionInfo> tableRegions = admin.getRegions(tableName);
438    assertEquals(nregions, tableRegions.size());
439    return tableRegions;
440  }
441
442  private ProcedureExecutor<MasterProcedureEnv> getMasterProcedureExecutor() {
443    return UTIL.getHBaseCluster().getMaster().getMasterProcedureExecutor();
444  }
445
446  private void collectAssignmentManagerMetrics() {
447    mergeSubmittedCount = mergeProcMetrics.getSubmittedCounter().getCount();
448    mergeFailedCount = mergeProcMetrics.getFailedCounter().getCount();
449
450    assignSubmittedCount = assignProcMetrics.getSubmittedCounter().getCount();
451    assignFailedCount = assignProcMetrics.getFailedCounter().getCount();
452    unassignSubmittedCount = unassignProcMetrics.getSubmittedCounter().getCount();
453    unassignFailedCount = unassignProcMetrics.getFailedCounter().getCount();
454  }
455}