001/*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018package org.apache.hadoop.hbase.master.assignment;
019
020import static org.apache.hadoop.hbase.procedure2.ProcedureTestingUtility.assertProcFailed;
021import static org.junit.Assert.assertEquals;
022import static org.junit.Assert.assertTrue;
023
024import java.io.IOException;
025import java.util.ArrayList;
026import java.util.List;
027import org.apache.hadoop.conf.Configuration;
028import org.apache.hadoop.hbase.HBaseClassTestRule;
029import org.apache.hadoop.hbase.HBaseTestingUtil;
030import org.apache.hadoop.hbase.HConstants;
031import org.apache.hadoop.hbase.MetaTableAccessor;
032import org.apache.hadoop.hbase.TableName;
033import org.apache.hadoop.hbase.client.Admin;
034import org.apache.hadoop.hbase.client.ColumnFamilyDescriptorBuilder;
035import org.apache.hadoop.hbase.client.Put;
036import org.apache.hadoop.hbase.client.RegionInfo;
037import org.apache.hadoop.hbase.client.SnapshotDescription;
038import org.apache.hadoop.hbase.client.SnapshotType;
039import org.apache.hadoop.hbase.client.Table;
040import org.apache.hadoop.hbase.client.TableDescriptor;
041import org.apache.hadoop.hbase.client.TableDescriptorBuilder;
042import org.apache.hadoop.hbase.master.procedure.MasterProcedureConstants;
043import org.apache.hadoop.hbase.master.procedure.MasterProcedureEnv;
044import org.apache.hadoop.hbase.master.procedure.MasterProcedureTestingUtility;
045import org.apache.hadoop.hbase.master.procedure.TestSnapshotProcedure;
046import org.apache.hadoop.hbase.procedure2.ProcedureExecutor;
047import org.apache.hadoop.hbase.procedure2.ProcedureMetrics;
048import org.apache.hadoop.hbase.procedure2.ProcedureTestingUtility;
049import org.apache.hadoop.hbase.regionserver.HRegion;
050import org.apache.hadoop.hbase.snapshot.SnapshotDescriptionUtils;
051import org.apache.hadoop.hbase.testclassification.LargeTests;
052import org.apache.hadoop.hbase.testclassification.MasterTests;
053import org.apache.hadoop.hbase.util.Bytes;
054import org.apache.hadoop.hbase.util.Threads;
055import org.junit.After;
056import org.junit.AfterClass;
057import org.junit.Before;
058import org.junit.BeforeClass;
059import org.junit.ClassRule;
060import org.junit.Rule;
061import org.junit.Test;
062import org.junit.experimental.categories.Category;
063import org.junit.rules.TestName;
064import org.slf4j.Logger;
065import org.slf4j.LoggerFactory;
066
067import org.apache.hadoop.hbase.shaded.protobuf.ProtobufUtil;
068import org.apache.hadoop.hbase.shaded.protobuf.generated.SnapshotProtos;
069
070@Category({ MasterTests.class, LargeTests.class })
071public class TestMergeTableRegionsProcedure {
072
073  @ClassRule
074  public static final HBaseClassTestRule CLASS_RULE =
075    HBaseClassTestRule.forClass(TestMergeTableRegionsProcedure.class);
076
077  private static final Logger LOG = LoggerFactory.getLogger(TestMergeTableRegionsProcedure.class);
078  @Rule
079  public final TestName name = new TestName();
080
081  private static final HBaseTestingUtil UTIL = new HBaseTestingUtil();
082
083  private static final int initialRegionCount = 4;
084  private final static byte[] FAMILY = Bytes.toBytes("FAMILY");
085  private static Admin admin;
086
087  private ProcedureMetrics mergeProcMetrics;
088  private ProcedureMetrics assignProcMetrics;
089  private ProcedureMetrics unassignProcMetrics;
090  private long mergeSubmittedCount = 0;
091  private long mergeFailedCount = 0;
092  private long assignSubmittedCount = 0;
093  private long assignFailedCount = 0;
094  private long unassignSubmittedCount = 0;
095  private long unassignFailedCount = 0;
096
097  private static void setupConf(Configuration conf) {
098    // Reduce the maximum attempts to speed up the test
099    conf.setInt("hbase.assignment.maximum.attempts", 3);
100    conf.setInt("hbase.master.maximum.ping.server.attempts", 3);
101    conf.setInt("hbase.master.ping.server.retry.sleep.interval", 1);
102    conf.setInt(MasterProcedureConstants.MASTER_PROCEDURE_THREADS, 1);
103  }
104
105  @BeforeClass
106  public static void setupCluster() throws Exception {
107    setupConf(UTIL.getConfiguration());
108    UTIL.startMiniCluster(1);
109    admin = UTIL.getAdmin();
110  }
111
112  @AfterClass
113  public static void cleanupTest() throws Exception {
114    UTIL.shutdownMiniCluster();
115  }
116
117  @Before
118  public void setup() throws Exception {
119    resetProcExecutorTestingKillFlag();
120    MasterProcedureTestingUtility.generateNonceGroup(UTIL.getHBaseCluster().getMaster());
121    MasterProcedureTestingUtility.generateNonce(UTIL.getHBaseCluster().getMaster());
122    // Turn off balancer so it doesn't cut in and mess up our placements.
123    admin.balancerSwitch(false, true);
124    // Turn off the meta scanner so it don't remove parent on us.
125    UTIL.getHBaseCluster().getMaster().setCatalogJanitorEnabled(false);
126    resetProcExecutorTestingKillFlag();
127    AssignmentManager am = UTIL.getHBaseCluster().getMaster().getAssignmentManager();
128    mergeProcMetrics = am.getAssignmentManagerMetrics().getMergeProcMetrics();
129    assignProcMetrics = am.getAssignmentManagerMetrics().getAssignProcMetrics();
130    unassignProcMetrics = am.getAssignmentManagerMetrics().getUnassignProcMetrics();
131  }
132
133  @After
134  public void tearDown() throws Exception {
135    resetProcExecutorTestingKillFlag();
136    for (TableDescriptor htd : admin.listTableDescriptors()) {
137      LOG.info("Tear down, remove table=" + htd.getTableName());
138      UTIL.deleteTable(htd.getTableName());
139    }
140  }
141
142  private void resetProcExecutorTestingKillFlag() {
143    final ProcedureExecutor<MasterProcedureEnv> procExec = getMasterProcedureExecutor();
144    ProcedureTestingUtility.setKillAndToggleBeforeStoreUpdate(procExec, false);
145    assertTrue("expected executor to be running", procExec.isRunning());
146  }
147
148  private int loadARowPerRegion(final Table t, List<RegionInfo> ris) throws IOException {
149    List<Put> puts = new ArrayList<>();
150    for (RegionInfo ri : ris) {
151      Put put = new Put(ri.getStartKey() == null || ri.getStartKey().length == 0
152        ? new byte[] { 'a' }
153        : ri.getStartKey());
154      put.addColumn(HConstants.CATALOG_FAMILY, HConstants.CATALOG_FAMILY,
155        HConstants.CATALOG_FAMILY);
156      puts.add(put);
157    }
158    t.put(puts);
159    return puts.size();
160  }
161
162  /**
163   * This tests two region merges
164   */
165  @Test
166  public void testMergeTwoRegions() throws Exception {
167    final TableName tableName = TableName.valueOf(this.name.getMethodName());
168    UTIL.createTable(tableName, new byte[][] { HConstants.CATALOG_FAMILY }, new byte[][] {
169      new byte[] { 'b' }, new byte[] { 'c' }, new byte[] { 'd' }, new byte[] { 'e' } });
170    testMerge(tableName, 2);
171  }
172
173  private void testMerge(TableName tableName, int mergeCount) throws IOException {
174    List<RegionInfo> ris = MetaTableAccessor.getTableRegions(UTIL.getConnection(), tableName);
175    int originalRegionCount = ris.size();
176    assertTrue(originalRegionCount > mergeCount);
177    RegionInfo[] regionsToMerge = ris.subList(0, mergeCount).toArray(new RegionInfo[] {});
178    int countOfRowsLoaded = 0;
179    try (Table table = UTIL.getConnection().getTable(tableName)) {
180      countOfRowsLoaded = loadARowPerRegion(table, ris);
181    }
182    assertEquals(countOfRowsLoaded, UTIL.countRows(tableName));
183
184    // collect AM metrics before test
185    collectAssignmentManagerMetrics();
186    final ProcedureExecutor<MasterProcedureEnv> procExec = getMasterProcedureExecutor();
187    MergeTableRegionsProcedure proc =
188      new MergeTableRegionsProcedure(procExec.getEnvironment(), regionsToMerge, true);
189    long procId = procExec.submitProcedure(proc);
190    ProcedureTestingUtility.waitProcedure(procExec, procId);
191    ProcedureTestingUtility.assertProcNotFailed(procExec, procId);
192    MetaTableAccessor.fullScanMetaAndPrint(UTIL.getConnection());
193    assertEquals(originalRegionCount - mergeCount + 1,
194      MetaTableAccessor.getTableRegions(UTIL.getConnection(), tableName).size());
195
196    assertEquals(mergeSubmittedCount + 1, mergeProcMetrics.getSubmittedCounter().getCount());
197    assertEquals(mergeFailedCount, mergeProcMetrics.getFailedCounter().getCount());
198    assertEquals(assignSubmittedCount + 1, assignProcMetrics.getSubmittedCounter().getCount());
199    assertEquals(assignFailedCount, assignProcMetrics.getFailedCounter().getCount());
200    assertEquals(unassignSubmittedCount + mergeCount,
201      unassignProcMetrics.getSubmittedCounter().getCount());
202    assertEquals(unassignFailedCount, unassignProcMetrics.getFailedCounter().getCount());
203
204    // Need to get the references cleaned out. Close of region will move them
205    // to archive so disable and reopen just to get rid of references to later
206    // when the catalogjanitor runs, it can do merged region cleanup.
207    admin.disableTable(tableName);
208    admin.enableTable(tableName);
209
210    // Can I purge the merged regions from hbase:meta? Check that all went
211    // well by looking at the merged row up in hbase:meta. It should have no
212    // more mention of the merged regions; they are purged as last step in
213    // the merged regions cleanup.
214    UTIL.getHBaseCluster().getMaster().setCatalogJanitorEnabled(true);
215    UTIL.getHBaseCluster().getMaster().getCatalogJanitor().triggerNow();
216    RegionInfo mergedRegion = proc.getMergedRegion();
217    RegionStateStore regionStateStore =
218      UTIL.getMiniHBaseCluster().getMaster().getAssignmentManager().getRegionStateStore();
219    while (ris != null && ris.get(0) != null && ris.get(1) != null) {
220      ris = regionStateStore.getMergeRegions(mergedRegion);
221      LOG.info("{} {}", Bytes.toStringBinary(mergedRegion.getRegionName()), ris);
222      Threads.sleep(1000);
223    }
224    assertEquals(countOfRowsLoaded, UTIL.countRows(tableName));
225  }
226
227  /**
228   * This tests ten region merges in one go.
229   */
230  @Test
231  public void testMergeTenRegions() throws Exception {
232    final TableName tableName = TableName.valueOf(this.name.getMethodName());
233    final ProcedureExecutor<MasterProcedureEnv> procExec = getMasterProcedureExecutor();
234    UTIL.createMultiRegionTable(tableName, HConstants.CATALOG_FAMILY);
235    testMerge(tableName, 10);
236  }
237
238  /**
239   * This tests two concurrent region merges
240   */
241  @Test
242  public void testMergeRegionsConcurrently() throws Exception {
243    final TableName tableName = TableName.valueOf("testMergeRegionsConcurrently");
244    final ProcedureExecutor<MasterProcedureEnv> procExec = getMasterProcedureExecutor();
245
246    List<RegionInfo> tableRegions = createTable(tableName);
247
248    RegionInfo[] regionsToMerge1 = new RegionInfo[2];
249    RegionInfo[] regionsToMerge2 = new RegionInfo[2];
250    regionsToMerge1[0] = tableRegions.get(0);
251    regionsToMerge1[1] = tableRegions.get(1);
252    regionsToMerge2[0] = tableRegions.get(2);
253    regionsToMerge2[1] = tableRegions.get(3);
254
255    // collect AM metrics before test
256    collectAssignmentManagerMetrics();
257
258    long procId1 = procExec.submitProcedure(
259      new MergeTableRegionsProcedure(procExec.getEnvironment(), regionsToMerge1, true));
260    long procId2 = procExec.submitProcedure(
261      new MergeTableRegionsProcedure(procExec.getEnvironment(), regionsToMerge2, true));
262    ProcedureTestingUtility.waitProcedure(procExec, procId1);
263    ProcedureTestingUtility.waitProcedure(procExec, procId2);
264    ProcedureTestingUtility.assertProcNotFailed(procExec, procId1);
265    ProcedureTestingUtility.assertProcNotFailed(procExec, procId2);
266    assertRegionCount(tableName, initialRegionCount - 2);
267
268    assertEquals(mergeSubmittedCount + 2, mergeProcMetrics.getSubmittedCounter().getCount());
269    assertEquals(mergeFailedCount, mergeProcMetrics.getFailedCounter().getCount());
270    assertEquals(assignSubmittedCount + 2, assignProcMetrics.getSubmittedCounter().getCount());
271    assertEquals(assignFailedCount, assignProcMetrics.getFailedCounter().getCount());
272    assertEquals(unassignSubmittedCount + 4, unassignProcMetrics.getSubmittedCounter().getCount());
273    assertEquals(unassignFailedCount, unassignProcMetrics.getFailedCounter().getCount());
274  }
275
276  @Test
277  public void testRecoveryAndDoubleExecution() throws Exception {
278    final TableName tableName = TableName.valueOf("testRecoveryAndDoubleExecution");
279    final ProcedureExecutor<MasterProcedureEnv> procExec = getMasterProcedureExecutor();
280
281    List<RegionInfo> tableRegions = createTable(tableName);
282
283    ProcedureTestingUtility.waitNoProcedureRunning(procExec);
284    ProcedureTestingUtility.setKillIfHasParent(procExec, false);
285    ProcedureTestingUtility.setKillAndToggleBeforeStoreUpdate(procExec, true);
286
287    RegionInfo[] regionsToMerge = new RegionInfo[2];
288    regionsToMerge[0] = tableRegions.get(0);
289    regionsToMerge[1] = tableRegions.get(1);
290
291    long procId = procExec.submitProcedure(
292      new MergeTableRegionsProcedure(procExec.getEnvironment(), regionsToMerge, true));
293
294    // Restart the executor and execute the step twice
295    MasterProcedureTestingUtility.testRecoveryAndDoubleExecution(procExec, procId);
296    ProcedureTestingUtility.assertProcNotFailed(procExec, procId);
297
298    assertRegionCount(tableName, initialRegionCount - 1);
299  }
300
301  @Test
302  public void testRollbackAndDoubleExecution() throws Exception {
303    final TableName tableName = TableName.valueOf("testRollbackAndDoubleExecution");
304    final ProcedureExecutor<MasterProcedureEnv> procExec = getMasterProcedureExecutor();
305
306    List<RegionInfo> tableRegions = createTable(tableName);
307
308    ProcedureTestingUtility.waitNoProcedureRunning(procExec);
309    ProcedureTestingUtility.setKillAndToggleBeforeStoreUpdate(procExec, true);
310
311    RegionInfo[] regionsToMerge = new RegionInfo[2];
312    regionsToMerge[0] = tableRegions.get(0);
313    regionsToMerge[1] = tableRegions.get(1);
314
315    long procId = procExec.submitProcedure(
316      new MergeTableRegionsProcedure(procExec.getEnvironment(), regionsToMerge, true));
317
318    // Failing before MERGE_TABLE_REGIONS_UPDATE_META we should trigger the rollback
319    // NOTE: the 8 (number of MERGE_TABLE_REGIONS_UPDATE_META step) is
320    // hardcoded, so you have to look at this test at least once when you add a new step.
321    int lastStep = 8;
322    MasterProcedureTestingUtility.testRollbackAndDoubleExecution(procExec, procId, lastStep, true);
323    assertEquals(initialRegionCount, UTIL.getAdmin().getRegions(tableName).size());
324    UTIL.waitUntilAllRegionsAssigned(tableName);
325    List<HRegion> regions = UTIL.getMiniHBaseCluster().getRegions(tableName);
326    assertEquals(initialRegionCount, regions.size());
327  }
328
329  @Test
330  public void testMergeWithoutPONR() throws Exception {
331    final TableName tableName = TableName.valueOf("testMergeWithoutPONR");
332    final ProcedureExecutor<MasterProcedureEnv> procExec = getMasterProcedureExecutor();
333
334    List<RegionInfo> tableRegions = createTable(tableName);
335
336    ProcedureTestingUtility.waitNoProcedureRunning(procExec);
337    ProcedureTestingUtility.setKillAndToggleBeforeStoreUpdate(procExec, true);
338
339    RegionInfo[] regionsToMerge = new RegionInfo[2];
340    regionsToMerge[0] = tableRegions.get(0);
341    regionsToMerge[1] = tableRegions.get(1);
342
343    long procId = procExec.submitProcedure(
344      new MergeTableRegionsProcedure(procExec.getEnvironment(), regionsToMerge, true));
345
346    // Execute until step 9 of split procedure
347    // NOTE: step 9 is after step MERGE_TABLE_REGIONS_UPDATE_META
348    MasterProcedureTestingUtility.testRecoveryAndDoubleExecution(procExec, procId, 9, false);
349
350    // Unset Toggle Kill and make ProcExec work correctly
351    ProcedureTestingUtility.setKillAndToggleBeforeStoreUpdate(procExec, false);
352    MasterProcedureTestingUtility.restartMasterProcedureExecutor(procExec);
353    ProcedureTestingUtility.waitProcedure(procExec, procId);
354
355    assertRegionCount(tableName, initialRegionCount - 1);
356  }
357
358  @Test
359  public void testMergingRegionWhileTakingSnapshot() throws Exception {
360    final TableName tableName = TableName.valueOf("testMergingRegionWhileTakingSnapshot");
361    final ProcedureExecutor<MasterProcedureEnv> procExec = getMasterProcedureExecutor();
362
363    List<RegionInfo> tableRegions = createTable(tableName);
364
365    ProcedureTestingUtility.waitNoProcedureRunning(procExec);
366
367    SnapshotDescription snapshot =
368      new SnapshotDescription("SnapshotProcedureTest", tableName, SnapshotType.FLUSH);
369    SnapshotProtos.SnapshotDescription snapshotProto =
370      ProtobufUtil.createHBaseProtosSnapshotDesc(snapshot);
371    snapshotProto = SnapshotDescriptionUtils.validate(snapshotProto,
372      UTIL.getHBaseCluster().getMaster().getConfiguration());
373    long snapshotProcId = procExec.submitProcedure(
374      new TestSnapshotProcedure.DelaySnapshotProcedure(procExec.getEnvironment(), snapshotProto));
375    UTIL.getHBaseCluster().getMaster().getSnapshotManager().registerSnapshotProcedure(snapshotProto,
376      snapshotProcId);
377
378    RegionInfo[] regionsToMerge = new RegionInfo[2];
379    regionsToMerge[0] = tableRegions.get(0);
380    regionsToMerge[1] = tableRegions.get(1);
381
382    long mergeProcId = procExec.submitProcedure(
383      new MergeTableRegionsProcedure(procExec.getEnvironment(), regionsToMerge, true));
384
385    ProcedureTestingUtility
386      .waitProcedure(UTIL.getHBaseCluster().getMaster().getMasterProcedureExecutor(), mergeProcId);
387    ProcedureTestingUtility.waitProcedure(
388      UTIL.getHBaseCluster().getMaster().getMasterProcedureExecutor(), snapshotProcId);
389
390    assertProcFailed(procExec, mergeProcId);
391    assertEquals(initialRegionCount, UTIL.getAdmin().getRegions(tableName).size());
392  }
393
394  private List<RegionInfo> createTable(final TableName tableName) throws Exception {
395    TableDescriptor desc = TableDescriptorBuilder.newBuilder(tableName)
396      .setColumnFamily(ColumnFamilyDescriptorBuilder.of(FAMILY)).build();
397    byte[][] splitRows = new byte[initialRegionCount - 1][];
398    for (int i = 0; i < splitRows.length; ++i) {
399      splitRows[i] = Bytes.toBytes(String.format("%d", i));
400    }
401    admin.createTable(desc, splitRows);
402    return assertRegionCount(tableName, initialRegionCount);
403  }
404
405  public List<RegionInfo> assertRegionCount(final TableName tableName, final int nregions)
406    throws Exception {
407    UTIL.waitUntilNoRegionsInTransition();
408    List<RegionInfo> tableRegions = admin.getRegions(tableName);
409    assertEquals(nregions, tableRegions.size());
410    return tableRegions;
411  }
412
413  private ProcedureExecutor<MasterProcedureEnv> getMasterProcedureExecutor() {
414    return UTIL.getHBaseCluster().getMaster().getMasterProcedureExecutor();
415  }
416
417  private void collectAssignmentManagerMetrics() {
418    mergeSubmittedCount = mergeProcMetrics.getSubmittedCounter().getCount();
419    mergeFailedCount = mergeProcMetrics.getFailedCounter().getCount();
420
421    assignSubmittedCount = assignProcMetrics.getSubmittedCounter().getCount();
422    assignFailedCount = assignProcMetrics.getFailedCounter().getCount();
423    unassignSubmittedCount = unassignProcMetrics.getSubmittedCounter().getCount();
424    unassignFailedCount = unassignProcMetrics.getFailedCounter().getCount();
425  }
426}