001/*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018package org.apache.hadoop.hbase.master.assignment;
019
020import static org.junit.Assert.assertEquals;
021import static org.junit.Assert.assertTrue;
022
023import java.io.IOException;
024import java.util.ArrayList;
025import java.util.List;
026import org.apache.hadoop.conf.Configuration;
027import org.apache.hadoop.hbase.HBaseClassTestRule;
028import org.apache.hadoop.hbase.HBaseTestingUtility;
029import org.apache.hadoop.hbase.HConstants;
030import org.apache.hadoop.hbase.MetaTableAccessor;
031import org.apache.hadoop.hbase.TableName;
032import org.apache.hadoop.hbase.client.Admin;
033import org.apache.hadoop.hbase.client.ColumnFamilyDescriptorBuilder;
034import org.apache.hadoop.hbase.client.Put;
035import org.apache.hadoop.hbase.client.RegionInfo;
036import org.apache.hadoop.hbase.client.Table;
037import org.apache.hadoop.hbase.client.TableDescriptor;
038import org.apache.hadoop.hbase.client.TableDescriptorBuilder;
039import org.apache.hadoop.hbase.master.procedure.MasterProcedureConstants;
040import org.apache.hadoop.hbase.master.procedure.MasterProcedureEnv;
041import org.apache.hadoop.hbase.master.procedure.MasterProcedureTestingUtility;
042import org.apache.hadoop.hbase.procedure2.ProcedureExecutor;
043import org.apache.hadoop.hbase.procedure2.ProcedureMetrics;
044import org.apache.hadoop.hbase.procedure2.ProcedureTestingUtility;
045import org.apache.hadoop.hbase.regionserver.HRegion;
046import org.apache.hadoop.hbase.testclassification.LargeTests;
047import org.apache.hadoop.hbase.testclassification.MasterTests;
048import org.apache.hadoop.hbase.util.Bytes;
049import org.apache.hadoop.hbase.util.Threads;
050import org.junit.After;
051import org.junit.AfterClass;
052import org.junit.Before;
053import org.junit.BeforeClass;
054import org.junit.ClassRule;
055import org.junit.Rule;
056import org.junit.Test;
057import org.junit.experimental.categories.Category;
058import org.junit.rules.TestName;
059import org.slf4j.Logger;
060import org.slf4j.LoggerFactory;
061
062@Category({ MasterTests.class, LargeTests.class })
063public class TestMergeTableRegionsProcedure {
064
  /** Class-level HBase test rule, created for this test class. */
  @ClassRule
  public static final HBaseClassTestRule CLASS_RULE =
    HBaseClassTestRule.forClass(TestMergeTableRegionsProcedure.class);

  private static final Logger LOG = LoggerFactory.getLogger(TestMergeTableRegionsProcedure.class);
  /** Exposes the name of the running test method; some tests use it as their table name. */
  @Rule
  public final TestName name = new TestName();

  /** Shared testing utility; its mini cluster is started in setupCluster(). */
  private static final HBaseTestingUtility UTIL = new HBaseTestingUtility();

  /** Number of regions a table built by createTable() starts with. */
  private static final int initialRegionCount = 4;
  /** Column family of the tables built by createTable(). */
  private final static byte[] FAMILY = Bytes.toBytes("FAMILY");
  /** Admin handle obtained from UTIL in setupCluster(). */
  private static Admin admin;

  // Procedure metrics handles, looked up from the AssignmentManager in setup().
  private ProcedureMetrics mergeProcMetrics;
  private ProcedureMetrics assignProcMetrics;
  private ProcedureMetrics unassignProcMetrics;
  // Counter snapshots taken by collectAssignmentManagerMetrics(); tests assert on the
  // difference between these values and the live counters.
  private long mergeSubmittedCount = 0;
  private long mergeFailedCount = 0;
  private long assignSubmittedCount = 0;
  private long assignFailedCount = 0;
  private long unassignSubmittedCount = 0;
  private long unassignFailedCount = 0;
088
089  private static void setupConf(Configuration conf) {
090    // Reduce the maximum attempts to speed up the test
091    conf.setInt("hbase.assignment.maximum.attempts", 3);
092    conf.setInt("hbase.master.maximum.ping.server.attempts", 3);
093    conf.setInt("hbase.master.ping.server.retry.sleep.interval", 1);
094    conf.setInt(MasterProcedureConstants.MASTER_PROCEDURE_THREADS, 1);
095  }
096
097  @BeforeClass
098  public static void setupCluster() throws Exception {
099    setupConf(UTIL.getConfiguration());
100    UTIL.startMiniCluster(1);
101    admin = UTIL.getAdmin();
102  }
103
  /** Shuts down the mini cluster once all tests of this class have run. */
  @AfterClass
  public static void cleanupTest() throws Exception {
    UTIL.shutdownMiniCluster();
  }
108
109  @Before
110  public void setup() throws Exception {
111    resetProcExecutorTestingKillFlag();
112    MasterProcedureTestingUtility.generateNonceGroup(UTIL.getHBaseCluster().getMaster());
113    MasterProcedureTestingUtility.generateNonce(UTIL.getHBaseCluster().getMaster());
114    // Turn off balancer so it doesn't cut in and mess up our placements.
115    admin.balancerSwitch(false, true);
116    // Turn off the meta scanner so it don't remove parent on us.
117    UTIL.getHBaseCluster().getMaster().setCatalogJanitorEnabled(false);
118    resetProcExecutorTestingKillFlag();
119    AssignmentManager am = UTIL.getHBaseCluster().getMaster().getAssignmentManager();
120    mergeProcMetrics = am.getAssignmentManagerMetrics().getMergeProcMetrics();
121    assignProcMetrics = am.getAssignmentManagerMetrics().getAssignProcMetrics();
122    unassignProcMetrics = am.getAssignmentManagerMetrics().getUnassignProcMetrics();
123  }
124
125  @After
126  public void tearDown() throws Exception {
127    resetProcExecutorTestingKillFlag();
128    for (TableDescriptor htd : admin.listTableDescriptors()) {
129      LOG.info("Tear down, remove table=" + htd.getTableName());
130      UTIL.deleteTable(htd.getTableName());
131    }
132  }
133
134  private void resetProcExecutorTestingKillFlag() {
135    final ProcedureExecutor<MasterProcedureEnv> procExec = getMasterProcedureExecutor();
136    ProcedureTestingUtility.setKillAndToggleBeforeStoreUpdate(procExec, false);
137    assertTrue("expected executor to be running", procExec.isRunning());
138  }
139
140  private int loadARowPerRegion(final Table t, List<RegionInfo> ris) throws IOException {
141    List<Put> puts = new ArrayList<>();
142    for (RegionInfo ri : ris) {
143      Put put = new Put(ri.getStartKey() == null || ri.getStartKey().length == 0
144        ? new byte[] { 'a' }
145        : ri.getStartKey());
146      put.addColumn(HConstants.CATALOG_FAMILY, HConstants.CATALOG_FAMILY,
147        HConstants.CATALOG_FAMILY);
148      puts.add(put);
149    }
150    t.put(puts);
151    return puts.size();
152  }
153
154  /**
155   * This tests two region merges
156   */
157  @Test
158  public void testMergeTwoRegions() throws Exception {
159    final TableName tableName = TableName.valueOf(this.name.getMethodName());
160    UTIL.createTable(tableName, new byte[][] { HConstants.CATALOG_FAMILY }, new byte[][] {
161      new byte[] { 'b' }, new byte[] { 'c' }, new byte[] { 'd' }, new byte[] { 'e' } });
162    testMerge(tableName, 2);
163  }
164
165  private void testMerge(TableName tableName, int mergeCount) throws IOException {
166    List<RegionInfo> ris = MetaTableAccessor.getTableRegions(UTIL.getConnection(), tableName);
167    int originalRegionCount = ris.size();
168    assertTrue(originalRegionCount > mergeCount);
169    RegionInfo[] regionsToMerge = ris.subList(0, mergeCount).toArray(new RegionInfo[] {});
170    int countOfRowsLoaded = 0;
171    try (Table table = UTIL.getConnection().getTable(tableName)) {
172      countOfRowsLoaded = loadARowPerRegion(table, ris);
173    }
174    assertEquals(countOfRowsLoaded, UTIL.countRows(tableName));
175
176    // collect AM metrics before test
177    collectAssignmentManagerMetrics();
178    final ProcedureExecutor<MasterProcedureEnv> procExec = getMasterProcedureExecutor();
179    MergeTableRegionsProcedure proc =
180      new MergeTableRegionsProcedure(procExec.getEnvironment(), regionsToMerge, true);
181    long procId = procExec.submitProcedure(proc);
182    ProcedureTestingUtility.waitProcedure(procExec, procId);
183    ProcedureTestingUtility.assertProcNotFailed(procExec, procId);
184    MetaTableAccessor.fullScanMetaAndPrint(UTIL.getConnection());
185    assertEquals(originalRegionCount - mergeCount + 1,
186      MetaTableAccessor.getTableRegions(UTIL.getConnection(), tableName).size());
187
188    assertEquals(mergeSubmittedCount + 1, mergeProcMetrics.getSubmittedCounter().getCount());
189    assertEquals(mergeFailedCount, mergeProcMetrics.getFailedCounter().getCount());
190    assertEquals(assignSubmittedCount + 1, assignProcMetrics.getSubmittedCounter().getCount());
191    assertEquals(assignFailedCount, assignProcMetrics.getFailedCounter().getCount());
192    assertEquals(unassignSubmittedCount + mergeCount,
193      unassignProcMetrics.getSubmittedCounter().getCount());
194    assertEquals(unassignFailedCount, unassignProcMetrics.getFailedCounter().getCount());
195
196    // Need to get the references cleaned out. Close of region will move them
197    // to archive so disable and reopen just to get rid of references to later
198    // when the catalogjanitor runs, it can do merged region cleanup.
199    admin.disableTable(tableName);
200    admin.enableTable(tableName);
201
202    // Can I purge the merged regions from hbase:meta? Check that all went
203    // well by looking at the merged row up in hbase:meta. It should have no
204    // more mention of the merged regions; they are purged as last step in
205    // the merged regions cleanup.
206    UTIL.getHBaseCluster().getMaster().setCatalogJanitorEnabled(true);
207    UTIL.getHBaseCluster().getMaster().getCatalogJanitor().triggerNow();
208    byte[] mergedRegion = proc.getMergedRegion().getRegionName();
209    while (ris != null && ris.get(0) != null && ris.get(1) != null) {
210      ris = MetaTableAccessor.getMergeRegions(UTIL.getConnection(), mergedRegion);
211      LOG.info("{} {}", Bytes.toStringBinary(mergedRegion), ris);
212      Threads.sleep(1000);
213    }
214    assertEquals(countOfRowsLoaded, UTIL.countRows(tableName));
215  }
216
217  /**
218   * This tests ten region merges in one go.
219   */
220  @Test
221  public void testMergeTenRegions() throws Exception {
222    final TableName tableName = TableName.valueOf(this.name.getMethodName());
223    final ProcedureExecutor<MasterProcedureEnv> procExec = getMasterProcedureExecutor();
224    UTIL.createMultiRegionTable(tableName, HConstants.CATALOG_FAMILY);
225    testMerge(tableName, 10);
226  }
227
228  /**
229   * This tests two concurrent region merges
230   */
231  @Test
232  public void testMergeRegionsConcurrently() throws Exception {
233    final TableName tableName = TableName.valueOf("testMergeRegionsConcurrently");
234    final ProcedureExecutor<MasterProcedureEnv> procExec = getMasterProcedureExecutor();
235
236    List<RegionInfo> tableRegions = createTable(tableName);
237
238    RegionInfo[] regionsToMerge1 = new RegionInfo[2];
239    RegionInfo[] regionsToMerge2 = new RegionInfo[2];
240    regionsToMerge1[0] = tableRegions.get(0);
241    regionsToMerge1[1] = tableRegions.get(1);
242    regionsToMerge2[0] = tableRegions.get(2);
243    regionsToMerge2[1] = tableRegions.get(3);
244
245    // collect AM metrics before test
246    collectAssignmentManagerMetrics();
247
248    long procId1 = procExec.submitProcedure(
249      new MergeTableRegionsProcedure(procExec.getEnvironment(), regionsToMerge1, true));
250    long procId2 = procExec.submitProcedure(
251      new MergeTableRegionsProcedure(procExec.getEnvironment(), regionsToMerge2, true));
252    ProcedureTestingUtility.waitProcedure(procExec, procId1);
253    ProcedureTestingUtility.waitProcedure(procExec, procId2);
254    ProcedureTestingUtility.assertProcNotFailed(procExec, procId1);
255    ProcedureTestingUtility.assertProcNotFailed(procExec, procId2);
256    assertRegionCount(tableName, initialRegionCount - 2);
257
258    assertEquals(mergeSubmittedCount + 2, mergeProcMetrics.getSubmittedCounter().getCount());
259    assertEquals(mergeFailedCount, mergeProcMetrics.getFailedCounter().getCount());
260    assertEquals(assignSubmittedCount + 2, assignProcMetrics.getSubmittedCounter().getCount());
261    assertEquals(assignFailedCount, assignProcMetrics.getFailedCounter().getCount());
262    assertEquals(unassignSubmittedCount + 4, unassignProcMetrics.getSubmittedCounter().getCount());
263    assertEquals(unassignFailedCount, unassignProcMetrics.getFailedCounter().getCount());
264  }
265
266  @Test
267  public void testRecoveryAndDoubleExecution() throws Exception {
268    final TableName tableName = TableName.valueOf("testRecoveryAndDoubleExecution");
269    final ProcedureExecutor<MasterProcedureEnv> procExec = getMasterProcedureExecutor();
270
271    List<RegionInfo> tableRegions = createTable(tableName);
272
273    ProcedureTestingUtility.waitNoProcedureRunning(procExec);
274    ProcedureTestingUtility.setKillIfHasParent(procExec, false);
275    ProcedureTestingUtility.setKillAndToggleBeforeStoreUpdate(procExec, true);
276
277    RegionInfo[] regionsToMerge = new RegionInfo[2];
278    regionsToMerge[0] = tableRegions.get(0);
279    regionsToMerge[1] = tableRegions.get(1);
280
281    long procId = procExec.submitProcedure(
282      new MergeTableRegionsProcedure(procExec.getEnvironment(), regionsToMerge, true));
283
284    // Restart the executor and execute the step twice
285    MasterProcedureTestingUtility.testRecoveryAndDoubleExecution(procExec, procId);
286    ProcedureTestingUtility.assertProcNotFailed(procExec, procId);
287
288    assertRegionCount(tableName, initialRegionCount - 1);
289  }
290
291  @Test
292  public void testRollbackAndDoubleExecution() throws Exception {
293    final TableName tableName = TableName.valueOf("testRollbackAndDoubleExecution");
294    final ProcedureExecutor<MasterProcedureEnv> procExec = getMasterProcedureExecutor();
295
296    List<RegionInfo> tableRegions = createTable(tableName);
297
298    ProcedureTestingUtility.waitNoProcedureRunning(procExec);
299    ProcedureTestingUtility.setKillAndToggleBeforeStoreUpdate(procExec, true);
300
301    RegionInfo[] regionsToMerge = new RegionInfo[2];
302    regionsToMerge[0] = tableRegions.get(0);
303    regionsToMerge[1] = tableRegions.get(1);
304
305    long procId = procExec.submitProcedure(
306      new MergeTableRegionsProcedure(procExec.getEnvironment(), regionsToMerge, true));
307
308    // Failing before MERGE_TABLE_REGIONS_UPDATE_META we should trigger the rollback
309    // NOTE: the 8 (number of MERGE_TABLE_REGIONS_UPDATE_META step) is
310    // hardcoded, so you have to look at this test at least once when you add a new step.
311    int lastStep = 8;
312    MasterProcedureTestingUtility.testRollbackAndDoubleExecution(procExec, procId, lastStep, true);
313    assertEquals(initialRegionCount, UTIL.getAdmin().getRegions(tableName).size());
314    UTIL.waitUntilAllRegionsAssigned(tableName);
315    List<HRegion> regions = UTIL.getMiniHBaseCluster().getRegions(tableName);
316    assertEquals(initialRegionCount, regions.size());
317  }
318
319  @Test
320  public void testMergeWithoutPONR() throws Exception {
321    final TableName tableName = TableName.valueOf("testMergeWithoutPONR");
322    final ProcedureExecutor<MasterProcedureEnv> procExec = getMasterProcedureExecutor();
323
324    List<RegionInfo> tableRegions = createTable(tableName);
325
326    ProcedureTestingUtility.waitNoProcedureRunning(procExec);
327    ProcedureTestingUtility.setKillAndToggleBeforeStoreUpdate(procExec, true);
328
329    RegionInfo[] regionsToMerge = new RegionInfo[2];
330    regionsToMerge[0] = tableRegions.get(0);
331    regionsToMerge[1] = tableRegions.get(1);
332
333    long procId = procExec.submitProcedure(
334      new MergeTableRegionsProcedure(procExec.getEnvironment(), regionsToMerge, true));
335
336    // Execute until step 9 of split procedure
337    // NOTE: step 9 is after step MERGE_TABLE_REGIONS_UPDATE_META
338    MasterProcedureTestingUtility.testRecoveryAndDoubleExecution(procExec, procId, 9, false);
339
340    // Unset Toggle Kill and make ProcExec work correctly
341    ProcedureTestingUtility.setKillAndToggleBeforeStoreUpdate(procExec, false);
342    MasterProcedureTestingUtility.restartMasterProcedureExecutor(procExec);
343    ProcedureTestingUtility.waitProcedure(procExec, procId);
344
345    assertRegionCount(tableName, initialRegionCount - 1);
346  }
347
348  private List<RegionInfo> createTable(final TableName tableName) throws Exception {
349    TableDescriptor desc = TableDescriptorBuilder.newBuilder(tableName)
350      .setColumnFamily(ColumnFamilyDescriptorBuilder.of(FAMILY)).build();
351    byte[][] splitRows = new byte[initialRegionCount - 1][];
352    for (int i = 0; i < splitRows.length; ++i) {
353      splitRows[i] = Bytes.toBytes(String.format("%d", i));
354    }
355    admin.createTable(desc, splitRows);
356    return assertRegionCount(tableName, initialRegionCount);
357  }
358
359  public List<RegionInfo> assertRegionCount(final TableName tableName, final int nregions)
360    throws Exception {
361    UTIL.waitUntilNoRegionsInTransition();
362    List<RegionInfo> tableRegions = admin.getRegions(tableName);
363    assertEquals(nregions, tableRegions.size());
364    return tableRegions;
365  }
366
  /** Returns the procedure executor of the mini cluster's master. */
  private ProcedureExecutor<MasterProcedureEnv> getMasterProcedureExecutor() {
    return UTIL.getHBaseCluster().getMaster().getMasterProcedureExecutor();
  }
370
371  private void collectAssignmentManagerMetrics() {
372    mergeSubmittedCount = mergeProcMetrics.getSubmittedCounter().getCount();
373    mergeFailedCount = mergeProcMetrics.getFailedCounter().getCount();
374
375    assignSubmittedCount = assignProcMetrics.getSubmittedCounter().getCount();
376    assignFailedCount = assignProcMetrics.getFailedCounter().getCount();
377    unassignSubmittedCount = unassignProcMetrics.getSubmittedCounter().getCount();
378    unassignFailedCount = unassignProcMetrics.getFailedCounter().getCount();
379  }
380}