001/*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018package org.apache.hadoop.hbase.regionserver;
019
020import static org.apache.hadoop.hbase.client.TableDescriptorBuilder.SPLIT_POLICY;
021import static org.junit.Assert.assertEquals;
022import static org.junit.Assert.assertFalse;
023import static org.junit.Assert.assertNotEquals;
024import static org.junit.Assert.assertNotNull;
025import static org.junit.Assert.assertNotSame;
026import static org.junit.Assert.assertNull;
027import static org.junit.Assert.assertTrue;
028import static org.junit.Assert.fail;
029
030import java.io.IOException;
031import java.lang.reflect.Field;
032import java.util.ArrayList;
033import java.util.Collection;
034import java.util.List;
035import java.util.Map;
036import java.util.Optional;
037import java.util.concurrent.CountDownLatch;
038import java.util.concurrent.ExecutionException;
039import java.util.concurrent.TimeUnit;
040import java.util.concurrent.TimeoutException;
041import java.util.concurrent.atomic.AtomicBoolean;
042import org.apache.hadoop.conf.Configuration;
043import org.apache.hadoop.fs.FileSystem;
044import org.apache.hadoop.fs.Path;
045import org.apache.hadoop.hbase.CellComparator;
046import org.apache.hadoop.hbase.Coprocessor;
047import org.apache.hadoop.hbase.CoprocessorEnvironment;
048import org.apache.hadoop.hbase.DoNotRetryIOException;
049import org.apache.hadoop.hbase.HBaseClassTestRule;
050import org.apache.hadoop.hbase.HBaseTestingUtil;
051import org.apache.hadoop.hbase.HConstants;
052import org.apache.hadoop.hbase.MasterNotRunningException;
053import org.apache.hadoop.hbase.PrivateCellUtil;
054import org.apache.hadoop.hbase.ServerName;
055import org.apache.hadoop.hbase.SingleProcessHBaseCluster;
056import org.apache.hadoop.hbase.StartTestingClusterOption;
057import org.apache.hadoop.hbase.TableName;
058import org.apache.hadoop.hbase.ZooKeeperConnectionException;
059import org.apache.hadoop.hbase.client.Admin;
060import org.apache.hadoop.hbase.client.ColumnFamilyDescriptorBuilder;
061import org.apache.hadoop.hbase.client.Consistency;
062import org.apache.hadoop.hbase.client.Delete;
063import org.apache.hadoop.hbase.client.DoNotRetryRegionException;
064import org.apache.hadoop.hbase.client.Get;
065import org.apache.hadoop.hbase.client.Mutation;
066import org.apache.hadoop.hbase.client.Put;
067import org.apache.hadoop.hbase.client.RegionInfo;
068import org.apache.hadoop.hbase.client.Result;
069import org.apache.hadoop.hbase.client.ResultScanner;
070import org.apache.hadoop.hbase.client.Scan;
071import org.apache.hadoop.hbase.client.Table;
072import org.apache.hadoop.hbase.client.TableDescriptor;
073import org.apache.hadoop.hbase.client.TableDescriptorBuilder;
074import org.apache.hadoop.hbase.client.TestReplicasClient.SlowMeCopro;
075import org.apache.hadoop.hbase.coprocessor.MasterCoprocessor;
076import org.apache.hadoop.hbase.coprocessor.MasterCoprocessorEnvironment;
077import org.apache.hadoop.hbase.coprocessor.MasterObserver;
078import org.apache.hadoop.hbase.coprocessor.ObserverContext;
079import org.apache.hadoop.hbase.io.HFileLink;
080import org.apache.hadoop.hbase.io.Reference;
081import org.apache.hadoop.hbase.master.HMaster;
082import org.apache.hadoop.hbase.master.MasterRpcServices;
083import org.apache.hadoop.hbase.master.RegionState;
084import org.apache.hadoop.hbase.master.RegionState.State;
085import org.apache.hadoop.hbase.master.assignment.AssignmentManager;
086import org.apache.hadoop.hbase.master.assignment.AssignmentTestingUtil;
087import org.apache.hadoop.hbase.master.assignment.RegionStateNode;
088import org.apache.hadoop.hbase.master.assignment.RegionStates;
089import org.apache.hadoop.hbase.procedure2.ProcedureTestingUtility;
090import org.apache.hadoop.hbase.regionserver.compactions.CompactionContext;
091import org.apache.hadoop.hbase.regionserver.compactions.CompactionLifeCycleTracker;
092import org.apache.hadoop.hbase.regionserver.throttle.NoLimitThroughputController;
093import org.apache.hadoop.hbase.testclassification.LargeTests;
094import org.apache.hadoop.hbase.testclassification.RegionServerTests;
095import org.apache.hadoop.hbase.util.Bytes;
096import org.apache.hadoop.hbase.util.CommonFSUtils;
097import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
098import org.apache.hadoop.hbase.util.FSUtils;
099import org.apache.hadoop.hbase.util.FutureUtils;
100import org.apache.hadoop.hbase.util.HBaseFsck;
101import org.apache.hadoop.hbase.util.JVMClusterUtil.RegionServerThread;
102import org.apache.hadoop.hbase.util.Threads;
103import org.apache.zookeeper.KeeperException;
104import org.apache.zookeeper.KeeperException.NodeExistsException;
105import org.junit.After;
106import org.junit.AfterClass;
107import org.junit.Assert;
108import org.junit.Before;
109import org.junit.BeforeClass;
110import org.junit.ClassRule;
111import org.junit.Rule;
112import org.junit.Test;
113import org.junit.experimental.categories.Category;
114import org.junit.rules.TestName;
115import org.mockito.Mockito;
116import org.slf4j.Logger;
117import org.slf4j.LoggerFactory;
118
119import org.apache.hbase.thirdparty.com.google.common.io.Closeables;
120import org.apache.hbase.thirdparty.com.google.protobuf.RpcController;
121import org.apache.hbase.thirdparty.com.google.protobuf.ServiceException;
122
123import org.apache.hadoop.hbase.shaded.protobuf.ProtobufUtil;
124import org.apache.hadoop.hbase.shaded.protobuf.generated.RegionServerStatusProtos.RegionStateTransition.TransitionCode;
125import org.apache.hadoop.hbase.shaded.protobuf.generated.RegionServerStatusProtos.ReportRegionStateTransitionRequest;
126import org.apache.hadoop.hbase.shaded.protobuf.generated.RegionServerStatusProtos.ReportRegionStateTransitionResponse;
127
128/**
129 * The below tests are testing split region against a running cluster
130 */
131@Category({ RegionServerTests.class, LargeTests.class })
132public class TestSplitTransactionOnCluster {
133
  // JUnit class-level rule built for this test class via HBaseClassTestRule.forClass.
  @ClassRule
  public static final HBaseClassTestRule CLASS_RULE =
    HBaseClassTestRule.forClass(TestSplitTransactionOnCluster.class);

  private static final Logger LOG = LoggerFactory.getLogger(TestSplitTransactionOnCluster.class);
  // Per-test handles; both are (re)assigned from TESTING_UTIL in setup().
  private Admin admin = null;
  private SingleProcessHBaseCluster cluster = null;
  // Used in before() as both the region server count and the data node count of the mini cluster.
  private static final int NB_SERVERS = 3;

  // Testing utility shared by all tests in this class; the mini cluster is started in before()
  // and shut down in after().
  static final HBaseTestingUtil TESTING_UTIL = new HBaseTestingUtil();

  // Exposes the running test method's name; tests use it to derive their table name.
  @Rule
  public TestName name = new TestName();
147
148  @BeforeClass
149  public static void before() throws Exception {
150    TESTING_UTIL.getConfiguration().setInt(HConstants.HBASE_BALANCER_PERIOD, 60000);
151    StartTestingClusterOption option = StartTestingClusterOption.builder()
152      .masterClass(MyMaster.class).numRegionServers(NB_SERVERS).numDataNodes(NB_SERVERS).build();
153    TESTING_UTIL.startMiniCluster(option);
154  }
155
  /** Shuts down the mini cluster started in {@link #before()} once all tests have run. */
  @AfterClass
  public static void after() throws Exception {
    TESTING_UTIL.shutdownMiniCluster();
  }
160
161  @Before
162  public void setup() throws IOException {
163    TESTING_UTIL.ensureSomeNonStoppedRegionServersAvailable(NB_SERVERS);
164    this.admin = TESTING_UTIL.getAdmin();
165    this.cluster = TESTING_UTIL.getMiniHBaseCluster();
166  }
167
168  @After
169  public void tearDown() throws Exception {
170    this.admin.close();
171    for (TableDescriptor htd : this.admin.listTableDescriptors()) {
172      LOG.info("Tear down, remove table=" + htd.getTableName());
173      TESTING_UTIL.deleteTable(htd.getTableName());
174    }
175  }
176
177  private RegionInfo getAndCheckSingleTableRegion(final List<HRegion> regions)
178    throws IOException, InterruptedException {
179    assertEquals(1, regions.size());
180    RegionInfo hri = regions.get(0).getRegionInfo();
181    AssignmentTestingUtil.waitForAssignment(cluster.getMaster().getAssignmentManager(), hri);
182    return hri;
183  }
184
185  private void requestSplitRegion(final HRegionServer rsServer, final Region region,
186    final byte[] midKey) throws IOException {
187    long procId = cluster.getMaster().splitRegion(region.getRegionInfo(), midKey, 0, 0);
188    // wait for the split to complete or get interrupted. If the split completes successfully,
189    // the procedure will return true; if the split fails, the procedure would throw exception.
190    ProcedureTestingUtility.waitProcedure(cluster.getMaster().getMasterProcedureExecutor(), procId);
191  }
192
193  @Test
194  public void testRITStateForRollback() throws Exception {
195    final TableName tableName = TableName.valueOf(name.getMethodName());
196    final HMaster master = cluster.getMaster();
197    try {
198      // Create table then get the single region for our new table.
199      Table t = createTableAndWait(tableName, Bytes.toBytes("cf"));
200      final List<HRegion> regions = cluster.getRegions(tableName);
201      final RegionInfo hri = getAndCheckSingleTableRegion(regions);
202      insertData(tableName, admin, t);
203      t.close();
204
205      // Turn off balancer so it doesn't cut in and mess up our placements.
206      this.admin.balancerSwitch(false, true);
207      // Turn off the meta scanner so it don't remove parent on us.
208      master.setCatalogJanitorEnabled(false);
209
210      // find a splittable region
211      final HRegion region = findSplittableRegion(regions);
212      assertTrue("not able to find a splittable region", region != null);
213
214      // install master co-processor to fail splits
215      master.getMasterCoprocessorHost().load(FailingSplitMasterObserver.class,
216        Coprocessor.PRIORITY_USER, master.getConfiguration());
217
218      // split async
219      this.admin.splitRegionAsync(region.getRegionInfo().getRegionName(), new byte[] { 42 });
220
221      // we have to wait until the SPLITTING state is seen by the master
222      FailingSplitMasterObserver observer =
223        master.getMasterCoprocessorHost().findCoprocessor(FailingSplitMasterObserver.class);
224      assertNotNull(observer);
225      observer.latch.await();
226
227      LOG.info("Waiting for region to come out of RIT");
228      while (!cluster.getMaster().getAssignmentManager().getRegionStates().isRegionOnline(hri)) {
229        Threads.sleep(100);
230      }
231      assertTrue(cluster.getMaster().getAssignmentManager().getRegionStates().isRegionOnline(hri));
232    } finally {
233      admin.balancerSwitch(true, false);
234      master.setCatalogJanitorEnabled(true);
235      abortAndWaitForMaster();
236      TESTING_UTIL.deleteTable(tableName);
237    }
238  }
239
240  @Test
241  public void testSplitFailedCompactionAndSplit() throws Exception {
242    final TableName tableName = TableName.valueOf(name.getMethodName());
243    // Create table then get the single region for our new table.
244    byte[] cf = Bytes.toBytes("cf");
245    TableDescriptor htd = TableDescriptorBuilder.newBuilder(tableName)
246      .setColumnFamily(ColumnFamilyDescriptorBuilder.of(cf)).build();
247    admin.createTable(htd);
248
249    for (int i = 0; cluster.getRegions(tableName).isEmpty() && i < 100; i++) {
250      Thread.sleep(100);
251    }
252    assertEquals(1, cluster.getRegions(tableName).size());
253
254    HRegion region = cluster.getRegions(tableName).get(0);
255    HStore store = region.getStore(cf);
256    int regionServerIndex = cluster.getServerWith(region.getRegionInfo().getRegionName());
257    HRegionServer regionServer = cluster.getRegionServer(regionServerIndex);
258
259    Table t = TESTING_UTIL.getConnection().getTable(tableName);
260    // insert data
261    insertData(tableName, admin, t);
262    insertData(tableName, admin, t);
263
264    int fileNum = store.getStorefiles().size();
265    // 0, Compaction Request
266    store.triggerMajorCompaction();
267    Optional<CompactionContext> cc = store.requestCompaction();
268    assertTrue(cc.isPresent());
269    // 1, A timeout split
270    // 1.1 close region
271    assertEquals(2, region.close(false).get(cf).size());
272    // 1.2 rollback and Region initialize again
273    region.initialize();
274
275    // 2, Run Compaction cc
276    assertFalse(region.compact(cc.get(), store, NoLimitThroughputController.INSTANCE));
277    assertTrue(fileNum > store.getStorefiles().size());
278
279    // 3, Split
280    requestSplitRegion(regionServer, region, Bytes.toBytes("row3"));
281    assertEquals(2, cluster.getRegions(tableName).size());
282  }
283
284  @Test
285  public void testSplitCompactWithPriority() throws Exception {
286    final TableName tableName = TableName.valueOf(name.getMethodName());
287    // Create table then get the single region for our new table.
288    byte[] cf = Bytes.toBytes("cf");
289    TableDescriptor htd = TableDescriptorBuilder.newBuilder(tableName)
290      .setColumnFamily(ColumnFamilyDescriptorBuilder.of(cf)).build();
291    admin.createTable(htd);
292
293    assertNotEquals("Unable to retrieve regions of the table", -1,
294      TESTING_UTIL.waitFor(10000, () -> cluster.getRegions(tableName).size() == 1));
295
296    HRegion region = cluster.getRegions(tableName).get(0);
297    HStore store = region.getStore(cf);
298    int regionServerIndex = cluster.getServerWith(region.getRegionInfo().getRegionName());
299    HRegionServer regionServer = cluster.getRegionServer(regionServerIndex);
300
301    Table table = TESTING_UTIL.getConnection().getTable(tableName);
302    // insert data
303    insertData(tableName, admin, table);
304    insertData(tableName, admin, table, 20);
305    insertData(tableName, admin, table, 40);
306
307    // Compaction Request
308    store.triggerMajorCompaction();
309    Optional<CompactionContext> compactionContext = store.requestCompaction();
310    assertTrue(compactionContext.isPresent());
311    assertFalse(compactionContext.get().getRequest().isAfterSplit());
312    assertEquals(compactionContext.get().getRequest().getPriority(), 13);
313
314    // Split
315    long procId =
316      cluster.getMaster().splitRegion(region.getRegionInfo(), Bytes.toBytes("row4"), 0, 0);
317
318    // wait for the split to complete or get interrupted. If the split completes successfully,
319    // the procedure will return true; if the split fails, the procedure would throw exception.
320    ProcedureTestingUtility.waitProcedure(cluster.getMaster().getMasterProcedureExecutor(), procId);
321    Thread.sleep(3000);
322    assertNotEquals("Table is not split properly?", -1,
323      TESTING_UTIL.waitFor(3000, () -> cluster.getRegions(tableName).size() == 2));
324    // we have 2 daughter regions
325    HRegion hRegion1 = cluster.getRegions(tableName).get(0);
326    HRegion hRegion2 = cluster.getRegions(tableName).get(1);
327    HStore hStore1 = hRegion1.getStore(cf);
328    HStore hStore2 = hRegion2.getStore(cf);
329
330    // For hStore1 && hStore2, set mock reference to one of the storeFiles
331    StoreFileInfo storeFileInfo1 = new ArrayList<>(hStore1.getStorefiles()).get(0).getFileInfo();
332    StoreFileInfo storeFileInfo2 = new ArrayList<>(hStore2.getStorefiles()).get(0).getFileInfo();
333    Field field = StoreFileInfo.class.getDeclaredField("reference");
334    field.setAccessible(true);
335    field.set(storeFileInfo1, Mockito.mock(Reference.class));
336    field.set(storeFileInfo2, Mockito.mock(Reference.class));
337    hStore1.triggerMajorCompaction();
338    hStore2.triggerMajorCompaction();
339
340    compactionContext = hStore1.requestCompaction();
341    assertTrue(compactionContext.isPresent());
342    // since we set mock reference to one of the storeFiles, we will get isAfterSplit=true &&
343    // highest priority for hStore1's compactionContext
344    assertTrue(compactionContext.get().getRequest().isAfterSplit());
345    assertEquals(compactionContext.get().getRequest().getPriority(), Integer.MIN_VALUE + 1000);
346
347    compactionContext =
348      hStore2.requestCompaction(Integer.MIN_VALUE + 10, CompactionLifeCycleTracker.DUMMY, null);
349    assertTrue(compactionContext.isPresent());
350    // compaction request contains higher priority than default priority of daughter region
351    // compaction (Integer.MIN_VALUE + 1000), hence we are expecting request priority to
352    // be accepted.
353    assertTrue(compactionContext.get().getRequest().isAfterSplit());
354    assertEquals(compactionContext.get().getRequest().getPriority(), Integer.MIN_VALUE + 10);
355    admin.disableTable(tableName);
356    admin.deleteTable(tableName);
357  }
358
359  @Test
360  public void testContinuousSplitUsingLinkFile() throws Exception {
361    final TableName tableName = TableName.valueOf(name.getMethodName());
362    // Create table then get the single region for our new table.
363    byte[] cf = Bytes.toBytes("cf");
364    TableDescriptorBuilder builder = TableDescriptorBuilder.newBuilder(tableName)
365      .setColumnFamily(ColumnFamilyDescriptorBuilder.of(cf));
366    String splitPolicy = ConstantSizeRegionSplitPolicy.class.getName();
367    builder.setValue(SPLIT_POLICY, splitPolicy);
368
369    admin.createTable(builder.build());
370    admin.compactionSwitch(false, new ArrayList<>());
371
372    assertNotEquals("Unable to retrieve regions of the table", -1,
373      TESTING_UTIL.waitFor(10000, () -> cluster.getRegions(tableName).size() == 1));
374    Table table = TESTING_UTIL.getConnection().getTable(tableName);
375    // insert data
376    insertData(tableName, admin, table, 10);
377    insertData(tableName, admin, table, 20);
378    insertData(tableName, admin, table, 40);
379    int rowCount = 3 * 4;
380    Scan scan = new Scan();
381    scanValidate(scan, rowCount, table);
382
383    // Split
384    admin.splitRegionAsync(cluster.getRegions(tableName).get(0).getRegionInfo().getRegionName(),
385      Bytes.toBytes("row14"));
386    // wait for the split to complete or get interrupted. If the split completes successfully,
387    // the procedure will return true; if the split fails, the procedure would throw exception.
388    Thread.sleep(3000);
389    assertNotEquals("Table is not split properly?", -1,
390      TESTING_UTIL.waitFor(3000, () -> cluster.getRegions(tableName).size() == 2));
391    // we have 2 daughter regions
392    HRegion hRegion1 = cluster.getRegions(tableName).get(0);
393    HRegion hRegion2 = cluster.getRegions(tableName).get(1);
394    HStore hStore1 = hRegion1.getStore(cf);
395    HStore hStore2 = hRegion2.getStore(cf);
396    // the sum of store files of the two children should be equal to their parent
397    assertEquals(3, hStore1.getStorefilesCount() + hStore2.getStorefilesCount());
398    // both the two children should have link files
399    for (StoreFile sf : hStore1.getStorefiles()) {
400      assertTrue(HFileLink.isHFileLink(sf.getPath()));
401    }
402    for (StoreFile sf : hStore2.getStorefiles()) {
403      assertTrue(HFileLink.isHFileLink(sf.getPath()));
404    }
405    // validate children data
406    scan = new Scan();
407    scanValidate(scan, rowCount, table);
408
409    // Continuous Split
410    findRegionToSplit(tableName, "row24");
411    Thread.sleep(3000);
412    assertNotEquals("Table is not split properly?", -1,
413      TESTING_UTIL.waitFor(3000, () -> cluster.getRegions(tableName).size() == 3));
414    // now table has 3 region, each region should have one link file
415    for (HRegion newRegion : cluster.getRegions(tableName)) {
416      assertEquals(1, newRegion.getStore(cf).getStorefilesCount());
417      assertTrue(
418        HFileLink.isHFileLink(newRegion.getStore(cf).getStorefiles().iterator().next().getPath()));
419    }
420
421    scan = new Scan();
422    scanValidate(scan, rowCount, table);
423
424    // Continuous Split, random split HFileLink, generate Reference files.
425    // After this, can not continuous split, because there are reference files.
426    findRegionToSplit(tableName, "row11");
427    Thread.sleep(3000);
428    assertNotEquals("Table is not split properly?", -1,
429      TESTING_UTIL.waitFor(3000, () -> cluster.getRegions(tableName).size() == 4));
430
431    scan = new Scan();
432    scanValidate(scan, rowCount, table);
433  }
434
435  private void findRegionToSplit(TableName tableName, String splitRowKey) throws Exception {
436    HRegion toSplit = null;
437    byte[] toSplitKey = Bytes.toBytes(splitRowKey);
438    for (HRegion rg : cluster.getRegions(tableName)) {
439      LOG.debug(
440        "startKey=" + Bytes.toStringBinary(rg.getRegionInfo().getStartKey()) + ", getEndKey()="
441          + Bytes.toStringBinary(rg.getRegionInfo().getEndKey()) + ", row=" + splitRowKey);
442      if (
443        (rg.getRegionInfo().getStartKey().length == 0 || CellComparator.getInstance().compare(
444          PrivateCellUtil.createFirstOnRow(rg.getRegionInfo().getStartKey()),
445          PrivateCellUtil.createFirstOnRow(toSplitKey)) <= 0)
446          && (rg.getRegionInfo().getEndKey().length == 0 || CellComparator.getInstance().compare(
447            PrivateCellUtil.createFirstOnRow(rg.getRegionInfo().getEndKey()),
448            PrivateCellUtil.createFirstOnRow(toSplitKey)) >= 0)
449      ) {
450        toSplit = rg;
451      }
452    }
453    assertNotNull(toSplit);
454    admin.splitRegionAsync(toSplit.getRegionInfo().getRegionName(), toSplitKey);
455  }
456
457  private static void scanValidate(Scan scan, int expectedRowCount, Table table)
458    throws IOException {
459    ResultScanner scanner = table.getScanner(scan);
460    int rows = 0;
461    for (Result result : scanner) {
462      rows++;
463    }
464    scanner.close();
465    assertEquals(expectedRowCount, rows);
466  }
467
468  public static class FailingSplitMasterObserver implements MasterCoprocessor, MasterObserver {
469    volatile CountDownLatch latch;
470
471    @Override
472    public void start(CoprocessorEnvironment e) throws IOException {
473      latch = new CountDownLatch(1);
474    }
475
476    @Override
477    public Optional<MasterObserver> getMasterObserver() {
478      return Optional.of(this);
479    }
480
481    @Override
482    public void preSplitRegionBeforeMETAAction(
483      final ObserverContext<MasterCoprocessorEnvironment> ctx, final byte[] splitKey,
484      final List<Mutation> metaEntries) throws IOException {
485      latch.countDown();
486      throw new IOException("Causing rollback of region split");
487    }
488  }
489
  /**
   * Verifies that a split request fails with {@link DoNotRetryRegionException} while the master's
   * region state for the region is CLOSING, that the table's region count stays unchanged
   * afterwards, and that the split goes through once the state has been set back to OPEN.
   */
  @Test
  public void testSplitRollbackOnRegionClosing() throws Exception {
    final TableName tableName = TableName.valueOf(name.getMethodName());

    // Create table then get the single region for our new table.
    Table t = createTableAndWait(tableName, HConstants.CATALOG_FAMILY);
    List<HRegion> regions = cluster.getRegions(tableName);
    RegionInfo hri = getAndCheckSingleTableRegion(regions);

    int tableRegionIndex = ensureTableRegionNotOnSameServerAsMeta(admin, hri);

    RegionStates regionStates = cluster.getMaster().getAssignmentManager().getRegionStates();

    // Turn off balancer so it doesn't cut in and mess up our placements.
    this.admin.balancerSwitch(false, true);
    // Turn off the meta scanner so it doesn't remove parent on us.
    cluster.getMaster().setCatalogJanitorEnabled(false);
    try {
      // Add a bit of load up into the table so splittable.
      TESTING_UTIL.loadTable(t, HConstants.CATALOG_FAMILY, false);
      // Get region pre-split.
      HRegionServer server = cluster.getRegionServer(tableRegionIndex);
      printOutRegions(server, "Initial regions: ");
      int regionCount = cluster.getRegions(hri.getTable()).size();
      // Force the master's view of the region to CLOSING so the split request below is refused.
      regionStates.updateRegionState(hri, RegionState.State.CLOSING);

      // Now try splitting.... should fail. And each should successfully
      // rollback.
      // We don't roll back here anymore. Instead we fail-fast on construction of the
      // split transaction. Catch the exception instead.
      try {
        FutureUtils.get(this.admin.splitRegionAsync(hri.getRegionName()));
        fail();
      } catch (DoNotRetryRegionException e) {
        // Expected
      }
      // Wait around a while and assert count of regions remains constant.
      for (int i = 0; i < 10; i++) {
        Thread.sleep(100);
        assertEquals(regionCount, cluster.getRegions(hri.getTable()).size());
      }
      // Put the region state back to OPEN so that the next split attempt is accepted.
      regionStates.updateRegionState(hri, State.OPEN);
      // Now try splitting and it should work.
      admin.splitRegionAsync(hri.getRegionName()).get(2, TimeUnit.MINUTES);
      // Get daughters
      checkAndGetDaughters(tableName);
      // OK, so split happened after we cleared the blocking node.
    } finally {
      // Restore the balancer and catalog janitor and release the table handle.
      admin.balancerSwitch(true, false);
      cluster.getMaster().setCatalogJanitorEnabled(true);
      t.close();
    }
  }
543
544  /**
545   * Test that if daughter split on us, we won't do the shutdown handler fixup just because we can't
546   * find the immediate daughter of an offlined parent.
547   */
548  @Test
549  public void testShutdownFixupWhenDaughterHasSplit() throws Exception {
550    final TableName tableName = TableName.valueOf(name.getMethodName());
551
552    // Create table then get the single region for our new table.
553    Table t = createTableAndWait(tableName, HConstants.CATALOG_FAMILY);
554    List<HRegion> regions = cluster.getRegions(tableName);
555    RegionInfo hri = getAndCheckSingleTableRegion(regions);
556    int tableRegionIndex = ensureTableRegionNotOnSameServerAsMeta(admin, hri);
557
558    // Turn off balancer so it doesn't cut in and mess up our placements.
559    this.admin.balancerSwitch(false, true);
560    // Turn off the meta scanner so it don't remove parent on us.
561    cluster.getMaster().setCatalogJanitorEnabled(false);
562    try {
563      // Add a bit of load up into the table so splittable.
564      TESTING_UTIL.loadTable(t, HConstants.CATALOG_FAMILY);
565      // Get region pre-split.
566      HRegionServer server = cluster.getRegionServer(tableRegionIndex);
567      printOutRegions(server, "Initial regions: ");
568      // Now split.
569      admin.splitRegionAsync(hri.getRegionName()).get(2, TimeUnit.MINUTES);
570      // Get daughters
571      List<HRegion> daughters = checkAndGetDaughters(tableName);
572      // Now split one of the daughters.
573      HRegion daughterRegion = daughters.get(0);
574      RegionInfo daughter = daughterRegion.getRegionInfo();
575      LOG.info("Daughter we are going to split: " + daughter);
576      clearReferences(daughterRegion);
577      LOG.info("Finished {} references={}", daughterRegion, daughterRegion.hasReferences());
578      admin.splitRegionAsync(daughter.getRegionName()).get(2, TimeUnit.MINUTES);
579      // Get list of daughters
580      daughters = cluster.getRegions(tableName);
581      for (HRegion d : daughters) {
582        LOG.info("Regions before crash: " + d);
583      }
584      // Now crash the server
585      cluster.abortRegionServer(tableRegionIndex);
586      waitUntilRegionServerDead();
587      awaitDaughters(tableName, daughters.size());
588      // Assert daughters are online and ONLY the original daughters -- that
589      // fixup didn't insert one during server shutdown recover.
590      regions = cluster.getRegions(tableName);
591      for (HRegion d : daughters) {
592        LOG.info("Regions after crash: " + d);
593      }
594      if (daughters.size() != regions.size()) {
595        LOG.info("Daughters=" + daughters.size() + ", regions=" + regions.size());
596      }
597      assertEquals(daughters.size(), regions.size());
598      for (HRegion r : regions) {
599        LOG.info("Regions post crash " + r + ", contains=" + daughters.contains(r));
600        assertTrue("Missing region post crash " + r, daughters.contains(r));
601      }
602    } finally {
603      LOG.info("EXITING");
604      admin.balancerSwitch(true, false);
605      cluster.getMaster().setCatalogJanitorEnabled(true);
606      t.close();
607    }
608  }
609
610  private void clearReferences(HRegion region) throws IOException {
611    // Presumption.
612    assertEquals(1, region.getStores().size());
613    HStore store = region.getStores().get(0);
614    while (store.hasReferences()) {
615      while (store.storeEngine.getCompactor().isCompacting()) {
616        Threads.sleep(100);
617      }
618      // Run new compaction. Shoudn't be any others running.
619      region.compact(true);
620      store.closeAndArchiveCompactedFiles();
621    }
622  }
623
624  @Test
625  public void testSplitShouldNotThrowNPEEvenARegionHasEmptySplitFiles() throws Exception {
626    TableName userTableName = TableName.valueOf(name.getMethodName());
627    TableDescriptor htd = TableDescriptorBuilder.newBuilder(userTableName)
628      .setColumnFamily(ColumnFamilyDescriptorBuilder.of("col")).build();
629    admin.createTable(htd);
630    Table table = TESTING_UTIL.getConnection().getTable(userTableName);
631    try {
632      for (int i = 0; i <= 5; i++) {
633        String row = "row" + i;
634        Put p = new Put(Bytes.toBytes(row));
635        String val = "Val" + i;
636        p.addColumn(Bytes.toBytes("col"), Bytes.toBytes("ql"), Bytes.toBytes(val));
637        table.put(p);
638        admin.flush(userTableName);
639        Delete d = new Delete(Bytes.toBytes(row));
640        // Do a normal delete
641        table.delete(d);
642        admin.flush(userTableName);
643      }
644      admin.majorCompact(userTableName);
645      List<RegionInfo> regionsOfTable = cluster.getMaster().getAssignmentManager().getRegionStates()
646        .getRegionsOfTable(userTableName);
647      assertEquals(1, regionsOfTable.size());
648      RegionInfo hRegionInfo = regionsOfTable.get(0);
649      Put p = new Put(Bytes.toBytes("row6"));
650      p.addColumn(Bytes.toBytes("col"), Bytes.toBytes("ql"), Bytes.toBytes("val"));
651      table.put(p);
652      p = new Put(Bytes.toBytes("row7"));
653      p.addColumn(Bytes.toBytes("col"), Bytes.toBytes("ql"), Bytes.toBytes("val"));
654      table.put(p);
655      p = new Put(Bytes.toBytes("row8"));
656      p.addColumn(Bytes.toBytes("col"), Bytes.toBytes("ql"), Bytes.toBytes("val"));
657      table.put(p);
658      admin.flush(userTableName);
659      admin.splitRegionAsync(hRegionInfo.getRegionName(), Bytes.toBytes("row7"));
660      regionsOfTable = cluster.getMaster().getAssignmentManager().getRegionStates()
661        .getRegionsOfTable(userTableName);
662
663      while (regionsOfTable.size() != 2) {
664        Thread.sleep(1000);
665        regionsOfTable = cluster.getMaster().getAssignmentManager().getRegionStates()
666          .getRegionsOfTable(userTableName);
667        LOG.debug("waiting 2 regions to be available, got " + regionsOfTable.size() + ": "
668          + regionsOfTable);
669
670      }
671      Assert.assertEquals(2, regionsOfTable.size());
672
673      Scan s = new Scan();
674      ResultScanner scanner = table.getScanner(s);
675      int mainTableCount = 0;
676      for (Result rr = scanner.next(); rr != null; rr = scanner.next()) {
677        mainTableCount++;
678      }
679      Assert.assertEquals(3, mainTableCount);
680    } finally {
681      table.close();
682    }
683  }
684
685  /**
686   * Verifies HBASE-5806. Here the case is that splitting is completed but before the CJ could
687   * remove the parent region the master is killed and restarted.
688   */
689  @Test
690  public void testMasterRestartAtRegionSplitPendingCatalogJanitor()
691    throws IOException, InterruptedException, NodeExistsException, KeeperException,
692    ServiceException, ExecutionException, TimeoutException {
693    final TableName tableName = TableName.valueOf(name.getMethodName());
694    // Create table then get the single region for our new table.
695    try (Table t = createTableAndWait(tableName, HConstants.CATALOG_FAMILY)) {
696      List<HRegion> regions = cluster.getRegions(tableName);
697      RegionInfo hri = getAndCheckSingleTableRegion(regions);
698
699      int tableRegionIndex = ensureTableRegionNotOnSameServerAsMeta(admin, hri);
700
701      // Turn off balancer so it doesn't cut in and mess up our placements.
702      this.admin.balancerSwitch(false, true);
703      // Turn off the meta scanner so it don't remove parent on us.
704      cluster.getMaster().setCatalogJanitorEnabled(false);
705      // Add a bit of load up into the table so splittable.
706      TESTING_UTIL.loadTable(t, HConstants.CATALOG_FAMILY, false);
707      // Get region pre-split.
708      HRegionServer server = cluster.getRegionServer(tableRegionIndex);
709      printOutRegions(server, "Initial regions: ");
710      // Call split.
711      this.admin.splitRegionAsync(hri.getRegionName()).get(2, TimeUnit.MINUTES);
712      List<HRegion> daughters = checkAndGetDaughters(tableName);
713
714      // Before cleanup, get a new master.
715      HMaster master = abortAndWaitForMaster();
716      // Now call compact on the daughters and clean up any references.
717      for (HRegion daughter : daughters) {
718        clearReferences(daughter);
719        assertFalse(daughter.hasReferences());
720      }
721      // BUT calling compact on the daughters is not enough. The CatalogJanitor looks
722      // in the filesystem, and the filesystem content is not same as what the Region
723      // is reading from. Compacted-away files are picked up later by the compacted
724      // file discharger process. It runs infrequently. Make it run so CatalogJanitor
725      // doens't find any references.
726      for (RegionServerThread rst : cluster.getRegionServerThreads()) {
727        boolean oldSetting = rst.getRegionServer().compactedFileDischarger.setUseExecutor(false);
728        rst.getRegionServer().compactedFileDischarger.run();
729        rst.getRegionServer().compactedFileDischarger.setUseExecutor(oldSetting);
730      }
731      cluster.getMaster().setCatalogJanitorEnabled(true);
732      ProcedureTestingUtility.waitAllProcedures(cluster.getMaster().getMasterProcedureExecutor());
733      LOG.info("Starting run of CatalogJanitor");
734      cluster.getMaster().getCatalogJanitor().run();
735      ProcedureTestingUtility.waitAllProcedures(cluster.getMaster().getMasterProcedureExecutor());
736      RegionStates regionStates = master.getAssignmentManager().getRegionStates();
737      ServerName regionServerOfRegion = regionStates.getRegionServerOfRegion(hri);
738      assertEquals(null, regionServerOfRegion);
739    } finally {
740      TESTING_UTIL.getAdmin().balancerSwitch(true, false);
741      cluster.getMaster().setCatalogJanitorEnabled(true);
742    }
743  }
744
745  @Test
746  public void testSplitWithRegionReplicas() throws Exception {
747    final TableName tableName = TableName.valueOf(name.getMethodName());
748    TableDescriptor htd = TESTING_UTIL
749      .createModifyableTableDescriptor(TableName.valueOf(name.getMethodName()),
750        ColumnFamilyDescriptorBuilder.DEFAULT_MIN_VERSIONS, 3, HConstants.FOREVER,
751        ColumnFamilyDescriptorBuilder.DEFAULT_KEEP_DELETED)
752      .setRegionReplication(2).setCoprocessor(SlowMeCopro.class.getName()).build();
753    // Create table then get the single region for our new table.
754    Table t = TESTING_UTIL.createTable(htd, new byte[][] { Bytes.toBytes("cf") }, null);
755    List<HRegion> oldRegions;
756    do {
757      oldRegions = cluster.getRegions(tableName);
758      Thread.sleep(10);
759    } while (oldRegions.size() != 2);
760    for (HRegion h : oldRegions)
761      LOG.debug("OLDREGION " + h.getRegionInfo());
762    try {
763      int regionServerIndex =
764        cluster.getServerWith(oldRegions.get(0).getRegionInfo().getRegionName());
765      HRegionServer regionServer = cluster.getRegionServer(regionServerIndex);
766      insertData(tableName, admin, t);
767      // Turn off balancer so it doesn't cut in and mess up our placements.
768      admin.balancerSwitch(false, true);
769      // Turn off the meta scanner so it don't remove parent on us.
770      cluster.getMaster().setCatalogJanitorEnabled(false);
771      boolean tableExists = TESTING_UTIL.getAdmin().tableExists(tableName);
772      assertEquals("The specified table should be present.", true, tableExists);
773      final HRegion region = findSplittableRegion(oldRegions);
774      regionServerIndex = cluster.getServerWith(region.getRegionInfo().getRegionName());
775      regionServer = cluster.getRegionServer(regionServerIndex);
776      assertTrue("not able to find a splittable region", region != null);
777      try {
778        requestSplitRegion(regionServer, region, Bytes.toBytes("row2"));
779      } catch (IOException e) {
780        e.printStackTrace();
781        fail("Split execution should have succeeded with no exceptions thrown " + e);
782      }
783      // TESTING_UTIL.waitUntilAllRegionsAssigned(tableName);
784      List<HRegion> newRegions;
785      do {
786        newRegions = cluster.getRegions(tableName);
787        for (HRegion h : newRegions)
788          LOG.debug("NEWREGION " + h.getRegionInfo());
789        Thread.sleep(1000);
790      } while (
791        (newRegions.contains(oldRegions.get(0)) || newRegions.contains(oldRegions.get(1)))
792          || newRegions.size() != 4
793      );
794      tableExists = TESTING_UTIL.getAdmin().tableExists(tableName);
795      assertEquals("The specified table should be present.", true, tableExists);
796      // exists works on stale and we see the put after the flush
797      byte[] b1 = Bytes.toBytes("row1");
798      Get g = new Get(b1);
799      g.setConsistency(Consistency.STRONG);
800      // The following GET will make a trip to the meta to get the new location of the 1st daughter
801      // In the process it will also get the location of the replica of the daughter (initially
802      // pointing to the parent's replica)
803      Result r = t.get(g);
804      Assert.assertFalse(r.isStale());
805      LOG.info("exists stale after flush done");
806
807      SlowMeCopro.getPrimaryCdl().set(new CountDownLatch(1));
808      g = new Get(b1);
809      g.setConsistency(Consistency.TIMELINE);
810      // This will succeed because in the previous GET we get the location of the replica
811      r = t.get(g);
812      Assert.assertTrue(r.isStale());
813      SlowMeCopro.getPrimaryCdl().get().countDown();
814    } finally {
815      SlowMeCopro.getPrimaryCdl().get().countDown();
816      admin.balancerSwitch(true, false);
817      cluster.getMaster().setCatalogJanitorEnabled(true);
818      t.close();
819    }
820  }
821
822  private void insertData(final TableName tableName, Admin admin, Table t) throws IOException {
823    insertData(tableName, admin, t, 1);
824  }
825
826  private void insertData(TableName tableName, Admin admin, Table t, int i) throws IOException {
827    Put p = new Put(Bytes.toBytes("row" + i));
828    p.addColumn(Bytes.toBytes("cf"), Bytes.toBytes("q1"), Bytes.toBytes("1"));
829    t.put(p);
830    p = new Put(Bytes.toBytes("row" + (i + 1)));
831    p.addColumn(Bytes.toBytes("cf"), Bytes.toBytes("q1"), Bytes.toBytes("2"));
832    t.put(p);
833    p = new Put(Bytes.toBytes("row" + (i + 2)));
834    p.addColumn(Bytes.toBytes("cf"), Bytes.toBytes("q1"), Bytes.toBytes("3"));
835    t.put(p);
836    p = new Put(Bytes.toBytes("row" + (i + 3)));
837    p.addColumn(Bytes.toBytes("cf"), Bytes.toBytes("q1"), Bytes.toBytes("4"));
838    t.put(p);
839    admin.flush(tableName);
840  }
841
842  /**
843   * If a table has regions that have no store files in a region, they should split successfully
844   * into two regions with no store files.
845   */
846  @Test
847  public void testSplitRegionWithNoStoreFiles() throws Exception {
848    final TableName tableName = TableName.valueOf(name.getMethodName());
849    // Create table then get the single region for our new table.
850    createTableAndWait(tableName, HConstants.CATALOG_FAMILY);
851    List<HRegion> regions = cluster.getRegions(tableName);
852    RegionInfo hri = getAndCheckSingleTableRegion(regions);
853    ensureTableRegionNotOnSameServerAsMeta(admin, hri);
854    int regionServerIndex = cluster.getServerWith(regions.get(0).getRegionInfo().getRegionName());
855    HRegionServer regionServer = cluster.getRegionServer(regionServerIndex);
856    // Turn off balancer so it doesn't cut in and mess up our placements.
857    this.admin.balancerSwitch(false, true);
858    // Turn off the meta scanner so it don't remove parent on us.
859    cluster.getMaster().setCatalogJanitorEnabled(false);
860    try {
861      // Precondition: we created a table with no data, no store files.
862      printOutRegions(regionServer, "Initial regions: ");
863      Configuration conf = cluster.getConfiguration();
864      HBaseFsck.debugLsr(conf, new Path("/"));
865      Path rootDir = CommonFSUtils.getRootDir(conf);
866      FileSystem fs = TESTING_UTIL.getDFSCluster().getFileSystem();
867      Map<String, Path> storefiles = FSUtils.getTableStoreFilePathMap(null, fs, rootDir, tableName);
868      assertEquals("Expected nothing but found " + storefiles.toString(), 0, storefiles.size());
869
870      // find a splittable region. Refresh the regions list
871      regions = cluster.getRegions(tableName);
872      final HRegion region = findSplittableRegion(regions);
873      assertTrue("not able to find a splittable region", region != null);
874
875      // Now split.
876      try {
877        requestSplitRegion(regionServer, region, Bytes.toBytes("row2"));
878      } catch (IOException e) {
879        fail("Split execution should have succeeded with no exceptions thrown");
880      }
881
882      // Postcondition: split the table with no store files into two regions, but still have no
883      // store files
884      List<HRegion> daughters = cluster.getRegions(tableName);
885      assertEquals(2, daughters.size());
886
887      // check dirs
888      HBaseFsck.debugLsr(conf, new Path("/"));
889      Map<String, Path> storefilesAfter =
890        FSUtils.getTableStoreFilePathMap(null, fs, rootDir, tableName);
891      assertEquals("Expected nothing but found " + storefilesAfter.toString(), 0,
892        storefilesAfter.size());
893
894      hri = region.getRegionInfo(); // split parent
895      AssignmentManager am = cluster.getMaster().getAssignmentManager();
896      RegionStates regionStates = am.getRegionStates();
897      long start = EnvironmentEdgeManager.currentTime();
898      while (!regionStates.isRegionInState(hri, State.SPLIT)) {
899        LOG.debug("Waiting for SPLIT state on: " + hri);
900        assertFalse("Timed out in waiting split parent to be in state SPLIT",
901          EnvironmentEdgeManager.currentTime() - start > 60000);
902        Thread.sleep(500);
903      }
904      assertTrue(regionStates.isRegionInState(daughters.get(0).getRegionInfo(), State.OPEN));
905      assertTrue(regionStates.isRegionInState(daughters.get(1).getRegionInfo(), State.OPEN));
906
907      // We should not be able to assign it again
908      try {
909        am.assign(hri);
910      } catch (DoNotRetryIOException e) {
911        // Expected
912      }
913      assertFalse("Split region can't be assigned", regionStates.isRegionInTransition(hri));
914      assertTrue(regionStates.isRegionInState(hri, State.SPLIT));
915
916      // We should not be able to unassign it either
917      try {
918        am.unassign(hri);
919        fail("Should have thrown exception");
920      } catch (DoNotRetryIOException e) {
921        // Expected
922      }
923      assertFalse("Split region can't be unassigned", regionStates.isRegionInTransition(hri));
924      assertTrue(regionStates.isRegionInState(hri, State.SPLIT));
925    } finally {
926      admin.balancerSwitch(true, false);
927      cluster.getMaster().setCatalogJanitorEnabled(true);
928    }
929  }
930
931  @Test
932  public void testStoreFileReferenceCreationWhenSplitPolicySaysToSkipRangeCheck() throws Exception {
933    final TableName tableName = TableName.valueOf(name.getMethodName());
934    try {
935      byte[] cf = Bytes.toBytes("f");
936      byte[] cf1 = Bytes.toBytes("i_f");
937      TableDescriptor htd = TableDescriptorBuilder.newBuilder(tableName)
938        .setColumnFamily(ColumnFamilyDescriptorBuilder.of(cf))
939        .setColumnFamily(ColumnFamilyDescriptorBuilder.of(cf1))
940        .setRegionSplitPolicyClassName(CustomSplitPolicy.class.getName()).build();
941      admin.createTable(htd);
942      List<HRegion> regions = awaitTableRegions(tableName);
943      HRegion region = regions.get(0);
944      for (int i = 3; i < 9; i++) {
945        Put p = new Put(Bytes.toBytes("row" + i));
946        p.addColumn(cf, Bytes.toBytes("q"), Bytes.toBytes("value" + i));
947        p.addColumn(cf1, Bytes.toBytes("q"), Bytes.toBytes("value" + i));
948        region.put(p);
949      }
950      region.flush(true);
951      HStore store = region.getStore(cf);
952      Collection<HStoreFile> storefiles = store.getStorefiles();
953      assertEquals(1, storefiles.size());
954      assertFalse(region.hasReferences());
955      Path referencePath = region.getRegionFileSystem().splitStoreFile(region.getRegionInfo(), "f",
956        storefiles.iterator().next(), Bytes.toBytes("row1"), false, region.getSplitPolicy());
957      assertNull(referencePath);
958      referencePath = region.getRegionFileSystem().splitStoreFile(region.getRegionInfo(), "i_f",
959        storefiles.iterator().next(), Bytes.toBytes("row1"), false, region.getSplitPolicy());
960      assertNotNull(referencePath);
961    } finally {
962      TESTING_UTIL.deleteTable(tableName);
963    }
964  }
965
966  private HRegion findSplittableRegion(final List<HRegion> regions) throws InterruptedException {
967    for (int i = 0; i < 5; ++i) {
968      for (HRegion r : regions) {
969        if (r.isSplittable() && r.getRegionInfo().getReplicaId() == 0) {
970          return (r);
971        }
972      }
973      Thread.sleep(100);
974    }
975    return null;
976  }
977
978  private List<HRegion> checkAndGetDaughters(TableName tableName) throws InterruptedException {
979    List<HRegion> daughters = null;
980    // try up to 10s
981    for (int i = 0; i < 100; i++) {
982      daughters = cluster.getRegions(tableName);
983      if (daughters.size() >= 2) {
984        break;
985      }
986      Thread.sleep(100);
987    }
988    assertTrue(daughters.size() >= 2);
989    return daughters;
990  }
991
992  private HMaster abortAndWaitForMaster() throws IOException, InterruptedException {
993    cluster.abortMaster(0);
994    cluster.waitOnMaster(0);
995    HMaster master = cluster.startMaster().getMaster();
996    cluster.waitForActiveAndReadyMaster();
997    // reset the connections
998    Closeables.close(admin, true);
999    TESTING_UTIL.invalidateConnection();
1000    admin = TESTING_UTIL.getAdmin();
1001    return master;
1002  }
1003
1004  /**
1005   * Ensure single table region is not on same server as the single hbase:meta table region.
1006   * @return Index of the server hosting the single table region
1007   */
1008  private int ensureTableRegionNotOnSameServerAsMeta(final Admin admin, final RegionInfo hri)
1009    throws IOException, MasterNotRunningException, ZooKeeperConnectionException,
1010    InterruptedException {
1011    // Now make sure that the table region is not on same server as that hosting
1012    // hbase:meta We don't want hbase:meta replay polluting our test when we later crash
1013    // the table region serving server.
1014    int metaServerIndex = cluster.getServerWithMeta();
1015    HRegionServer metaRegionServer = cluster.getRegionServer(metaServerIndex);
1016    int tableRegionIndex = cluster.getServerWith(hri.getRegionName());
1017    assertTrue(tableRegionIndex != -1);
1018    HRegionServer tableRegionServer = cluster.getRegionServer(tableRegionIndex);
1019    LOG.info("MetaRegionServer=" + metaRegionServer.getServerName() + ", other="
1020      + tableRegionServer.getServerName());
1021    if (metaRegionServer.getServerName().equals(tableRegionServer.getServerName())) {
1022      HRegionServer hrs = getOtherRegionServer(cluster, metaRegionServer);
1023      assertNotNull(hrs);
1024      assertNotNull(hri);
1025      LOG.info("Moving " + hri.getRegionNameAsString() + " from " + metaRegionServer.getServerName()
1026        + " to " + hrs.getServerName() + "; metaServerIndex=" + metaServerIndex);
1027      admin.move(hri.getEncodedNameAsBytes(), hrs.getServerName());
1028    }
1029    // Wait till table region is up on the server that is NOT carrying hbase:meta.
1030    for (int i = 0; i < 100; i++) {
1031      tableRegionIndex = cluster.getServerWith(hri.getRegionName());
1032      if (tableRegionIndex != -1 && tableRegionIndex != metaServerIndex) break;
1033      LOG.debug("Waiting on region move off the hbase:meta server; current index "
1034        + tableRegionIndex + " and metaServerIndex=" + metaServerIndex);
1035      Thread.sleep(100);
1036    }
1037    assertTrue("Region not moved off hbase:meta server, tableRegionIndex=" + tableRegionIndex,
1038      tableRegionIndex != -1 && tableRegionIndex != metaServerIndex);
1039    // Verify for sure table region is not on same server as hbase:meta
1040    tableRegionIndex = cluster.getServerWith(hri.getRegionName());
1041    assertTrue(tableRegionIndex != -1);
1042    assertNotSame(metaServerIndex, tableRegionIndex);
1043    return tableRegionIndex;
1044  }
1045
1046  /**
1047   * Find regionserver other than the one passed. Can't rely on indexes into list of regionservers
1048   * since crashed servers occupy an index.
1049   * @return A regionserver that is not <code>notThisOne</code> or null if none found
1050   */
1051  private HRegionServer getOtherRegionServer(final SingleProcessHBaseCluster cluster,
1052    final HRegionServer notThisOne) {
1053    for (RegionServerThread rst : cluster.getRegionServerThreads()) {
1054      HRegionServer hrs = rst.getRegionServer();
1055      if (hrs.getServerName().equals(notThisOne.getServerName())) continue;
1056      if (hrs.isStopping() || hrs.isStopped()) continue;
1057      return hrs;
1058    }
1059    return null;
1060  }
1061
1062  private void printOutRegions(final HRegionServer hrs, final String prefix) throws IOException {
1063    List<RegionInfo> regions = ProtobufUtil.getOnlineRegions(hrs.getRSRpcServices());
1064    for (RegionInfo region : regions) {
1065      LOG.info(prefix + region.getRegionNameAsString());
1066    }
1067  }
1068
1069  private void waitUntilRegionServerDead() throws InterruptedException, IOException {
1070    // Wait until the master processes the RS shutdown
1071    for (int i =
1072      0; (cluster.getMaster().getClusterMetrics().getLiveServerMetrics().size() > NB_SERVERS
1073        || cluster.getLiveRegionServerThreads().size() > NB_SERVERS) && i < 100; i++) {
1074      LOG.info("Waiting on server to go down");
1075      Thread.sleep(100);
1076    }
1077    assertFalse("Waited too long for RS to die",
1078      cluster.getMaster().getClusterMetrics().getLiveServerMetrics().size() > NB_SERVERS
1079        || cluster.getLiveRegionServerThreads().size() > NB_SERVERS);
1080  }
1081
1082  private void awaitDaughters(TableName tableName, int numDaughters) throws InterruptedException {
1083    // Wait till regions are back on line again.
1084    for (int i = 0; cluster.getRegions(tableName).size() < numDaughters && i < 60; i++) {
1085      LOG.info("Waiting for repair to happen");
1086      Thread.sleep(1000);
1087    }
1088    if (cluster.getRegions(tableName).size() < numDaughters) {
1089      fail("Waiting too long for daughter regions");
1090    }
1091  }
1092
1093  private List<HRegion> awaitTableRegions(final TableName tableName) throws InterruptedException {
1094    List<HRegion> regions = null;
1095    for (int i = 0; i < 100; i++) {
1096      regions = cluster.getRegions(tableName);
1097      if (regions.size() > 0) break;
1098      Thread.sleep(100);
1099    }
1100    return regions;
1101  }
1102
1103  private Table createTableAndWait(TableName tableName, byte[] cf)
1104    throws IOException, InterruptedException {
1105    Table t = TESTING_UTIL.createTable(tableName, cf);
1106    awaitTableRegions(tableName);
1107    assertTrue("Table not online: " + tableName, cluster.getRegions(tableName).size() != 0);
1108    return t;
1109  }
1110
1111  // Make it public so that JVMClusterUtil can access it.
1112  public static class MyMaster extends HMaster {
1113    public MyMaster(Configuration conf) throws IOException, KeeperException, InterruptedException {
1114      super(conf);
1115    }
1116
1117    @Override
1118    protected MasterRpcServices createRpcServices() throws IOException {
1119      return new MyMasterRpcServices(this);
1120    }
1121  }
1122
1123  static class MyMasterRpcServices extends MasterRpcServices {
1124    static AtomicBoolean enabled = new AtomicBoolean(false);
1125
1126    private HMaster myMaster;
1127
1128    public MyMasterRpcServices(HMaster master) throws IOException {
1129      super(master);
1130      myMaster = master;
1131    }
1132
1133    @Override
1134    public ReportRegionStateTransitionResponse reportRegionStateTransition(RpcController c,
1135      ReportRegionStateTransitionRequest req) throws ServiceException {
1136      ReportRegionStateTransitionResponse resp = super.reportRegionStateTransition(c, req);
1137      if (
1138        enabled.get()
1139          && req.getTransition(0).getTransitionCode().equals(TransitionCode.READY_TO_SPLIT)
1140          && !resp.hasErrorMessage()
1141      ) {
1142        RegionStates regionStates = myMaster.getAssignmentManager().getRegionStates();
1143        for (RegionStateNode regionState : regionStates.getRegionsInTransition()) {
1144          /*
1145           * TODO!!!! // Find the merging_new region and remove it if (regionState.isSplittingNew())
1146           * { regionStates.deleteRegion(regionState.getRegion()); }
1147           */
1148        }
1149      }
1150      return resp;
1151    }
1152  }
1153
1154  static class CustomSplitPolicy extends IncreasingToUpperBoundRegionSplitPolicy {
1155
1156    @Override
1157    protected boolean shouldSplit() {
1158      return true;
1159    }
1160
1161    @Override
1162    public boolean skipStoreFileRangeCheck(String familyName) {
1163      if (familyName.startsWith("i_")) {
1164        return true;
1165      } else {
1166        return false;
1167      }
1168    }
1169  }
1170}