001/*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018package org.apache.hadoop.hbase.regionserver;
019
020import static org.apache.hadoop.hbase.client.TableDescriptorBuilder.SPLIT_POLICY;
021import static org.junit.Assert.assertEquals;
022import static org.junit.Assert.assertFalse;
023import static org.junit.Assert.assertNotEquals;
024import static org.junit.Assert.assertNotNull;
025import static org.junit.Assert.assertNotSame;
026import static org.junit.Assert.assertNull;
027import static org.junit.Assert.assertTrue;
028import static org.junit.Assert.fail;
029
030import java.io.IOException;
031import java.lang.reflect.Field;
032import java.util.ArrayList;
033import java.util.Collection;
034import java.util.List;
035import java.util.Map;
036import java.util.Optional;
037import java.util.concurrent.CountDownLatch;
038import java.util.concurrent.ExecutionException;
039import java.util.concurrent.TimeUnit;
040import java.util.concurrent.TimeoutException;
041import java.util.concurrent.atomic.AtomicBoolean;
042import org.apache.hadoop.conf.Configuration;
043import org.apache.hadoop.fs.FileSystem;
044import org.apache.hadoop.fs.Path;
045import org.apache.hadoop.hbase.CellComparator;
046import org.apache.hadoop.hbase.Coprocessor;
047import org.apache.hadoop.hbase.CoprocessorEnvironment;
048import org.apache.hadoop.hbase.DoNotRetryIOException;
049import org.apache.hadoop.hbase.HBaseClassTestRule;
050import org.apache.hadoop.hbase.HBaseTestingUtility;
051import org.apache.hadoop.hbase.HConstants;
052import org.apache.hadoop.hbase.HTableDescriptor;
053import org.apache.hadoop.hbase.MasterNotRunningException;
054import org.apache.hadoop.hbase.MiniHBaseCluster;
055import org.apache.hadoop.hbase.PrivateCellUtil;
056import org.apache.hadoop.hbase.ServerName;
057import org.apache.hadoop.hbase.StartMiniClusterOption;
058import org.apache.hadoop.hbase.TableName;
059import org.apache.hadoop.hbase.ZooKeeperConnectionException;
060import org.apache.hadoop.hbase.client.Admin;
061import org.apache.hadoop.hbase.client.ColumnFamilyDescriptorBuilder;
062import org.apache.hadoop.hbase.client.Consistency;
063import org.apache.hadoop.hbase.client.Delete;
064import org.apache.hadoop.hbase.client.DoNotRetryRegionException;
065import org.apache.hadoop.hbase.client.Get;
066import org.apache.hadoop.hbase.client.Mutation;
067import org.apache.hadoop.hbase.client.Put;
068import org.apache.hadoop.hbase.client.RegionInfo;
069import org.apache.hadoop.hbase.client.Result;
070import org.apache.hadoop.hbase.client.ResultScanner;
071import org.apache.hadoop.hbase.client.Scan;
072import org.apache.hadoop.hbase.client.Table;
073import org.apache.hadoop.hbase.client.TableDescriptor;
074import org.apache.hadoop.hbase.client.TableDescriptorBuilder;
075import org.apache.hadoop.hbase.client.TestReplicasClient.SlowMeCopro;
076import org.apache.hadoop.hbase.coprocessor.MasterCoprocessor;
077import org.apache.hadoop.hbase.coprocessor.MasterCoprocessorEnvironment;
078import org.apache.hadoop.hbase.coprocessor.MasterObserver;
079import org.apache.hadoop.hbase.coprocessor.ObserverContext;
080import org.apache.hadoop.hbase.io.HFileLink;
081import org.apache.hadoop.hbase.io.Reference;
082import org.apache.hadoop.hbase.master.HMaster;
083import org.apache.hadoop.hbase.master.LoadBalancer;
084import org.apache.hadoop.hbase.master.MasterRpcServices;
085import org.apache.hadoop.hbase.master.RegionState;
086import org.apache.hadoop.hbase.master.RegionState.State;
087import org.apache.hadoop.hbase.master.assignment.AssignmentManager;
088import org.apache.hadoop.hbase.master.assignment.AssignmentTestingUtil;
089import org.apache.hadoop.hbase.master.assignment.RegionStateNode;
090import org.apache.hadoop.hbase.master.assignment.RegionStates;
091import org.apache.hadoop.hbase.procedure2.ProcedureTestingUtility;
092import org.apache.hadoop.hbase.regionserver.compactions.CompactionContext;
093import org.apache.hadoop.hbase.regionserver.compactions.CompactionLifeCycleTracker;
094import org.apache.hadoop.hbase.regionserver.throttle.NoLimitThroughputController;
095import org.apache.hadoop.hbase.testclassification.LargeTests;
096import org.apache.hadoop.hbase.testclassification.RegionServerTests;
097import org.apache.hadoop.hbase.util.Bytes;
098import org.apache.hadoop.hbase.util.CommonFSUtils;
099import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
100import org.apache.hadoop.hbase.util.FSUtils;
101import org.apache.hadoop.hbase.util.HBaseFsck;
102import org.apache.hadoop.hbase.util.JVMClusterUtil.RegionServerThread;
103import org.apache.hadoop.hbase.util.Threads;
104import org.apache.zookeeper.KeeperException;
105import org.apache.zookeeper.KeeperException.NodeExistsException;
106import org.junit.After;
107import org.junit.AfterClass;
108import org.junit.Assert;
109import org.junit.Before;
110import org.junit.BeforeClass;
111import org.junit.ClassRule;
112import org.junit.Rule;
113import org.junit.Test;
114import org.junit.experimental.categories.Category;
115import org.junit.rules.TestName;
116import org.mockito.Mockito;
117import org.slf4j.Logger;
118import org.slf4j.LoggerFactory;
119
120import org.apache.hbase.thirdparty.com.google.common.io.Closeables;
121import org.apache.hbase.thirdparty.com.google.protobuf.RpcController;
122import org.apache.hbase.thirdparty.com.google.protobuf.ServiceException;
123
124import org.apache.hadoop.hbase.shaded.protobuf.ProtobufUtil;
125import org.apache.hadoop.hbase.shaded.protobuf.generated.RegionServerStatusProtos.RegionStateTransition.TransitionCode;
126import org.apache.hadoop.hbase.shaded.protobuf.generated.RegionServerStatusProtos.ReportRegionStateTransitionRequest;
127import org.apache.hadoop.hbase.shaded.protobuf.generated.RegionServerStatusProtos.ReportRegionStateTransitionResponse;
128
/**
 * Tests region splits against a running mini cluster.
 */
132@Category({ RegionServerTests.class, LargeTests.class })
133public class TestSplitTransactionOnCluster {
134
  /** Class-level JUnit rule obtained from {@link HBaseClassTestRule} for this test class. */
  @ClassRule
  public static final HBaseClassTestRule CLASS_RULE =
    HBaseClassTestRule.forClass(TestSplitTransactionOnCluster.class);

  private static final Logger LOG = LoggerFactory.getLogger(TestSplitTransactionOnCluster.class);
  /** Admin handle; assigned from {@code TESTING_UTIL} in {@code setup()} before each test. */
  private Admin admin = null;
  /** The mini cluster; assigned from {@code TESTING_UTIL} in {@code setup()} before each test. */
  private MiniHBaseCluster cluster = null;
  /** Number of region servers (and data nodes) the mini cluster is started with. */
  private static final int NB_SERVERS = 3;

  /** Testing utility that starts, exposes and shuts down the mini cluster used by all tests. */
  static final HBaseTestingUtility TESTING_UTIL = new HBaseTestingUtility();

  /** Exposes the running test method's name; tests derive their table name from it. */
  @Rule
  public TestName name = new TestName();
148
149  @BeforeClass
150  public static void before() throws Exception {
151    TESTING_UTIL.getConfiguration().setInt(HConstants.HBASE_BALANCER_PERIOD, 60000);
152    StartMiniClusterOption option = StartMiniClusterOption.builder().masterClass(MyMaster.class)
153      .numRegionServers(NB_SERVERS).numDataNodes(NB_SERVERS).build();
154    TESTING_UTIL.startMiniCluster(option);
155  }
156
157  @AfterClass
158  public static void after() throws Exception {
159    TESTING_UTIL.shutdownMiniCluster();
160  }
161
162  @Before
163  public void setup() throws IOException {
164    TESTING_UTIL.ensureSomeNonStoppedRegionServersAvailable(NB_SERVERS);
165    this.admin = TESTING_UTIL.getAdmin();
166    this.cluster = TESTING_UTIL.getMiniHBaseCluster();
167  }
168
169  @After
170  public void tearDown() throws Exception {
171    this.admin.close();
172    for (TableDescriptor htd : this.admin.listTableDescriptors()) {
173      LOG.info("Tear down, remove table=" + htd.getTableName());
174      TESTING_UTIL.deleteTable(htd.getTableName());
175    }
176  }
177
178  private RegionInfo getAndCheckSingleTableRegion(final List<HRegion> regions)
179    throws IOException, InterruptedException {
180    assertEquals(1, regions.size());
181    RegionInfo hri = regions.get(0).getRegionInfo();
182    AssignmentTestingUtil.waitForAssignment(cluster.getMaster().getAssignmentManager(), hri);
183    return hri;
184  }
185
186  private void requestSplitRegion(final HRegionServer rsServer, final Region region,
187    final byte[] midKey) throws IOException {
188    long procId = cluster.getMaster().splitRegion(region.getRegionInfo(), midKey, 0, 0);
189    // wait for the split to complete or get interrupted. If the split completes successfully,
190    // the procedure will return true; if the split fails, the procedure would throw exception.
191    ProcedureTestingUtility.waitProcedure(cluster.getMaster().getMasterProcedureExecutor(), procId);
192  }
193
194  @Test
195  public void testRITStateForRollback() throws Exception {
196    final TableName tableName = TableName.valueOf(name.getMethodName());
197    final HMaster master = cluster.getMaster();
198    try {
199      // Create table then get the single region for our new table.
200      Table t = createTableAndWait(tableName, Bytes.toBytes("cf"));
201      final List<HRegion> regions = cluster.getRegions(tableName);
202      final RegionInfo hri = getAndCheckSingleTableRegion(regions);
203      insertData(tableName, admin, t);
204      t.close();
205
206      // Turn off balancer so it doesn't cut in and mess up our placements.
207      this.admin.balancerSwitch(false, true);
208      // Turn off the meta scanner so it don't remove parent on us.
209      master.setCatalogJanitorEnabled(false);
210
211      // find a splittable region
212      final HRegion region = findSplittableRegion(regions);
213      assertTrue("not able to find a splittable region", region != null);
214
215      // install master co-processor to fail splits
216      master.getMasterCoprocessorHost().load(FailingSplitMasterObserver.class,
217        Coprocessor.PRIORITY_USER, master.getConfiguration());
218
219      // split async
220      this.admin.splitRegionAsync(region.getRegionInfo().getRegionName(), new byte[] { 42 });
221
222      // we have to wait until the SPLITTING state is seen by the master
223      FailingSplitMasterObserver observer =
224        master.getMasterCoprocessorHost().findCoprocessor(FailingSplitMasterObserver.class);
225      assertNotNull(observer);
226      observer.latch.await();
227
228      LOG.info("Waiting for region to come out of RIT");
229      while (!cluster.getMaster().getAssignmentManager().getRegionStates().isRegionOnline(hri)) {
230        Threads.sleep(100);
231      }
232      assertTrue(cluster.getMaster().getAssignmentManager().getRegionStates().isRegionOnline(hri));
233    } finally {
234      admin.balancerSwitch(true, false);
235      master.setCatalogJanitorEnabled(true);
236      abortAndWaitForMaster();
237      TESTING_UTIL.deleteTable(tableName);
238    }
239  }
240
241  @Test
242  public void testSplitFailedCompactionAndSplit() throws Exception {
243    final TableName tableName = TableName.valueOf(name.getMethodName());
244    // Create table then get the single region for our new table.
245    byte[] cf = Bytes.toBytes("cf");
246    TableDescriptor htd = TableDescriptorBuilder.newBuilder(tableName)
247      .setColumnFamily(ColumnFamilyDescriptorBuilder.of(cf)).build();
248    admin.createTable(htd);
249
250    for (int i = 0; cluster.getRegions(tableName).isEmpty() && i < 100; i++) {
251      Thread.sleep(100);
252    }
253    assertEquals(1, cluster.getRegions(tableName).size());
254
255    HRegion region = cluster.getRegions(tableName).get(0);
256    HStore store = region.getStore(cf);
257    int regionServerIndex = cluster.getServerWith(region.getRegionInfo().getRegionName());
258    HRegionServer regionServer = cluster.getRegionServer(regionServerIndex);
259
260    Table t = TESTING_UTIL.getConnection().getTable(tableName);
261    // insert data
262    insertData(tableName, admin, t);
263    insertData(tableName, admin, t);
264
265    int fileNum = store.getStorefiles().size();
266    // 0, Compaction Request
267    store.triggerMajorCompaction();
268    Optional<CompactionContext> cc = store.requestCompaction();
269    assertTrue(cc.isPresent());
270    // 1, A timeout split
271    // 1.1 close region
272    assertEquals(2, region.close(false).get(cf).size());
273    // 1.2 rollback and Region initialize again
274    region.initialize();
275
276    // 2, Run Compaction cc
277    assertFalse(region.compact(cc.get(), store, NoLimitThroughputController.INSTANCE));
278    assertTrue(fileNum > store.getStorefiles().size());
279
280    // 3, Split
281    requestSplitRegion(regionServer, region, Bytes.toBytes("row3"));
282    assertEquals(2, cluster.getRegions(tableName).size());
283  }
284
285  @Test
286  public void testSplitCompactWithPriority() throws Exception {
287    final TableName tableName = TableName.valueOf(name.getMethodName());
288    // Create table then get the single region for our new table.
289    byte[] cf = Bytes.toBytes("cf");
290    TableDescriptor htd = TableDescriptorBuilder.newBuilder(tableName)
291      .setColumnFamily(ColumnFamilyDescriptorBuilder.of(cf)).build();
292    admin.createTable(htd);
293
294    assertNotEquals("Unable to retrieve regions of the table", -1,
295      TESTING_UTIL.waitFor(10000, () -> cluster.getRegions(tableName).size() == 1));
296
297    HRegion region = cluster.getRegions(tableName).get(0);
298    HStore store = region.getStore(cf);
299    int regionServerIndex = cluster.getServerWith(region.getRegionInfo().getRegionName());
300    HRegionServer regionServer = cluster.getRegionServer(regionServerIndex);
301
302    Table table = TESTING_UTIL.getConnection().getTable(tableName);
303    // insert data
304    insertData(tableName, admin, table);
305    insertData(tableName, admin, table, 20);
306    insertData(tableName, admin, table, 40);
307
308    // Compaction Request
309    store.triggerMajorCompaction();
310    Optional<CompactionContext> compactionContext = store.requestCompaction();
311    assertTrue(compactionContext.isPresent());
312    assertFalse(compactionContext.get().getRequest().isAfterSplit());
313    assertEquals(compactionContext.get().getRequest().getPriority(), 13);
314
315    // Split
316    long procId =
317      cluster.getMaster().splitRegion(region.getRegionInfo(), Bytes.toBytes("row4"), 0, 0);
318
319    // wait for the split to complete or get interrupted. If the split completes successfully,
320    // the procedure will return true; if the split fails, the procedure would throw exception.
321    ProcedureTestingUtility.waitProcedure(cluster.getMaster().getMasterProcedureExecutor(), procId);
322    Thread.sleep(3000);
323    assertNotEquals("Table is not split properly?", -1,
324      TESTING_UTIL.waitFor(3000, () -> cluster.getRegions(tableName).size() == 2));
325    // we have 2 daughter regions
326    HRegion hRegion1 = cluster.getRegions(tableName).get(0);
327    HRegion hRegion2 = cluster.getRegions(tableName).get(1);
328    HStore hStore1 = hRegion1.getStore(cf);
329    HStore hStore2 = hRegion2.getStore(cf);
330
331    // For hStore1 && hStore2, set mock reference to one of the storeFiles
332    StoreFileInfo storeFileInfo1 = new ArrayList<>(hStore1.getStorefiles()).get(0).getFileInfo();
333    StoreFileInfo storeFileInfo2 = new ArrayList<>(hStore2.getStorefiles()).get(0).getFileInfo();
334    Field field = StoreFileInfo.class.getDeclaredField("reference");
335    field.setAccessible(true);
336    field.set(storeFileInfo1, Mockito.mock(Reference.class));
337    field.set(storeFileInfo2, Mockito.mock(Reference.class));
338    hStore1.triggerMajorCompaction();
339    hStore2.triggerMajorCompaction();
340
341    compactionContext = hStore1.requestCompaction();
342    assertTrue(compactionContext.isPresent());
343    // since we set mock reference to one of the storeFiles, we will get isAfterSplit=true &&
344    // highest priority for hStore1's compactionContext
345    assertTrue(compactionContext.get().getRequest().isAfterSplit());
346    assertEquals(compactionContext.get().getRequest().getPriority(), Integer.MIN_VALUE + 1000);
347
348    compactionContext =
349      hStore2.requestCompaction(Integer.MIN_VALUE + 10, CompactionLifeCycleTracker.DUMMY, null);
350    assertTrue(compactionContext.isPresent());
351    // compaction request contains higher priority than default priority of daughter region
352    // compaction (Integer.MIN_VALUE + 1000), hence we are expecting request priority to
353    // be accepted.
354    assertTrue(compactionContext.get().getRequest().isAfterSplit());
355    assertEquals(compactionContext.get().getRequest().getPriority(), Integer.MIN_VALUE + 10);
356    admin.disableTable(tableName);
357    admin.deleteTable(tableName);
358  }
359
360  @Test
361  public void testContinuousSplitUsingLinkFile() throws Exception {
362    final TableName tableName = TableName.valueOf(name.getMethodName());
363    // Create table then get the single region for our new table.
364    byte[] cf = Bytes.toBytes("cf");
365    TableDescriptorBuilder builder = TableDescriptorBuilder.newBuilder(tableName)
366      .setColumnFamily(ColumnFamilyDescriptorBuilder.of(cf));
367    String splitPolicy = ConstantSizeRegionSplitPolicy.class.getName();
368    builder.setValue(SPLIT_POLICY, splitPolicy);
369
370    admin.createTable(builder.build());
371    admin.compactionSwitch(false, new ArrayList<>());
372
373    assertNotEquals("Unable to retrieve regions of the table", -1,
374      TESTING_UTIL.waitFor(10000, () -> cluster.getRegions(tableName).size() == 1));
375    Table table = TESTING_UTIL.getConnection().getTable(tableName);
376    // insert data
377    insertData(tableName, admin, table, 10);
378    insertData(tableName, admin, table, 20);
379    insertData(tableName, admin, table, 40);
380    int rowCount = 3 * 4;
381    Scan scan = new Scan();
382    scanValidate(scan, rowCount, table);
383
384    // Split
385    admin.splitRegionAsync(cluster.getRegions(tableName).get(0).getRegionInfo().getRegionName(),
386      Bytes.toBytes("row14"));
387    // wait for the split to complete or get interrupted. If the split completes successfully,
388    // the procedure will return true; if the split fails, the procedure would throw exception.
389    Thread.sleep(3000);
390    assertNotEquals("Table is not split properly?", -1,
391      TESTING_UTIL.waitFor(3000, () -> cluster.getRegions(tableName).size() == 2));
392    // we have 2 daughter regions
393    HRegion hRegion1 = cluster.getRegions(tableName).get(0);
394    HRegion hRegion2 = cluster.getRegions(tableName).get(1);
395    HStore hStore1 = hRegion1.getStore(cf);
396    HStore hStore2 = hRegion2.getStore(cf);
397    // the sum of store files of the two children should be equal to their parent
398    assertEquals(3, hStore1.getStorefilesCount() + hStore2.getStorefilesCount());
399    // both the two children should have link files
400    for (StoreFile sf : hStore1.getStorefiles()) {
401      assertTrue(HFileLink.isHFileLink(sf.getPath()));
402    }
403    for (StoreFile sf : hStore2.getStorefiles()) {
404      assertTrue(HFileLink.isHFileLink(sf.getPath()));
405    }
406    // validate children data
407    scan = new Scan();
408    scanValidate(scan, rowCount, table);
409
410    // Continuous Split
411    findRegionToSplit(tableName, "row24");
412    Thread.sleep(3000);
413    assertNotEquals("Table is not split properly?", -1,
414      TESTING_UTIL.waitFor(3000, () -> cluster.getRegions(tableName).size() == 3));
415    // now table has 3 region, each region should have one link file
416    for (HRegion newRegion : cluster.getRegions(tableName)) {
417      assertEquals(1, newRegion.getStore(cf).getStorefilesCount());
418      assertTrue(
419        HFileLink.isHFileLink(newRegion.getStore(cf).getStorefiles().iterator().next().getPath()));
420    }
421
422    scan = new Scan();
423    scanValidate(scan, rowCount, table);
424
425    // Continuous Split, random split HFileLink, generate Reference files.
426    // After this, can not continuous split, because there are reference files.
427    findRegionToSplit(tableName, "row11");
428    Thread.sleep(3000);
429    assertNotEquals("Table is not split properly?", -1,
430      TESTING_UTIL.waitFor(3000, () -> cluster.getRegions(tableName).size() == 4));
431
432    scan = new Scan();
433    scanValidate(scan, rowCount, table);
434  }
435
436  private void findRegionToSplit(TableName tableName, String splitRowKey) throws Exception {
437    HRegion toSplit = null;
438    byte[] toSplitKey = Bytes.toBytes(splitRowKey);
439    for (HRegion rg : cluster.getRegions(tableName)) {
440      LOG.debug(
441        "startKey=" + Bytes.toStringBinary(rg.getRegionInfo().getStartKey()) + ", getEndKey()="
442          + Bytes.toStringBinary(rg.getRegionInfo().getEndKey()) + ", row=" + splitRowKey);
443      if (
444        (rg.getRegionInfo().getStartKey().length == 0 || CellComparator.getInstance().compare(
445          PrivateCellUtil.createFirstOnRow(rg.getRegionInfo().getStartKey()),
446          PrivateCellUtil.createFirstOnRow(toSplitKey)) <= 0)
447          && (rg.getRegionInfo().getEndKey().length == 0 || CellComparator.getInstance().compare(
448            PrivateCellUtil.createFirstOnRow(rg.getRegionInfo().getEndKey()),
449            PrivateCellUtil.createFirstOnRow(toSplitKey)) >= 0)
450      ) {
451        toSplit = rg;
452      }
453    }
454    assertNotNull(toSplit);
455    admin.splitRegionAsync(toSplit.getRegionInfo().getRegionName(), toSplitKey);
456  }
457
458  private static void scanValidate(Scan scan, int expectedRowCount, Table table)
459    throws IOException {
460    ResultScanner scanner = table.getScanner(scan);
461    int rows = 0;
462    for (Result result : scanner) {
463      rows++;
464    }
465    scanner.close();
466    assertEquals(expectedRowCount, rows);
467  }
468
469  public static class FailingSplitMasterObserver implements MasterCoprocessor, MasterObserver {
470    volatile CountDownLatch latch;
471
472    @Override
473    public void start(CoprocessorEnvironment e) throws IOException {
474      latch = new CountDownLatch(1);
475    }
476
477    @Override
478    public Optional<MasterObserver> getMasterObserver() {
479      return Optional.of(this);
480    }
481
482    @Override
483    public void preSplitRegionBeforeMETAAction(
484      final ObserverContext<MasterCoprocessorEnvironment> ctx, final byte[] splitKey,
485      final List<Mutation> metaEntries) throws IOException {
486      latch.countDown();
487      throw new IOException("Causing rollback of region split");
488    }
489  }
490
491  @Test
492  public void testSplitRollbackOnRegionClosing() throws Exception {
493    final TableName tableName = TableName.valueOf(name.getMethodName());
494
495    // Create table then get the single region for our new table.
496    Table t = createTableAndWait(tableName, HConstants.CATALOG_FAMILY);
497    List<HRegion> regions = cluster.getRegions(tableName);
498    RegionInfo hri = getAndCheckSingleTableRegion(regions);
499
500    int tableRegionIndex = ensureTableRegionNotOnSameServerAsMeta(admin, hri);
501
502    RegionStates regionStates = cluster.getMaster().getAssignmentManager().getRegionStates();
503
504    // Turn off balancer so it doesn't cut in and mess up our placements.
505    this.admin.balancerSwitch(false, true);
506    // Turn off the meta scanner so it don't remove parent on us.
507    cluster.getMaster().setCatalogJanitorEnabled(false);
508    try {
509      // Add a bit of load up into the table so splittable.
510      TESTING_UTIL.loadTable(t, HConstants.CATALOG_FAMILY, false);
511      // Get region pre-split.
512      HRegionServer server = cluster.getRegionServer(tableRegionIndex);
513      printOutRegions(server, "Initial regions: ");
514      int regionCount = cluster.getRegions(hri.getTable()).size();
515      regionStates.updateRegionState(hri, RegionState.State.CLOSING);
516
517      // Now try splitting.... should fail. And each should successfully
518      // rollback.
519      // We don't roll back here anymore. Instead we fail-fast on construction of the
520      // split transaction. Catch the exception instead.
521      try {
522        this.admin.splitRegionAsync(hri.getRegionName());
523        fail();
524      } catch (DoNotRetryRegionException e) {
525        // Expected
526      }
527      // Wait around a while and assert count of regions remains constant.
528      for (int i = 0; i < 10; i++) {
529        Thread.sleep(100);
530        assertEquals(regionCount, cluster.getRegions(hri.getTable()).size());
531      }
532      regionStates.updateRegionState(hri, State.OPEN);
533      // Now try splitting and it should work.
534      admin.splitRegionAsync(hri.getRegionName()).get(2, TimeUnit.MINUTES);
535      // Get daughters
536      checkAndGetDaughters(tableName);
537      // OK, so split happened after we cleared the blocking node.
538    } finally {
539      admin.balancerSwitch(true, false);
540      cluster.getMaster().setCatalogJanitorEnabled(true);
541      t.close();
542    }
543  }
544
545  /**
546   * Test that if daughter split on us, we won't do the shutdown handler fixup just because we can't
547   * find the immediate daughter of an offlined parent.
548   */
549  @Test
550  public void testShutdownFixupWhenDaughterHasSplit() throws Exception {
551    final TableName tableName = TableName.valueOf(name.getMethodName());
552
553    // Create table then get the single region for our new table.
554    Table t = createTableAndWait(tableName, HConstants.CATALOG_FAMILY);
555    List<HRegion> regions = cluster.getRegions(tableName);
556    RegionInfo hri = getAndCheckSingleTableRegion(regions);
557    int tableRegionIndex = ensureTableRegionNotOnSameServerAsMeta(admin, hri);
558
559    // Turn off balancer so it doesn't cut in and mess up our placements.
560    this.admin.balancerSwitch(false, true);
561    // Turn off the meta scanner so it don't remove parent on us.
562    cluster.getMaster().setCatalogJanitorEnabled(false);
563    try {
564      // Add a bit of load up into the table so splittable.
565      TESTING_UTIL.loadTable(t, HConstants.CATALOG_FAMILY);
566      // Get region pre-split.
567      HRegionServer server = cluster.getRegionServer(tableRegionIndex);
568      printOutRegions(server, "Initial regions: ");
569      // Now split.
570      admin.splitRegionAsync(hri.getRegionName()).get(2, TimeUnit.MINUTES);
571      // Get daughters
572      List<HRegion> daughters = checkAndGetDaughters(tableName);
573      // Now split one of the daughters.
574      HRegion daughterRegion = daughters.get(0);
575      RegionInfo daughter = daughterRegion.getRegionInfo();
576      LOG.info("Daughter we are going to split: " + daughter);
577      clearReferences(daughterRegion);
578      LOG.info("Finished {} references={}", daughterRegion, daughterRegion.hasReferences());
579      admin.splitRegionAsync(daughter.getRegionName()).get(2, TimeUnit.MINUTES);
580      // Get list of daughters
581      daughters = cluster.getRegions(tableName);
582      for (HRegion d : daughters) {
583        LOG.info("Regions before crash: " + d);
584      }
585      // Now crash the server
586      cluster.abortRegionServer(tableRegionIndex);
587      waitUntilRegionServerDead();
588      awaitDaughters(tableName, daughters.size());
589      // Assert daughters are online and ONLY the original daughters -- that
590      // fixup didn't insert one during server shutdown recover.
591      regions = cluster.getRegions(tableName);
592      for (HRegion d : daughters) {
593        LOG.info("Regions after crash: " + d);
594      }
595      if (daughters.size() != regions.size()) {
596        LOG.info("Daughters=" + daughters.size() + ", regions=" + regions.size());
597      }
598      assertEquals(daughters.size(), regions.size());
599      for (HRegion r : regions) {
600        LOG.info("Regions post crash " + r + ", contains=" + daughters.contains(r));
601        assertTrue("Missing region post crash " + r, daughters.contains(r));
602      }
603    } finally {
604      LOG.info("EXITING");
605      admin.balancerSwitch(true, false);
606      cluster.getMaster().setCatalogJanitorEnabled(true);
607      t.close();
608    }
609  }
610
611  private void clearReferences(HRegion region) throws IOException {
612    // Presumption.
613    assertEquals(1, region.getStores().size());
614    HStore store = region.getStores().get(0);
615    while (store.hasReferences()) {
616      while (store.storeEngine.getCompactor().isCompacting()) {
617        Threads.sleep(100);
618      }
619      // Run new compaction. Shoudn't be any others running.
620      region.compact(true);
621      store.closeAndArchiveCompactedFiles();
622    }
623  }
624
625  @Test
626  public void testSplitShouldNotThrowNPEEvenARegionHasEmptySplitFiles() throws Exception {
627    TableName userTableName = TableName.valueOf(name.getMethodName());
628    TableDescriptor htd = TableDescriptorBuilder.newBuilder(userTableName)
629      .setColumnFamily(ColumnFamilyDescriptorBuilder.of("col")).build();
630    admin.createTable(htd);
631    Table table = TESTING_UTIL.getConnection().getTable(userTableName);
632    try {
633      for (int i = 0; i <= 5; i++) {
634        String row = "row" + i;
635        Put p = new Put(row.getBytes());
636        String val = "Val" + i;
637        p.addColumn("col".getBytes(), "ql".getBytes(), val.getBytes());
638        table.put(p);
639        admin.flush(userTableName);
640        Delete d = new Delete(row.getBytes());
641        // Do a normal delete
642        table.delete(d);
643        admin.flush(userTableName);
644      }
645      admin.majorCompact(userTableName);
646      List<RegionInfo> regionsOfTable = cluster.getMaster().getAssignmentManager().getRegionStates()
647        .getRegionsOfTable(userTableName);
648      assertEquals(1, regionsOfTable.size());
649      RegionInfo hRegionInfo = regionsOfTable.get(0);
650      Put p = new Put("row6".getBytes());
651      p.addColumn("col".getBytes(), "ql".getBytes(), "val".getBytes());
652      table.put(p);
653      p = new Put("row7".getBytes());
654      p.addColumn("col".getBytes(), "ql".getBytes(), "val".getBytes());
655      table.put(p);
656      p = new Put("row8".getBytes());
657      p.addColumn("col".getBytes(), "ql".getBytes(), "val".getBytes());
658      table.put(p);
659      admin.flush(userTableName);
660      admin.splitRegionAsync(hRegionInfo.getRegionName(), "row7".getBytes());
661      regionsOfTable = cluster.getMaster().getAssignmentManager().getRegionStates()
662        .getRegionsOfTable(userTableName);
663
664      while (regionsOfTable.size() != 2) {
665        Thread.sleep(1000);
666        regionsOfTable = cluster.getMaster().getAssignmentManager().getRegionStates()
667          .getRegionsOfTable(userTableName);
668        LOG.debug("waiting 2 regions to be available, got " + regionsOfTable.size() + ": "
669          + regionsOfTable);
670
671      }
672      Assert.assertEquals(2, regionsOfTable.size());
673
674      Scan s = new Scan();
675      ResultScanner scanner = table.getScanner(s);
676      int mainTableCount = 0;
677      for (Result rr = scanner.next(); rr != null; rr = scanner.next()) {
678        mainTableCount++;
679      }
680      Assert.assertEquals(3, mainTableCount);
681    } finally {
682      table.close();
683    }
684  }
685
  /**
   * Verifies HBASE-5806. Here the case is that splitting is completed but before the CJ
   * (CatalogJanitor) could remove the parent region the master is killed and restarted.
   * <p>
   * The test splits a single-region table while the CatalogJanitor is disabled, swaps in a new
   * master, calls {@code clearReferences} on every daughter, runs the compacted file dischargers
   * and the CatalogJanitor by hand, and finally asserts that the new master's region states
   * report no hosting server for the split parent.
   */
  @Test
  public void testMasterRestartAtRegionSplitPendingCatalogJanitor()
    throws IOException, InterruptedException, NodeExistsException, KeeperException,
    ServiceException, ExecutionException, TimeoutException {
    final TableName tableName = TableName.valueOf(name.getMethodName());
    // Create table then get the single region for our new table.
    try (Table t = createTableAndWait(tableName, HConstants.CATALOG_FAMILY)) {
      List<HRegion> regions = cluster.getRegions(tableName);
      RegionInfo hri = getAndCheckSingleTableRegion(regions);

      int tableRegionIndex = ensureTableRegionNotOnSameServerAsMeta(admin, hri);

      // Turn off balancer so it doesn't cut in and mess up our placements.
      this.admin.balancerSwitch(false, true);
      // Turn off the meta scanner so it doesn't remove the parent on us.
      cluster.getMaster().setCatalogJanitorEnabled(false);
      // Add a bit of load up into the table so it is splittable.
      TESTING_UTIL.loadTable(t, HConstants.CATALOG_FAMILY, false);
      // Get region pre-split.
      HRegionServer server = cluster.getRegionServer(tableRegionIndex);
      printOutRegions(server, "Initial regions: ");
      // Call split and wait (up to two minutes) for the returned future to complete.
      this.admin.splitRegionAsync(hri.getRegionName()).get(2, TimeUnit.MINUTES);
      List<HRegion> daughters = checkAndGetDaughters(tableName);

      // Before cleanup, get a new master.
      HMaster master = abortAndWaitForMaster();
      // Now call compact on the daughters and clean up any references.
      for (HRegion daughter : daughters) {
        clearReferences(daughter);
        assertFalse(daughter.hasReferences());
      }
      // BUT calling compact on the daughters is not enough. The CatalogJanitor looks
      // in the filesystem, and the filesystem content is not same as what the Region
      // is reading from. Compacted-away files are picked up later by the compacted
      // file discharger process. It runs infrequently. Make it run so CatalogJanitor
      // doesn't find any references.
      for (RegionServerThread rst : cluster.getRegionServerThreads()) {
        // Run the discharger inline on this thread, then restore the previous executor setting.
        boolean oldSetting = rst.getRegionServer().compactedFileDischarger.setUseExecutor(false);
        rst.getRegionServer().compactedFileDischarger.run();
        rst.getRegionServer().compactedFileDischarger.setUseExecutor(oldSetting);
      }
      cluster.getMaster().setCatalogJanitorEnabled(true);
      // Let outstanding master procedures finish before and after the manual janitor run.
      ProcedureTestingUtility.waitAllProcedures(cluster.getMaster().getMasterProcedureExecutor());
      LOG.info("Starting run of CatalogJanitor");
      cluster.getMaster().getCatalogJanitor().run();
      ProcedureTestingUtility.waitAllProcedures(cluster.getMaster().getMasterProcedureExecutor());
      // The split parent must no longer be reported as hosted by any region server.
      RegionStates regionStates = master.getAssignmentManager().getRegionStates();
      ServerName regionServerOfRegion = regionStates.getRegionServerOfRegion(hri);
      assertEquals(null, regionServerOfRegion);
    } finally {
      // Restore the cluster-wide switches flipped above, whatever the outcome.
      TESTING_UTIL.getAdmin().balancerSwitch(true, false);
      cluster.getMaster().setCatalogJanitorEnabled(true);
    }
  }
745
746  @Test
747  public void testSplitWithRegionReplicas() throws Exception {
748    final TableName tableName = TableName.valueOf(name.getMethodName());
749    HTableDescriptor htd = TESTING_UTIL.createTableDescriptor(name.getMethodName());
750    htd.setRegionReplication(2);
751    htd.addCoprocessor(SlowMeCopro.class.getName());
752    // Create table then get the single region for our new table.
753    Table t = TESTING_UTIL.createTable(htd, new byte[][] { Bytes.toBytes("cf") }, null);
754    List<HRegion> oldRegions;
755    do {
756      oldRegions = cluster.getRegions(tableName);
757      Thread.sleep(10);
758    } while (oldRegions.size() != 2);
759    for (HRegion h : oldRegions)
760      LOG.debug("OLDREGION " + h.getRegionInfo());
761    try {
762      int regionServerIndex =
763        cluster.getServerWith(oldRegions.get(0).getRegionInfo().getRegionName());
764      HRegionServer regionServer = cluster.getRegionServer(regionServerIndex);
765      insertData(tableName, admin, t);
766      // Turn off balancer so it doesn't cut in and mess up our placements.
767      admin.balancerSwitch(false, true);
768      // Turn off the meta scanner so it don't remove parent on us.
769      cluster.getMaster().setCatalogJanitorEnabled(false);
770      boolean tableExists = TESTING_UTIL.getAdmin().tableExists(tableName);
771      assertEquals("The specified table should be present.", true, tableExists);
772      final HRegion region = findSplittableRegion(oldRegions);
773      regionServerIndex = cluster.getServerWith(region.getRegionInfo().getRegionName());
774      regionServer = cluster.getRegionServer(regionServerIndex);
775      assertTrue("not able to find a splittable region", region != null);
776      try {
777        requestSplitRegion(regionServer, region, Bytes.toBytes("row2"));
778      } catch (IOException e) {
779        e.printStackTrace();
780        fail("Split execution should have succeeded with no exceptions thrown " + e);
781      }
782      // TESTING_UTIL.waitUntilAllRegionsAssigned(tableName);
783      List<HRegion> newRegions;
784      do {
785        newRegions = cluster.getRegions(tableName);
786        for (HRegion h : newRegions)
787          LOG.debug("NEWREGION " + h.getRegionInfo());
788        Thread.sleep(1000);
789      } while (
790        (newRegions.contains(oldRegions.get(0)) || newRegions.contains(oldRegions.get(1)))
791          || newRegions.size() != 4
792      );
793      tableExists = TESTING_UTIL.getAdmin().tableExists(tableName);
794      assertEquals("The specified table should be present.", true, tableExists);
795      // exists works on stale and we see the put after the flush
796      byte[] b1 = "row1".getBytes();
797      Get g = new Get(b1);
798      g.setConsistency(Consistency.STRONG);
799      // The following GET will make a trip to the meta to get the new location of the 1st daughter
800      // In the process it will also get the location of the replica of the daughter (initially
801      // pointing to the parent's replica)
802      Result r = t.get(g);
803      Assert.assertFalse(r.isStale());
804      LOG.info("exists stale after flush done");
805
806      SlowMeCopro.getPrimaryCdl().set(new CountDownLatch(1));
807      g = new Get(b1);
808      g.setConsistency(Consistency.TIMELINE);
809      // This will succeed because in the previous GET we get the location of the replica
810      r = t.get(g);
811      Assert.assertTrue(r.isStale());
812      SlowMeCopro.getPrimaryCdl().get().countDown();
813    } finally {
814      SlowMeCopro.getPrimaryCdl().get().countDown();
815      admin.balancerSwitch(true, false);
816      cluster.getMaster().setCatalogJanitorEnabled(true);
817      t.close();
818    }
819  }
820
821  private void insertData(final TableName tableName, Admin admin, Table t) throws IOException {
822    insertData(tableName, admin, t, 1);
823  }
824
825  private void insertData(TableName tableName, Admin admin, Table t, int i) throws IOException {
826    Put p = new Put(Bytes.toBytes("row" + i));
827    p.addColumn(Bytes.toBytes("cf"), Bytes.toBytes("q1"), Bytes.toBytes("1"));
828    t.put(p);
829    p = new Put(Bytes.toBytes("row" + (i + 1)));
830    p.addColumn(Bytes.toBytes("cf"), Bytes.toBytes("q1"), Bytes.toBytes("2"));
831    t.put(p);
832    p = new Put(Bytes.toBytes("row" + (i + 2)));
833    p.addColumn(Bytes.toBytes("cf"), Bytes.toBytes("q1"), Bytes.toBytes("3"));
834    t.put(p);
835    p = new Put(Bytes.toBytes("row" + (i + 3)));
836    p.addColumn(Bytes.toBytes("cf"), Bytes.toBytes("q1"), Bytes.toBytes("4"));
837    t.put(p);
838    admin.flush(tableName);
839  }
840
841  /**
842   * If a table has regions that have no store files in a region, they should split successfully
843   * into two regions with no store files.
844   */
845  @Test
846  public void testSplitRegionWithNoStoreFiles() throws Exception {
847    final TableName tableName = TableName.valueOf(name.getMethodName());
848    // Create table then get the single region for our new table.
849    createTableAndWait(tableName, HConstants.CATALOG_FAMILY);
850    List<HRegion> regions = cluster.getRegions(tableName);
851    RegionInfo hri = getAndCheckSingleTableRegion(regions);
852    ensureTableRegionNotOnSameServerAsMeta(admin, hri);
853    int regionServerIndex = cluster.getServerWith(regions.get(0).getRegionInfo().getRegionName());
854    HRegionServer regionServer = cluster.getRegionServer(regionServerIndex);
855    // Turn off balancer so it doesn't cut in and mess up our placements.
856    this.admin.balancerSwitch(false, true);
857    // Turn off the meta scanner so it don't remove parent on us.
858    cluster.getMaster().setCatalogJanitorEnabled(false);
859    try {
860      // Precondition: we created a table with no data, no store files.
861      printOutRegions(regionServer, "Initial regions: ");
862      Configuration conf = cluster.getConfiguration();
863      HBaseFsck.debugLsr(conf, new Path("/"));
864      Path rootDir = CommonFSUtils.getRootDir(conf);
865      FileSystem fs = TESTING_UTIL.getDFSCluster().getFileSystem();
866      Map<String, Path> storefiles = FSUtils.getTableStoreFilePathMap(null, fs, rootDir, tableName);
867      assertEquals("Expected nothing but found " + storefiles.toString(), 0, storefiles.size());
868
869      // find a splittable region. Refresh the regions list
870      regions = cluster.getRegions(tableName);
871      final HRegion region = findSplittableRegion(regions);
872      assertTrue("not able to find a splittable region", region != null);
873
874      // Now split.
875      try {
876        requestSplitRegion(regionServer, region, Bytes.toBytes("row2"));
877      } catch (IOException e) {
878        fail("Split execution should have succeeded with no exceptions thrown");
879      }
880
881      // Postcondition: split the table with no store files into two regions, but still have no
882      // store files
883      List<HRegion> daughters = cluster.getRegions(tableName);
884      assertEquals(2, daughters.size());
885
886      // check dirs
887      HBaseFsck.debugLsr(conf, new Path("/"));
888      Map<String, Path> storefilesAfter =
889        FSUtils.getTableStoreFilePathMap(null, fs, rootDir, tableName);
890      assertEquals("Expected nothing but found " + storefilesAfter.toString(), 0,
891        storefilesAfter.size());
892
893      hri = region.getRegionInfo(); // split parent
894      AssignmentManager am = cluster.getMaster().getAssignmentManager();
895      RegionStates regionStates = am.getRegionStates();
896      long start = EnvironmentEdgeManager.currentTime();
897      while (!regionStates.isRegionInState(hri, State.SPLIT)) {
898        LOG.debug("Waiting for SPLIT state on: " + hri);
899        assertFalse("Timed out in waiting split parent to be in state SPLIT",
900          EnvironmentEdgeManager.currentTime() - start > 60000);
901        Thread.sleep(500);
902      }
903      assertTrue(regionStates.isRegionInState(daughters.get(0).getRegionInfo(), State.OPEN));
904      assertTrue(regionStates.isRegionInState(daughters.get(1).getRegionInfo(), State.OPEN));
905
906      // We should not be able to assign it again
907      try {
908        am.assign(hri);
909      } catch (DoNotRetryIOException e) {
910        // Expected
911      }
912      assertFalse("Split region can't be assigned", regionStates.isRegionInTransition(hri));
913      assertTrue(regionStates.isRegionInState(hri, State.SPLIT));
914
915      // We should not be able to unassign it either
916      try {
917        am.unassign(hri);
918        fail("Should have thrown exception");
919      } catch (DoNotRetryIOException e) {
920        // Expected
921      }
922      assertFalse("Split region can't be unassigned", regionStates.isRegionInTransition(hri));
923      assertTrue(regionStates.isRegionInState(hri, State.SPLIT));
924    } finally {
925      admin.balancerSwitch(true, false);
926      cluster.getMaster().setCatalogJanitorEnabled(true);
927    }
928  }
929
930  @Test
931  public void testStoreFileReferenceCreationWhenSplitPolicySaysToSkipRangeCheck() throws Exception {
932    final TableName tableName = TableName.valueOf(name.getMethodName());
933    try {
934      byte[] cf = Bytes.toBytes("f");
935      byte[] cf1 = Bytes.toBytes("i_f");
936      TableDescriptor htd = TableDescriptorBuilder.newBuilder(tableName)
937        .setColumnFamily(ColumnFamilyDescriptorBuilder.of(cf))
938        .setColumnFamily(ColumnFamilyDescriptorBuilder.of(cf1))
939        .setRegionSplitPolicyClassName(CustomSplitPolicy.class.getName()).build();
940      admin.createTable(htd);
941      List<HRegion> regions = awaitTableRegions(tableName);
942      HRegion region = regions.get(0);
943      for (int i = 3; i < 9; i++) {
944        Put p = new Put(Bytes.toBytes("row" + i));
945        p.addColumn(cf, Bytes.toBytes("q"), Bytes.toBytes("value" + i));
946        p.addColumn(cf1, Bytes.toBytes("q"), Bytes.toBytes("value" + i));
947        region.put(p);
948      }
949      region.flush(true);
950      HStore store = region.getStore(cf);
951      Collection<HStoreFile> storefiles = store.getStorefiles();
952      assertEquals(1, storefiles.size());
953      assertFalse(region.hasReferences());
954      Path referencePath = region.getRegionFileSystem().splitStoreFile(region.getRegionInfo(), "f",
955        storefiles.iterator().next(), Bytes.toBytes("row1"), false, region.getSplitPolicy());
956      assertNull(referencePath);
957      referencePath = region.getRegionFileSystem().splitStoreFile(region.getRegionInfo(), "i_f",
958        storefiles.iterator().next(), Bytes.toBytes("row1"), false, region.getSplitPolicy());
959      assertNotNull(referencePath);
960    } finally {
961      TESTING_UTIL.deleteTable(tableName);
962    }
963  }
964
965  private HRegion findSplittableRegion(final List<HRegion> regions) throws InterruptedException {
966    for (int i = 0; i < 5; ++i) {
967      for (HRegion r : regions) {
968        if (r.isSplittable() && r.getRegionInfo().getReplicaId() == 0) {
969          return (r);
970        }
971      }
972      Thread.sleep(100);
973    }
974    return null;
975  }
976
977  private List<HRegion> checkAndGetDaughters(TableName tableName) throws InterruptedException {
978    List<HRegion> daughters = null;
979    // try up to 10s
980    for (int i = 0; i < 100; i++) {
981      daughters = cluster.getRegions(tableName);
982      if (daughters.size() >= 2) {
983        break;
984      }
985      Thread.sleep(100);
986    }
987    assertTrue(daughters.size() >= 2);
988    return daughters;
989  }
990
  /**
   * Aborts the master at index 0, waits for it to exit, starts a new master and waits until an
   * active, ready master is available. The shared {@code admin} field is then closed and replaced
   * with a fresh instance obtained after invalidating the testing utility's connection.
   * @return the newly started master
   */
  private HMaster abortAndWaitForMaster() throws IOException, InterruptedException {
    cluster.abortMaster(0);
    cluster.waitOnMaster(0);
    HMaster master = cluster.startMaster().getMaster();
    cluster.waitForActiveAndReadyMaster();
    // reset the connections
    // (the 'true' flag asks Closeables to swallow an IOException thrown by close)
    Closeables.close(admin, true);
    TESTING_UTIL.invalidateConnection();
    admin = TESTING_UTIL.getAdmin();
    return master;
  }
1002
1003  /**
1004   * Ensure single table region is not on same server as the single hbase:meta table region.
1005   * @return Index of the server hosting the single table region nn * @throws
1006   *         org.apache.hadoop.hbase.ZooKeeperConnectionException n
1007   */
1008  private int ensureTableRegionNotOnSameServerAsMeta(final Admin admin, final RegionInfo hri)
1009    throws IOException, MasterNotRunningException, ZooKeeperConnectionException,
1010    InterruptedException {
1011    // Now make sure that the table region is not on same server as that hosting
1012    // hbase:meta We don't want hbase:meta replay polluting our test when we later crash
1013    // the table region serving server.
1014    int metaServerIndex = cluster.getServerWithMeta();
1015    boolean tablesOnMaster = LoadBalancer.isTablesOnMaster(TESTING_UTIL.getConfiguration());
1016    if (tablesOnMaster) {
1017      // Need to check master is supposed to host meta... perhaps it is not.
1018      throw new UnsupportedOperationException();
1019      // TODO: assertTrue(metaServerIndex == -1); // meta is on master now
1020    }
1021    HRegionServer metaRegionServer =
1022      tablesOnMaster ? cluster.getMaster() : cluster.getRegionServer(metaServerIndex);
1023    int tableRegionIndex = cluster.getServerWith(hri.getRegionName());
1024    assertTrue(tableRegionIndex != -1);
1025    HRegionServer tableRegionServer = cluster.getRegionServer(tableRegionIndex);
1026    LOG.info("MetaRegionServer=" + metaRegionServer.getServerName() + ", other="
1027      + tableRegionServer.getServerName());
1028    if (metaRegionServer.getServerName().equals(tableRegionServer.getServerName())) {
1029      HRegionServer hrs = getOtherRegionServer(cluster, metaRegionServer);
1030      assertNotNull(hrs);
1031      assertNotNull(hri);
1032      LOG.info("Moving " + hri.getRegionNameAsString() + " from " + metaRegionServer.getServerName()
1033        + " to " + hrs.getServerName() + "; metaServerIndex=" + metaServerIndex);
1034      admin.move(hri.getEncodedNameAsBytes(), hrs.getServerName());
1035    }
1036    // Wait till table region is up on the server that is NOT carrying hbase:meta.
1037    for (int i = 0; i < 100; i++) {
1038      tableRegionIndex = cluster.getServerWith(hri.getRegionName());
1039      if (tableRegionIndex != -1 && tableRegionIndex != metaServerIndex) break;
1040      LOG.debug("Waiting on region move off the hbase:meta server; current index "
1041        + tableRegionIndex + " and metaServerIndex=" + metaServerIndex);
1042      Thread.sleep(100);
1043    }
1044    assertTrue("Region not moved off hbase:meta server, tableRegionIndex=" + tableRegionIndex,
1045      tableRegionIndex != -1 && tableRegionIndex != metaServerIndex);
1046    // Verify for sure table region is not on same server as hbase:meta
1047    tableRegionIndex = cluster.getServerWith(hri.getRegionName());
1048    assertTrue(tableRegionIndex != -1);
1049    assertNotSame(metaServerIndex, tableRegionIndex);
1050    return tableRegionIndex;
1051  }
1052
1053  /**
1054   * Find regionserver other than the one passed. Can't rely on indexes into list of regionservers
1055   * since crashed servers occupy an index. nn * @return A regionserver that is not
1056   * <code>notThisOne</code> or null if none found
1057   */
1058  private HRegionServer getOtherRegionServer(final MiniHBaseCluster cluster,
1059    final HRegionServer notThisOne) {
1060    for (RegionServerThread rst : cluster.getRegionServerThreads()) {
1061      HRegionServer hrs = rst.getRegionServer();
1062      if (hrs.getServerName().equals(notThisOne.getServerName())) continue;
1063      if (hrs.isStopping() || hrs.isStopped()) continue;
1064      return hrs;
1065    }
1066    return null;
1067  }
1068
1069  private void printOutRegions(final HRegionServer hrs, final String prefix) throws IOException {
1070    List<RegionInfo> regions = ProtobufUtil.getOnlineRegions(hrs.getRSRpcServices());
1071    for (RegionInfo region : regions) {
1072      LOG.info(prefix + region.getRegionNameAsString());
1073    }
1074  }
1075
1076  private void waitUntilRegionServerDead() throws InterruptedException, IOException {
1077    // Wait until the master processes the RS shutdown
1078    for (int i =
1079      0; (cluster.getMaster().getClusterMetrics().getLiveServerMetrics().size() > NB_SERVERS
1080        || cluster.getLiveRegionServerThreads().size() > NB_SERVERS) && i < 100; i++) {
1081      LOG.info("Waiting on server to go down");
1082      Thread.sleep(100);
1083    }
1084    assertFalse("Waited too long for RS to die",
1085      cluster.getMaster().getClusterMetrics().getLiveServerMetrics().size() > NB_SERVERS
1086        || cluster.getLiveRegionServerThreads().size() > NB_SERVERS);
1087  }
1088
1089  private void awaitDaughters(TableName tableName, int numDaughters) throws InterruptedException {
1090    // Wait till regions are back on line again.
1091    for (int i = 0; cluster.getRegions(tableName).size() < numDaughters && i < 60; i++) {
1092      LOG.info("Waiting for repair to happen");
1093      Thread.sleep(1000);
1094    }
1095    if (cluster.getRegions(tableName).size() < numDaughters) {
1096      fail("Waiting too long for daughter regions");
1097    }
1098  }
1099
1100  private List<HRegion> awaitTableRegions(final TableName tableName) throws InterruptedException {
1101    List<HRegion> regions = null;
1102    for (int i = 0; i < 100; i++) {
1103      regions = cluster.getRegions(tableName);
1104      if (regions.size() > 0) break;
1105      Thread.sleep(100);
1106    }
1107    return regions;
1108  }
1109
1110  private Table createTableAndWait(TableName tableName, byte[] cf)
1111    throws IOException, InterruptedException {
1112    Table t = TESTING_UTIL.createTable(tableName, cf);
1113    awaitTableRegions(tableName);
1114    assertTrue("Table not online: " + tableName, cluster.getRegions(tableName).size() != 0);
1115    return t;
1116  }
1117
  /**
   * HMaster subclass whose only change is to supply {@link MyMasterRpcServices} as its RPC
   * services implementation.
   */
  // Make it public so that JVMClusterUtil can access it.
  public static class MyMaster extends HMaster {
    public MyMaster(Configuration conf) throws IOException, KeeperException, InterruptedException {
      super(conf);
    }

    /** Returns a {@link MyMasterRpcServices} bound to this master. */
    @Override
    protected RSRpcServices createRpcServices() throws IOException {
      return new MyMasterRpcServices(this);
    }
  }
1129
  /**
   * MasterRpcServices that hooks {@code reportRegionStateTransition}. The hook is gated by the
   * static {@link #enabled} flag; with the cleanup code commented out (see the TODO in the loop
   * body) it currently only iterates the regions in transition and changes nothing.
   */
  static class MyMasterRpcServices extends MasterRpcServices {
    // Gate for the hook below; starts out false.
    static AtomicBoolean enabled = new AtomicBoolean(false);

    // The master handed to the constructor, kept to reach its assignment manager.
    private HMaster myMaster;

    public MyMasterRpcServices(HMaster master) throws IOException {
      super(master);
      myMaster = master;
    }

    /**
     * Delegates to the superclass and returns its response unchanged. When the hook is enabled,
     * the first transition in the request is READY_TO_SPLIT and the response carries no error
     * message, the regions in transition are iterated (the loop body is a no-op for now).
     */
    @Override
    public ReportRegionStateTransitionResponse reportRegionStateTransition(RpcController c,
      ReportRegionStateTransitionRequest req) throws ServiceException {
      ReportRegionStateTransitionResponse resp = super.reportRegionStateTransition(c, req);
      if (
        enabled.get()
          && req.getTransition(0).getTransitionCode().equals(TransitionCode.READY_TO_SPLIT)
          && !resp.hasErrorMessage()
      ) {
        RegionStates regionStates = myMaster.getAssignmentManager().getRegionStates();
        for (RegionStateNode regionState : regionStates.getRegionsInTransition()) {
          /*
           * TODO!!!! // Find the merging_new region and remove it if (regionState.isSplittingNew())
           * { regionStates.deleteRegion(regionState.getRegion()); }
           */
        }
      }
      return resp;
    }
  }
1160
1161  static class CustomSplitPolicy extends IncreasingToUpperBoundRegionSplitPolicy {
1162
1163    @Override
1164    protected boolean shouldSplit() {
1165      return true;
1166    }
1167
1168    @Override
1169    public boolean skipStoreFileRangeCheck(String familyName) {
1170      if (familyName.startsWith("i_")) {
1171        return true;
1172      } else {
1173        return false;
1174      }
1175    }
1176  }
1177}