001/**
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018package org.apache.hadoop.hbase.regionserver;
019
020import static org.junit.Assert.assertEquals;
021import static org.junit.Assert.assertFalse;
022import static org.junit.Assert.assertNotNull;
023import static org.junit.Assert.assertNotSame;
024import static org.junit.Assert.assertNull;
025import static org.junit.Assert.assertTrue;
026import static org.junit.Assert.fail;
027
028import java.io.IOException;
029import java.util.Collection;
030import java.util.List;
031import java.util.Map;
032import java.util.Optional;
033import java.util.concurrent.CountDownLatch;
034import java.util.concurrent.ExecutionException;
035import java.util.concurrent.TimeUnit;
036import java.util.concurrent.TimeoutException;
037import java.util.concurrent.atomic.AtomicBoolean;
038import org.apache.hadoop.conf.Configuration;
039import org.apache.hadoop.fs.FileSystem;
040import org.apache.hadoop.fs.Path;
041import org.apache.hadoop.hbase.Coprocessor;
042import org.apache.hadoop.hbase.CoprocessorEnvironment;
043import org.apache.hadoop.hbase.DoNotRetryIOException;
044import org.apache.hadoop.hbase.HBaseClassTestRule;
045import org.apache.hadoop.hbase.HBaseTestingUtility;
046import org.apache.hadoop.hbase.HConstants;
047import org.apache.hadoop.hbase.HTableDescriptor;
048import org.apache.hadoop.hbase.MasterNotRunningException;
049import org.apache.hadoop.hbase.MetaTableAccessor;
050import org.apache.hadoop.hbase.MiniHBaseCluster;
051import org.apache.hadoop.hbase.ServerName;
052import org.apache.hadoop.hbase.StartMiniClusterOption;
053import org.apache.hadoop.hbase.TableName;
054import org.apache.hadoop.hbase.UnknownRegionException;
055import org.apache.hadoop.hbase.ZooKeeperConnectionException;
056import org.apache.hadoop.hbase.client.Admin;
057import org.apache.hadoop.hbase.client.ColumnFamilyDescriptorBuilder;
058import org.apache.hadoop.hbase.client.Consistency;
059import org.apache.hadoop.hbase.client.Delete;
060import org.apache.hadoop.hbase.client.DoNotRetryRegionException;
061import org.apache.hadoop.hbase.client.Get;
062import org.apache.hadoop.hbase.client.Mutation;
063import org.apache.hadoop.hbase.client.Put;
064import org.apache.hadoop.hbase.client.RegionInfo;
065import org.apache.hadoop.hbase.client.Result;
066import org.apache.hadoop.hbase.client.ResultScanner;
067import org.apache.hadoop.hbase.client.Scan;
068import org.apache.hadoop.hbase.client.Table;
069import org.apache.hadoop.hbase.client.TableDescriptor;
070import org.apache.hadoop.hbase.client.TableDescriptorBuilder;
071import org.apache.hadoop.hbase.client.TestReplicasClient.SlowMeCopro;
072import org.apache.hadoop.hbase.coprocessor.MasterCoprocessor;
073import org.apache.hadoop.hbase.coprocessor.MasterCoprocessorEnvironment;
074import org.apache.hadoop.hbase.coprocessor.MasterObserver;
075import org.apache.hadoop.hbase.coprocessor.ObserverContext;
076import org.apache.hadoop.hbase.master.HMaster;
077import org.apache.hadoop.hbase.master.LoadBalancer;
078import org.apache.hadoop.hbase.master.MasterRpcServices;
079import org.apache.hadoop.hbase.master.RegionState;
080import org.apache.hadoop.hbase.master.RegionState.State;
081import org.apache.hadoop.hbase.master.assignment.AssignmentManager;
082import org.apache.hadoop.hbase.master.assignment.AssignmentTestingUtil;
083import org.apache.hadoop.hbase.master.assignment.RegionStateNode;
084import org.apache.hadoop.hbase.master.assignment.RegionStates;
085import org.apache.hadoop.hbase.procedure2.ProcedureTestingUtility;
086import org.apache.hadoop.hbase.regionserver.compactions.CompactionContext;
087import org.apache.hadoop.hbase.regionserver.throttle.NoLimitThroughputController;
088import org.apache.hadoop.hbase.testclassification.LargeTests;
089import org.apache.hadoop.hbase.testclassification.RegionServerTests;
090import org.apache.hadoop.hbase.util.Bytes;
091import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
092import org.apache.hadoop.hbase.util.FSUtils;
093import org.apache.hadoop.hbase.util.FutureUtils;
094import org.apache.hadoop.hbase.util.HBaseFsck;
095import org.apache.hadoop.hbase.util.JVMClusterUtil.RegionServerThread;
096import org.apache.hadoop.hbase.util.Threads;
097import org.apache.zookeeper.KeeperException;
098import org.apache.zookeeper.KeeperException.NodeExistsException;
099import org.junit.After;
100import org.junit.AfterClass;
101import org.junit.Assert;
102import org.junit.Before;
103import org.junit.BeforeClass;
104import org.junit.ClassRule;
105import org.junit.Rule;
106import org.junit.Test;
107import org.junit.experimental.categories.Category;
108import org.junit.rules.TestName;
109import org.slf4j.Logger;
110import org.slf4j.LoggerFactory;
111
112import org.apache.hbase.thirdparty.com.google.protobuf.RpcController;
113import org.apache.hbase.thirdparty.com.google.protobuf.ServiceException;
114
115import org.apache.hadoop.hbase.shaded.protobuf.ProtobufUtil;
116import org.apache.hadoop.hbase.shaded.protobuf.generated.RegionServerStatusProtos.RegionStateTransition.TransitionCode;
117import org.apache.hadoop.hbase.shaded.protobuf.generated.RegionServerStatusProtos.ReportRegionStateTransitionRequest;
118import org.apache.hadoop.hbase.shaded.protobuf.generated.RegionServerStatusProtos.ReportRegionStateTransitionResponse;
119
120/**
121 * The below tests are testing split region against a running cluster
122 */
123@Category({RegionServerTests.class, LargeTests.class})
124public class TestSplitTransactionOnCluster {
125
126  @ClassRule
127  public static final HBaseClassTestRule CLASS_RULE =
128      HBaseClassTestRule.forClass(TestSplitTransactionOnCluster.class);
129
130  private static final Logger LOG = LoggerFactory.getLogger(TestSplitTransactionOnCluster.class);
131  private Admin admin = null;
132  private MiniHBaseCluster cluster = null;
133  private static final int NB_SERVERS = 3;
134
135  static final HBaseTestingUtility TESTING_UTIL =
136    new HBaseTestingUtility();
137
138  @Rule
139  public TestName name = new TestName();
140
141  @BeforeClass public static void before() throws Exception {
142    TESTING_UTIL.getConfiguration().setInt(HConstants.HBASE_BALANCER_PERIOD, 60000);
143    StartMiniClusterOption option = StartMiniClusterOption.builder()
144        .masterClass(MyMaster.class).numRegionServers(NB_SERVERS).numDataNodes(NB_SERVERS).build();
145    TESTING_UTIL.startMiniCluster(option);
146  }
147
148  @AfterClass public static void after() throws Exception {
149    TESTING_UTIL.shutdownMiniCluster();
150  }
151
152  @Before public void setup() throws IOException {
153    TESTING_UTIL.ensureSomeNonStoppedRegionServersAvailable(NB_SERVERS);
154    this.admin = TESTING_UTIL.getAdmin();
155    this.cluster = TESTING_UTIL.getMiniHBaseCluster();
156  }
157
158  @After
159  public void tearDown() throws Exception {
160    this.admin.close();
161    for (TableDescriptor htd: this.admin.listTableDescriptors()) {
162      LOG.info("Tear down, remove table=" + htd.getTableName());
163      TESTING_UTIL.deleteTable(htd.getTableName());
164    }
165  }
166
167  private RegionInfo getAndCheckSingleTableRegion(final List<HRegion> regions)
168      throws IOException, InterruptedException {
169    assertEquals(1, regions.size());
170    RegionInfo hri = regions.get(0).getRegionInfo();
171    AssignmentTestingUtil.waitForAssignment(cluster.getMaster().getAssignmentManager(), hri);
172    return hri;
173  }
174
175  private void requestSplitRegion(
176      final HRegionServer rsServer,
177      final Region region,
178      final byte[] midKey) throws IOException {
179    long procId = cluster.getMaster().splitRegion(region.getRegionInfo(), midKey, 0, 0);
180    // wait for the split to complete or get interrupted.  If the split completes successfully,
181    // the procedure will return true; if the split fails, the procedure would throw exception.
182    ProcedureTestingUtility.waitProcedure(cluster.getMaster().getMasterProcedureExecutor(), procId);
183  }
184
185  @Test
186  public void testRITStateForRollback() throws Exception {
187    final TableName tableName = TableName.valueOf(name.getMethodName());
188    final HMaster master = cluster.getMaster();
189    try {
190      // Create table then get the single region for our new table.
191      Table t = createTableAndWait(tableName, Bytes.toBytes("cf"));
192      final List<HRegion> regions = cluster.getRegions(tableName);
193      final RegionInfo hri = getAndCheckSingleTableRegion(regions);
194      insertData(tableName, admin, t);
195      t.close();
196
197      // Turn off balancer so it doesn't cut in and mess up our placements.
198      this.admin.balancerSwitch(false, true);
199      // Turn off the meta scanner so it don't remove parent on us.
200      master.setCatalogJanitorEnabled(false);
201
202      // find a splittable region
203      final HRegion region = findSplittableRegion(regions);
204      assertTrue("not able to find a splittable region", region != null);
205
206      // install master co-processor to fail splits
207      master.getMasterCoprocessorHost().load(
208        FailingSplitMasterObserver.class,
209        Coprocessor.PRIORITY_USER,
210        master.getConfiguration());
211
212      // split async
213      this.admin.splitRegionAsync(region.getRegionInfo().getRegionName(), new byte[] { 42 });
214
215      // we have to wait until the SPLITTING state is seen by the master
216      FailingSplitMasterObserver observer =
217          master.getMasterCoprocessorHost().findCoprocessor(FailingSplitMasterObserver.class);
218      assertNotNull(observer);
219      observer.latch.await();
220
221      LOG.info("Waiting for region to come out of RIT");
222      while (!cluster.getMaster().getAssignmentManager().getRegionStates().isRegionOnline(hri)) {
223        Threads.sleep(100);
224      }
225      assertTrue(cluster.getMaster().getAssignmentManager().getRegionStates().isRegionOnline(hri));
226    } finally {
227      admin.balancerSwitch(true, false);
228      master.setCatalogJanitorEnabled(true);
229      abortAndWaitForMaster();
230      TESTING_UTIL.deleteTable(tableName);
231    }
232  }
233
234  @Test
235  public void testSplitFailedCompactionAndSplit() throws Exception {
236    final TableName tableName = TableName.valueOf(name.getMethodName());
237    // Create table then get the single region for our new table.
238    byte[] cf = Bytes.toBytes("cf");
239    TableDescriptor htd = TableDescriptorBuilder.newBuilder(tableName)
240      .setColumnFamily(ColumnFamilyDescriptorBuilder.of(cf)).build();
241    admin.createTable(htd);
242
243    for (int i = 0; cluster.getRegions(tableName).isEmpty() && i < 100; i++) {
244      Thread.sleep(100);
245    }
246    assertEquals(1, cluster.getRegions(tableName).size());
247
248    HRegion region = cluster.getRegions(tableName).get(0);
249    HStore store = region.getStore(cf);
250    int regionServerIndex = cluster.getServerWith(region.getRegionInfo().getRegionName());
251    HRegionServer regionServer = cluster.getRegionServer(regionServerIndex);
252
253    Table t = TESTING_UTIL.getConnection().getTable(tableName);
254    // insert data
255    insertData(tableName, admin, t);
256    insertData(tableName, admin, t);
257
258    int fileNum = store.getStorefiles().size();
259    // 0, Compaction Request
260    store.triggerMajorCompaction();
261    Optional<CompactionContext> cc = store.requestCompaction();
262    assertTrue(cc.isPresent());
263    // 1, A timeout split
264    // 1.1 close region
265    assertEquals(2, region.close(false).get(cf).size());
266    // 1.2 rollback and Region initialize again
267    region.initialize();
268
269    // 2, Run Compaction cc
270    assertFalse(region.compact(cc.get(), store, NoLimitThroughputController.INSTANCE));
271    assertTrue(fileNum > store.getStorefiles().size());
272
273    // 3, Split
274    requestSplitRegion(regionServer, region, Bytes.toBytes("row3"));
275    assertEquals(2, cluster.getRegions(tableName).size());
276  }
277
278  public static class FailingSplitMasterObserver implements MasterCoprocessor, MasterObserver {
279    volatile CountDownLatch latch;
280
281    @Override
282    public void start(CoprocessorEnvironment e) throws IOException {
283      latch = new CountDownLatch(1);
284    }
285
286    @Override
287    public Optional<MasterObserver> getMasterObserver() {
288      return Optional.of(this);
289    }
290
291    @Override
292    public void preSplitRegionBeforeMETAAction(
293        final ObserverContext<MasterCoprocessorEnvironment> ctx,
294        final byte[] splitKey,
295        final List<Mutation> metaEntries) throws IOException {
296      latch.countDown();
297      throw new IOException("Causing rollback of region split");
298    }
299  }
300
301  @Test
302  public void testSplitRollbackOnRegionClosing() throws Exception {
303    final TableName tableName = TableName.valueOf(name.getMethodName());
304
305    // Create table then get the single region for our new table.
306    Table t = createTableAndWait(tableName, HConstants.CATALOG_FAMILY);
307    List<HRegion> regions = cluster.getRegions(tableName);
308    RegionInfo hri = getAndCheckSingleTableRegion(regions);
309
310    int tableRegionIndex = ensureTableRegionNotOnSameServerAsMeta(admin, hri);
311
312    RegionStates regionStates = cluster.getMaster().getAssignmentManager().getRegionStates();
313
314    // Turn off balancer so it doesn't cut in and mess up our placements.
315    this.admin.balancerSwitch(false, true);
316    // Turn off the meta scanner so it don't remove parent on us.
317    cluster.getMaster().setCatalogJanitorEnabled(false);
318    try {
319      // Add a bit of load up into the table so splittable.
320      TESTING_UTIL.loadTable(t, HConstants.CATALOG_FAMILY, false);
321      // Get region pre-split.
322      HRegionServer server = cluster.getRegionServer(tableRegionIndex);
323      printOutRegions(server, "Initial regions: ");
324      int regionCount = cluster.getRegions(hri.getTable()).size();
325      regionStates.updateRegionState(hri, RegionState.State.CLOSING);
326
327      // Now try splitting.... should fail.  And each should successfully
328      // rollback.
329      // We don't roll back here anymore. Instead we fail-fast on construction of the
330      // split transaction. Catch the exception instead.
331      try {
332        FutureUtils.get(this.admin.splitRegionAsync(hri.getRegionName()));
333        fail();
334      } catch (DoNotRetryRegionException e) {
335        // Expected
336      }
337      // Wait around a while and assert count of regions remains constant.
338      for (int i = 0; i < 10; i++) {
339        Thread.sleep(100);
340        assertEquals(regionCount, cluster.getRegions(hri.getTable()).size());
341      }
342      regionStates.updateRegionState(hri, State.OPEN);
343      // Now try splitting and it should work.
344      admin.splitRegionAsync(hri.getRegionName()).get(2, TimeUnit.MINUTES);
345      // Get daughters
346      checkAndGetDaughters(tableName);
347      // OK, so split happened after we cleared the blocking node.
348    } finally {
349      admin.balancerSwitch(true, false);
350      cluster.getMaster().setCatalogJanitorEnabled(true);
351      t.close();
352    }
353  }
354
355  /**
356   * Test that if daughter split on us, we won't do the shutdown handler fixup just because we can't
357   * find the immediate daughter of an offlined parent.
358   */
359  @Test
360  public void testShutdownFixupWhenDaughterHasSplit() throws Exception {
361    final TableName tableName = TableName.valueOf(name.getMethodName());
362
363    // Create table then get the single region for our new table.
364    Table t = createTableAndWait(tableName, HConstants.CATALOG_FAMILY);
365    List<HRegion> regions = cluster.getRegions(tableName);
366    RegionInfo hri = getAndCheckSingleTableRegion(regions);
367
368    int tableRegionIndex = ensureTableRegionNotOnSameServerAsMeta(admin, hri);
369
370    // Turn off balancer so it doesn't cut in and mess up our placements.
371    this.admin.balancerSwitch(false, true);
372    // Turn off the meta scanner so it don't remove parent on us.
373    cluster.getMaster().setCatalogJanitorEnabled(false);
374    try {
375      // Add a bit of load up into the table so splittable.
376      TESTING_UTIL.loadTable(t, HConstants.CATALOG_FAMILY);
377      // Get region pre-split.
378      HRegionServer server = cluster.getRegionServer(tableRegionIndex);
379      printOutRegions(server, "Initial regions: ");
380      // Now split.
381      admin.splitRegionAsync(hri.getRegionName()).get(2, TimeUnit.MINUTES);
382      // Get daughters
383      List<HRegion> daughters = checkAndGetDaughters(tableName);
384      HRegion daughterRegion = daughters.get(0);
385      // Now split one of the daughters.
386      RegionInfo daughter = daughterRegion.getRegionInfo();
387      LOG.info("Daughter we are going to split: " + daughter);
388      // Compact first to ensure we have cleaned up references -- else the split
389      // will fail.
390      daughterRegion.compact(true);
391      daughterRegion.getStores().get(0).closeAndArchiveCompactedFiles();
392      for (int i = 0; i < 100; i++) {
393        if (!daughterRegion.hasReferences()) {
394          break;
395        }
396        Threads.sleep(100);
397      }
398      assertFalse("Waiting for reference to be compacted", daughterRegion.hasReferences());
399      LOG.info("Daughter hri before split (has been compacted): " + daughter);
400      admin.splitRegionAsync(daughter.getRegionName()).get(2, TimeUnit.MINUTES);
401      // Get list of daughters
402      daughters = cluster.getRegions(tableName);
403      for (HRegion d: daughters) {
404        LOG.info("Regions before crash: " + d);
405      }
406      // Now crash the server
407      cluster.abortRegionServer(tableRegionIndex);
408      waitUntilRegionServerDead();
409      awaitDaughters(tableName, daughters.size());
410      // Assert daughters are online and ONLY the original daughters -- that
411      // fixup didn't insert one during server shutdown recover.
412      regions = cluster.getRegions(tableName);
413      for (HRegion d: daughters) {
414        LOG.info("Regions after crash: " + d);
415      }
416      if (daughters.size() != regions.size()) {
417        LOG.info("Daughters=" + daughters.size() + ", regions=" + regions.size());
418      }
419      assertEquals(daughters.size(), regions.size());
420      for (HRegion r: regions) {
421        LOG.info("Regions post crash " + r + ", contains=" + daughters.contains(r));
422        assertTrue("Missing region post crash " + r, daughters.contains(r));
423      }
424    } finally {
425      LOG.info("EXITING");
426      admin.balancerSwitch(true, false);
427      cluster.getMaster().setCatalogJanitorEnabled(true);
428      t.close();
429    }
430  }
431
432  @Test
433  public void testSplitShouldNotThrowNPEEvenARegionHasEmptySplitFiles() throws Exception {
434    TableName userTableName = TableName.valueOf(name.getMethodName());
435    TableDescriptor htd = TableDescriptorBuilder.newBuilder(userTableName)
436      .setColumnFamily(ColumnFamilyDescriptorBuilder.of("col")).build();
437    admin.createTable(htd);
438    Table table = TESTING_UTIL.getConnection().getTable(userTableName);
439    try {
440      for (int i = 0; i <= 5; i++) {
441        String row = "row" + i;
442        Put p = new Put(Bytes.toBytes(row));
443        String val = "Val" + i;
444        p.addColumn(Bytes.toBytes("col"), Bytes.toBytes("ql"), Bytes.toBytes(val));
445        table.put(p);
446        admin.flush(userTableName);
447        Delete d = new Delete(Bytes.toBytes(row));
448        // Do a normal delete
449        table.delete(d);
450        admin.flush(userTableName);
451      }
452      admin.majorCompact(userTableName);
453      List<RegionInfo> regionsOfTable =
454          cluster.getMaster().getAssignmentManager().getRegionStates()
455          .getRegionsOfTable(userTableName);
456      assertEquals(1, regionsOfTable.size());
457      RegionInfo hRegionInfo = regionsOfTable.get(0);
458      Put p = new Put(Bytes.toBytes("row6"));
459      p.addColumn(Bytes.toBytes("col"), Bytes.toBytes("ql"), Bytes.toBytes("val"));
460      table.put(p);
461      p = new Put(Bytes.toBytes("row7"));
462      p.addColumn(Bytes.toBytes("col"), Bytes.toBytes("ql"), Bytes.toBytes("val"));
463      table.put(p);
464      p = new Put(Bytes.toBytes("row8"));
465      p.addColumn(Bytes.toBytes("col"), Bytes.toBytes("ql"), Bytes.toBytes("val"));
466      table.put(p);
467      admin.flush(userTableName);
468      admin.splitRegionAsync(hRegionInfo.getRegionName(), Bytes.toBytes("row7"));
469      regionsOfTable = cluster.getMaster()
470          .getAssignmentManager().getRegionStates()
471          .getRegionsOfTable(userTableName);
472
473      while (regionsOfTable.size() != 2) {
474        Thread.sleep(1000);
475        regionsOfTable = cluster.getMaster()
476            .getAssignmentManager().getRegionStates()
477            .getRegionsOfTable(userTableName);
478        LOG.debug("waiting 2 regions to be available, got " + regionsOfTable.size() +
479          ": " + regionsOfTable);
480
481      }
482      Assert.assertEquals(2, regionsOfTable.size());
483
484      Scan s = new Scan();
485      ResultScanner scanner = table.getScanner(s);
486      int mainTableCount = 0;
487      for (Result rr = scanner.next(); rr != null; rr = scanner.next()) {
488        mainTableCount++;
489      }
490      Assert.assertEquals(3, mainTableCount);
491    } finally {
492      table.close();
493    }
494  }
495
496  /**
497   * Verifies HBASE-5806. Here the case is that splitting is completed but before the CJ could
498   * remove the parent region the master is killed and restarted.
499   */
500  @Test
501  public void testMasterRestartAtRegionSplitPendingCatalogJanitor()
502      throws IOException, InterruptedException, NodeExistsException, KeeperException,
503      ServiceException, ExecutionException, TimeoutException {
504    final TableName tableName = TableName.valueOf(name.getMethodName());
505    // Create table then get the single region for our new table.
506    try (Table t = createTableAndWait(tableName, HConstants.CATALOG_FAMILY)) {
507      List<HRegion> regions = cluster.getRegions(tableName);
508      RegionInfo hri = getAndCheckSingleTableRegion(regions);
509
510      int tableRegionIndex = ensureTableRegionNotOnSameServerAsMeta(admin, hri);
511
512      // Turn off balancer so it doesn't cut in and mess up our placements.
513      this.admin.balancerSwitch(false, true);
514      // Turn off the meta scanner so it don't remove parent on us.
515      cluster.getMaster().setCatalogJanitorEnabled(false);
516      // Add a bit of load up into the table so splittable.
517      TESTING_UTIL.loadTable(t, HConstants.CATALOG_FAMILY, false);
518      // Get region pre-split.
519      HRegionServer server = cluster.getRegionServer(tableRegionIndex);
520      printOutRegions(server, "Initial regions: ");
521      // Call split.
522      this.admin.splitRegionAsync(hri.getRegionName()).get(2, TimeUnit.MINUTES);
523      List<HRegion> daughters = checkAndGetDaughters(tableName);
524
525      // Before cleanup, get a new master.
526      HMaster master = abortAndWaitForMaster();
527      // Now call compact on the daughters and clean up any references.
528      for (HRegion daughter : daughters) {
529        daughter.compact(true);
530        daughter.getStores().get(0).closeAndArchiveCompactedFiles();
531        assertFalse(daughter.hasReferences());
532      }
533      // BUT calling compact on the daughters is not enough. The CatalogJanitor looks
534      // in the filesystem, and the filesystem content is not same as what the Region
535      // is reading from. Compacted-away files are picked up later by the compacted
536      // file discharger process. It runs infrequently. Make it run so CatalogJanitor
537      // doens't find any references.
538      for (RegionServerThread rst : cluster.getRegionServerThreads()) {
539        boolean oldSetting = rst.getRegionServer().compactedFileDischarger.setUseExecutor(false);
540        rst.getRegionServer().compactedFileDischarger.run();
541        rst.getRegionServer().compactedFileDischarger.setUseExecutor(oldSetting);
542      }
543      cluster.getMaster().setCatalogJanitorEnabled(true);
544      ProcedureTestingUtility.waitAllProcedures(cluster.getMaster().getMasterProcedureExecutor());
545      LOG.info("Starting run of CatalogJanitor");
546      cluster.getMaster().getCatalogJanitor().run();
547      ProcedureTestingUtility.waitAllProcedures(cluster.getMaster().getMasterProcedureExecutor());
548      RegionStates regionStates = master.getAssignmentManager().getRegionStates();
549      ServerName regionServerOfRegion = regionStates.getRegionServerOfRegion(hri);
550      assertEquals(null, regionServerOfRegion);
551    } finally {
552      TESTING_UTIL.getAdmin().balancerSwitch(true, false);
553      cluster.getMaster().setCatalogJanitorEnabled(true);
554    }
555  }
556
557  @Test
558  public void testSplitWithRegionReplicas() throws Exception {
559    final TableName tableName = TableName.valueOf(name.getMethodName());
560    HTableDescriptor htd = TESTING_UTIL.createTableDescriptor(name.getMethodName());
561    htd.setRegionReplication(2);
562    htd.addCoprocessor(SlowMeCopro.class.getName());
563    // Create table then get the single region for our new table.
564    Table t = TESTING_UTIL.createTable(htd, new byte[][]{Bytes.toBytes("cf")}, null);
565    List<HRegion> oldRegions;
566    do {
567      oldRegions = cluster.getRegions(tableName);
568      Thread.sleep(10);
569    } while (oldRegions.size() != 2);
570    for (HRegion h : oldRegions) LOG.debug("OLDREGION " + h.getRegionInfo());
571    try {
572      int regionServerIndex = cluster.getServerWith(oldRegions.get(0).getRegionInfo()
573        .getRegionName());
574      HRegionServer regionServer = cluster.getRegionServer(regionServerIndex);
575      insertData(tableName, admin, t);
576      // Turn off balancer so it doesn't cut in and mess up our placements.
577      admin.balancerSwitch(false, true);
578      // Turn off the meta scanner so it don't remove parent on us.
579      cluster.getMaster().setCatalogJanitorEnabled(false);
580      boolean tableExists = MetaTableAccessor.tableExists(regionServer.getConnection(),
581          tableName);
582      assertEquals("The specified table should be present.", true, tableExists);
583      final HRegion region = findSplittableRegion(oldRegions);
584      regionServerIndex = cluster.getServerWith(region.getRegionInfo().getRegionName());
585      regionServer = cluster.getRegionServer(regionServerIndex);
586      assertTrue("not able to find a splittable region", region != null);
587      try {
588        requestSplitRegion(regionServer, region, Bytes.toBytes("row2"));
589      } catch (IOException e) {
590        e.printStackTrace();
591        fail("Split execution should have succeeded with no exceptions thrown " + e);
592      }
593      //TESTING_UTIL.waitUntilAllRegionsAssigned(tableName);
594      List<HRegion> newRegions;
595      do {
596        newRegions = cluster.getRegions(tableName);
597        for (HRegion h : newRegions) LOG.debug("NEWREGION " + h.getRegionInfo());
598        Thread.sleep(1000);
599      } while ((newRegions.contains(oldRegions.get(0)) || newRegions.contains(oldRegions.get(1)))
600          || newRegions.size() != 4);
601      tableExists = MetaTableAccessor.tableExists(regionServer.getConnection(),
602          tableName);
603      assertEquals("The specified table should be present.", true, tableExists);
604      // exists works on stale and we see the put after the flush
605      byte[] b1 = Bytes.toBytes("row1");
606      Get g = new Get(b1);
607      g.setConsistency(Consistency.STRONG);
608      // The following GET will make a trip to the meta to get the new location of the 1st daughter
609      // In the process it will also get the location of the replica of the daughter (initially
610      // pointing to the parent's replica)
611      Result r = t.get(g);
612      Assert.assertFalse(r.isStale());
613      LOG.info("exists stale after flush done");
614
615      SlowMeCopro.getPrimaryCdl().set(new CountDownLatch(1));
616      g = new Get(b1);
617      g.setConsistency(Consistency.TIMELINE);
618      // This will succeed because in the previous GET we get the location of the replica
619      r = t.get(g);
620      Assert.assertTrue(r.isStale());
621      SlowMeCopro.getPrimaryCdl().get().countDown();
622    } finally {
623      SlowMeCopro.getPrimaryCdl().get().countDown();
624      admin.balancerSwitch(true, false);
625      cluster.getMaster().setCatalogJanitorEnabled(true);
626      t.close();
627    }
628  }
629
630  private void insertData(final TableName tableName, Admin admin, Table t) throws IOException,
631      InterruptedException {
632    Put p = new Put(Bytes.toBytes("row1"));
633    p.addColumn(Bytes.toBytes("cf"), Bytes.toBytes("q1"), Bytes.toBytes("1"));
634    t.put(p);
635    p = new Put(Bytes.toBytes("row2"));
636    p.addColumn(Bytes.toBytes("cf"), Bytes.toBytes("q1"), Bytes.toBytes("2"));
637    t.put(p);
638    p = new Put(Bytes.toBytes("row3"));
639    p.addColumn(Bytes.toBytes("cf"), Bytes.toBytes("q1"), Bytes.toBytes("3"));
640    t.put(p);
641    p = new Put(Bytes.toBytes("row4"));
642    p.addColumn(Bytes.toBytes("cf"), Bytes.toBytes("q1"), Bytes.toBytes("4"));
643    t.put(p);
644    admin.flush(tableName);
645  }
646
647  /**
648   * If a table has regions that have no store files in a region, they should split successfully
649   * into two regions with no store files.
650   */
651  @Test
652  public void testSplitRegionWithNoStoreFiles() throws Exception {
653    final TableName tableName = TableName.valueOf(name.getMethodName());
654    // Create table then get the single region for our new table.
655    createTableAndWait(tableName, HConstants.CATALOG_FAMILY);
656    List<HRegion> regions = cluster.getRegions(tableName);
657    RegionInfo hri = getAndCheckSingleTableRegion(regions);
658    ensureTableRegionNotOnSameServerAsMeta(admin, hri);
659    int regionServerIndex = cluster.getServerWith(regions.get(0).getRegionInfo()
660      .getRegionName());
661    HRegionServer regionServer = cluster.getRegionServer(regionServerIndex);
662    // Turn off balancer so it doesn't cut in and mess up our placements.
663    this.admin.balancerSwitch(false, true);
664    // Turn off the meta scanner so it don't remove parent on us.
665    cluster.getMaster().setCatalogJanitorEnabled(false);
666    try {
667      // Precondition: we created a table with no data, no store files.
668      printOutRegions(regionServer, "Initial regions: ");
669      Configuration conf = cluster.getConfiguration();
670      HBaseFsck.debugLsr(conf, new Path("/"));
671      Path rootDir = FSUtils.getRootDir(conf);
672      FileSystem fs = TESTING_UTIL.getDFSCluster().getFileSystem();
673      Map<String, Path> storefiles =
674          FSUtils.getTableStoreFilePathMap(null, fs, rootDir, tableName);
675      assertEquals("Expected nothing but found " + storefiles.toString(), 0, storefiles.size());
676
677      // find a splittable region.  Refresh the regions list
678      regions = cluster.getRegions(tableName);
679      final HRegion region = findSplittableRegion(regions);
680      assertTrue("not able to find a splittable region", region != null);
681
682      // Now split.
683      try {
684        requestSplitRegion(regionServer, region, Bytes.toBytes("row2"));
685      } catch (IOException e) {
686        fail("Split execution should have succeeded with no exceptions thrown");
687      }
688
689      // Postcondition: split the table with no store files into two regions, but still have no
690      // store files
691      List<HRegion> daughters = cluster.getRegions(tableName);
692      assertEquals(2, daughters.size());
693
694      // check dirs
695      HBaseFsck.debugLsr(conf, new Path("/"));
696      Map<String, Path> storefilesAfter =
697          FSUtils.getTableStoreFilePathMap(null, fs, rootDir, tableName);
698      assertEquals("Expected nothing but found " + storefilesAfter.toString(), 0,
699          storefilesAfter.size());
700
701      hri = region.getRegionInfo(); // split parent
702      AssignmentManager am = cluster.getMaster().getAssignmentManager();
703      RegionStates regionStates = am.getRegionStates();
704      long start = EnvironmentEdgeManager.currentTime();
705      while (!regionStates.isRegionInState(hri, State.SPLIT)) {
706        LOG.debug("Waiting for SPLIT state on: " + hri);
707        assertFalse("Timed out in waiting split parent to be in state SPLIT",
708          EnvironmentEdgeManager.currentTime() - start > 60000);
709        Thread.sleep(500);
710      }
711      assertTrue(regionStates.isRegionInState(daughters.get(0).getRegionInfo(), State.OPEN));
712      assertTrue(regionStates.isRegionInState(daughters.get(1).getRegionInfo(), State.OPEN));
713
714      // We should not be able to assign it again
715      try {
716        am.assign(hri);
717      } catch (DoNotRetryIOException e) {
718        // Expected
719      }
720      assertFalse("Split region can't be assigned", regionStates.isRegionInTransition(hri));
721      assertTrue(regionStates.isRegionInState(hri, State.SPLIT));
722
723      // We should not be able to unassign it either
724      try {
725        am.unassign(hri);
726        fail("Should have thrown exception");
727      } catch (DoNotRetryIOException e) {
728        // Expected
729      }
730      assertFalse("Split region can't be unassigned", regionStates.isRegionInTransition(hri));
731      assertTrue(regionStates.isRegionInState(hri, State.SPLIT));
732    } finally {
733      admin.balancerSwitch(true, false);
734      cluster.getMaster().setCatalogJanitorEnabled(true);
735    }
736  }
737
738  @Test
739  public void testStoreFileReferenceCreationWhenSplitPolicySaysToSkipRangeCheck()
740      throws Exception {
741    final TableName tableName = TableName.valueOf(name.getMethodName());
742    try {
743      byte[] cf = Bytes.toBytes("f");
744      byte[] cf1 = Bytes.toBytes("i_f");
745      TableDescriptor htd = TableDescriptorBuilder.newBuilder(tableName)
746        .setColumnFamily(ColumnFamilyDescriptorBuilder.of(cf))
747        .setColumnFamily(ColumnFamilyDescriptorBuilder.of(cf1))
748        .setRegionSplitPolicyClassName(CustomSplitPolicy.class.getName()).build();
749      admin.createTable(htd);
750      List<HRegion> regions = awaitTableRegions(tableName);
751      HRegion region = regions.get(0);
752      for(int i = 3;i<9;i++) {
753        Put p = new Put(Bytes.toBytes("row"+i));
754        p.addColumn(cf, Bytes.toBytes("q"), Bytes.toBytes("value" + i));
755        p.addColumn(cf1, Bytes.toBytes("q"), Bytes.toBytes("value" + i));
756        region.put(p);
757      }
758      region.flush(true);
759      HStore store = region.getStore(cf);
760      Collection<HStoreFile> storefiles = store.getStorefiles();
761      assertEquals(1, storefiles.size());
762      assertFalse(region.hasReferences());
763      Path referencePath =
764          region.getRegionFileSystem().splitStoreFile(region.getRegionInfo(), "f",
765            storefiles.iterator().next(), Bytes.toBytes("row1"), false, region.getSplitPolicy());
766      assertNull(referencePath);
767      referencePath =
768          region.getRegionFileSystem().splitStoreFile(region.getRegionInfo(), "i_f",
769            storefiles.iterator().next(), Bytes.toBytes("row1"), false, region.getSplitPolicy());
770      assertNotNull(referencePath);
771    } finally {
772      TESTING_UTIL.deleteTable(tableName);
773    }
774  }
775
776  private HRegion findSplittableRegion(final List<HRegion> regions) throws InterruptedException {
777    for (int i = 0; i < 5; ++i) {
778      for (HRegion r: regions) {
779        if (r.isSplittable() && r.getRegionInfo().getReplicaId() == 0) {
780          return(r);
781        }
782      }
783      Thread.sleep(100);
784    }
785    return null;
786  }
787
788  private List<HRegion> checkAndGetDaughters(TableName tableName) throws InterruptedException {
789    List<HRegion> daughters = null;
790    // try up to 10s
791    for (int i = 0; i < 100; i++) {
792      daughters = cluster.getRegions(tableName);
793      if (daughters.size() >= 2) {
794        break;
795      }
796      Thread.sleep(100);
797    }
798    assertTrue(daughters.size() >= 2);
799    return daughters;
800  }
801
802  private HMaster abortAndWaitForMaster() throws IOException, InterruptedException {
803    cluster.abortMaster(0);
804    cluster.waitOnMaster(0);
805    HMaster master = cluster.startMaster().getMaster();
806    cluster.waitForActiveAndReadyMaster();
807    return master;
808  }
809
810  /**
811   * Ensure single table region is not on same server as the single hbase:meta table
812   * region.
813   * @param admin
814   * @param hri
815   * @return Index of the server hosting the single table region
816   * @throws UnknownRegionException
817   * @throws MasterNotRunningException
818   * @throws org.apache.hadoop.hbase.ZooKeeperConnectionException
819   * @throws InterruptedException
820   */
821  private int ensureTableRegionNotOnSameServerAsMeta(final Admin admin,
822      final RegionInfo hri)
823  throws IOException, MasterNotRunningException,
824  ZooKeeperConnectionException, InterruptedException {
825    // Now make sure that the table region is not on same server as that hosting
826    // hbase:meta  We don't want hbase:meta replay polluting our test when we later crash
827    // the table region serving server.
828    int metaServerIndex = cluster.getServerWithMeta();
829    boolean tablesOnMaster = LoadBalancer.isTablesOnMaster(TESTING_UTIL.getConfiguration());
830    if (tablesOnMaster) {
831      // Need to check master is supposed to host meta... perhaps it is not.
832      throw new UnsupportedOperationException();
833      // TODO: assertTrue(metaServerIndex == -1); // meta is on master now
834    }
835    HRegionServer metaRegionServer = tablesOnMaster?
836      cluster.getMaster(): cluster.getRegionServer(metaServerIndex);
837    int tableRegionIndex = cluster.getServerWith(hri.getRegionName());
838    assertTrue(tableRegionIndex != -1);
839    HRegionServer tableRegionServer = cluster.getRegionServer(tableRegionIndex);
840    LOG.info("MetaRegionServer=" + metaRegionServer.getServerName() +
841      ", other=" + tableRegionServer.getServerName());
842    if (metaRegionServer.getServerName().equals(tableRegionServer.getServerName())) {
843      HRegionServer hrs = getOtherRegionServer(cluster, metaRegionServer);
844      assertNotNull(hrs);
845      assertNotNull(hri);
846      LOG.info("Moving " + hri.getRegionNameAsString() + " from " +
847        metaRegionServer.getServerName() + " to " +
848        hrs.getServerName() + "; metaServerIndex=" + metaServerIndex);
849      admin.move(hri.getEncodedNameAsBytes(), hrs.getServerName());
850    }
851    // Wait till table region is up on the server that is NOT carrying hbase:meta.
852    for (int i = 0; i < 100; i++) {
853      tableRegionIndex = cluster.getServerWith(hri.getRegionName());
854      if (tableRegionIndex != -1 && tableRegionIndex != metaServerIndex) break;
855      LOG.debug("Waiting on region move off the hbase:meta server; current index " +
856        tableRegionIndex + " and metaServerIndex=" + metaServerIndex);
857      Thread.sleep(100);
858    }
859    assertTrue("Region not moved off hbase:meta server, tableRegionIndex=" + tableRegionIndex,
860      tableRegionIndex != -1 && tableRegionIndex != metaServerIndex);
861    // Verify for sure table region is not on same server as hbase:meta
862    tableRegionIndex = cluster.getServerWith(hri.getRegionName());
863    assertTrue(tableRegionIndex != -1);
864    assertNotSame(metaServerIndex, tableRegionIndex);
865    return tableRegionIndex;
866  }
867
868  /**
869   * Find regionserver other than the one passed.
870   * Can't rely on indexes into list of regionservers since crashed servers
871   * occupy an index.
872   * @param cluster
873   * @param notThisOne
874   * @return A regionserver that is not <code>notThisOne</code> or null if none
875   * found
876   */
877  private HRegionServer getOtherRegionServer(final MiniHBaseCluster cluster,
878      final HRegionServer notThisOne) {
879    for (RegionServerThread rst: cluster.getRegionServerThreads()) {
880      HRegionServer hrs = rst.getRegionServer();
881      if (hrs.getServerName().equals(notThisOne.getServerName())) continue;
882      if (hrs.isStopping() || hrs.isStopped()) continue;
883      return hrs;
884    }
885    return null;
886  }
887
888  private void printOutRegions(final HRegionServer hrs, final String prefix)
889      throws IOException {
890    List<RegionInfo> regions = ProtobufUtil.getOnlineRegions(hrs.getRSRpcServices());
891    for (RegionInfo region: regions) {
892      LOG.info(prefix + region.getRegionNameAsString());
893    }
894  }
895
896  private void waitUntilRegionServerDead() throws InterruptedException, IOException {
897    // Wait until the master processes the RS shutdown
898    for (int i=0; (cluster.getMaster().getClusterMetrics()
899        .getLiveServerMetrics().size() > NB_SERVERS
900        || cluster.getLiveRegionServerThreads().size() > NB_SERVERS) && i<100; i++) {
901      LOG.info("Waiting on server to go down");
902      Thread.sleep(100);
903    }
904    assertFalse("Waited too long for RS to die",
905      cluster.getMaster().getClusterMetrics(). getLiveServerMetrics().size() > NB_SERVERS
906        || cluster.getLiveRegionServerThreads().size() > NB_SERVERS);
907  }
908
909  private void awaitDaughters(TableName tableName, int numDaughters) throws InterruptedException {
910    // Wait till regions are back on line again.
911    for (int i = 0; cluster.getRegions(tableName).size() < numDaughters && i < 60; i++) {
912      LOG.info("Waiting for repair to happen");
913      Thread.sleep(1000);
914    }
915    if (cluster.getRegions(tableName).size() < numDaughters) {
916      fail("Waiting too long for daughter regions");
917    }
918  }
919
920  private List<HRegion> awaitTableRegions(final TableName tableName) throws InterruptedException {
921    List<HRegion> regions = null;
922    for (int i = 0; i < 100; i++) {
923      regions = cluster.getRegions(tableName);
924      if (regions.size() > 0) break;
925      Thread.sleep(100);
926    }
927    return regions;
928  }
929
930  private Table createTableAndWait(TableName tableName, byte[] cf) throws IOException,
931      InterruptedException {
932    Table t = TESTING_UTIL.createTable(tableName, cf);
933    awaitTableRegions(tableName);
934    assertTrue("Table not online: " + tableName,
935      cluster.getRegions(tableName).size() != 0);
936    return t;
937  }
938
939  // Make it public so that JVMClusterUtil can access it.
940  public static class MyMaster extends HMaster {
941    public MyMaster(Configuration conf) throws IOException, KeeperException, InterruptedException {
942      super(conf);
943    }
944
945    @Override
946    protected RSRpcServices createRpcServices() throws IOException {
947      return new MyMasterRpcServices(this);
948    }
949  }
950
951  static class MyMasterRpcServices extends MasterRpcServices {
952    static AtomicBoolean enabled = new AtomicBoolean(false);
953
954    private HMaster myMaster;
955    public MyMasterRpcServices(HMaster master) throws IOException {
956      super(master);
957      myMaster = master;
958    }
959
960    @Override
961    public ReportRegionStateTransitionResponse reportRegionStateTransition(RpcController c,
962        ReportRegionStateTransitionRequest req) throws ServiceException {
963      ReportRegionStateTransitionResponse resp = super.reportRegionStateTransition(c, req);
964      if (enabled.get() && req.getTransition(0).getTransitionCode().equals(
965          TransitionCode.READY_TO_SPLIT) && !resp.hasErrorMessage()) {
966        RegionStates regionStates = myMaster.getAssignmentManager().getRegionStates();
967        for (RegionStateNode regionState:
968          regionStates.getRegionsInTransition()) {
969          /* TODO!!!!
970          // Find the merging_new region and remove it
971          if (regionState.isSplittingNew()) {
972            regionStates.deleteRegion(regionState.getRegion());
973          }
974          */
975        }
976      }
977      return resp;
978    }
979  }
980
981  static class CustomSplitPolicy extends IncreasingToUpperBoundRegionSplitPolicy {
982
983    @Override
984    protected boolean shouldSplit() {
985      return true;
986    }
987
988    @Override
989    public boolean skipStoreFileRangeCheck(String familyName) {
990      if(familyName.startsWith("i_")) {
991        return true;
992      } else {
993        return false;
994      }
995    }
996  }
997}
998