/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.regionserver;

import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertNotNull;
import static org.junit.Assert.assertNotSame;
import static org.junit.Assert.assertNull;
import static org.junit.Assert.assertTrue;
import static org.junit.Assert.fail;

import java.io.IOException;
import java.util.Collection;
import java.util.List;
import java.util.Map;
import java.util.Optional;
import java.util.concurrent.CountDownLatch;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicBoolean;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.Coprocessor;
import org.apache.hadoop.hbase.CoprocessorEnvironment;
import org.apache.hadoop.hbase.HBaseClassTestRule;
import org.apache.hadoop.hbase.HBaseTestingUtility;
import org.apache.hadoop.hbase.HColumnDescriptor;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.MasterNotRunningException;
import org.apache.hadoop.hbase.MetaTableAccessor;
import org.apache.hadoop.hbase.MiniHBaseCluster;
import org.apache.hadoop.hbase.ServerName;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.UnknownRegionException;
import org.apache.hadoop.hbase.ZooKeeperConnectionException;
import org.apache.hadoop.hbase.client.Admin;
import org.apache.hadoop.hbase.client.CompactionState;
import org.apache.hadoop.hbase.client.Consistency;
import org.apache.hadoop.hbase.client.Delete;
import org.apache.hadoop.hbase.client.DoNotRetryRegionException;
import org.apache.hadoop.hbase.client.Get;
import org.apache.hadoop.hbase.client.Mutation;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.RegionInfo;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.ResultScanner;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.client.Table;
import org.apache.hadoop.hbase.client.TestReplicasClient.SlowMeCopro;
import org.apache.hadoop.hbase.coprocessor.MasterCoprocessor;
import org.apache.hadoop.hbase.coprocessor.MasterCoprocessorEnvironment;
import org.apache.hadoop.hbase.coprocessor.MasterObserver;
import org.apache.hadoop.hbase.coprocessor.ObserverContext;
import org.apache.hadoop.hbase.exceptions.UnexpectedStateException;
import org.apache.hadoop.hbase.master.HMaster;
import org.apache.hadoop.hbase.master.LoadBalancer;
import org.apache.hadoop.hbase.master.MasterRpcServices;
import org.apache.hadoop.hbase.master.RegionState;
import org.apache.hadoop.hbase.master.RegionState.State;
import org.apache.hadoop.hbase.master.assignment.AssignmentManager;
import org.apache.hadoop.hbase.master.assignment.AssignmentTestingUtil;
import org.apache.hadoop.hbase.master.assignment.RegionStates;
import org.apache.hadoop.hbase.procedure2.ProcedureTestingUtility;
import org.apache.hadoop.hbase.regionserver.compactions.CompactionContext;
import org.apache.hadoop.hbase.regionserver.throttle.NoLimitThroughputController;
import org.apache.hadoop.hbase.testclassification.LargeTests;
import org.apache.hadoop.hbase.testclassification.RegionServerTests;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
import org.apache.hadoop.hbase.util.FSUtils;
import org.apache.hadoop.hbase.util.HBaseFsck;
import org.apache.hadoop.hbase.util.JVMClusterUtil.RegionServerThread;
import org.apache.hadoop.hbase.util.RetryCounter;
import org.apache.hadoop.hbase.util.Threads;
import org.apache.zookeeper.KeeperException;
import org.apache.zookeeper.KeeperException.NodeExistsException;
import org.junit.After;
import org.junit.AfterClass;
import org.junit.Assert;
import org.junit.Before;
import org.junit.BeforeClass;
import org.junit.ClassRule;
import org.junit.Rule;
import org.junit.Test;
import org.junit.experimental.categories.Category;
import org.junit.rules.TestName;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import org.apache.hbase.thirdparty.com.google.protobuf.RpcController;
import org.apache.hbase.thirdparty.com.google.protobuf.ServiceException;

import org.apache.hadoop.hbase.shaded.protobuf.ProtobufUtil;
import org.apache.hadoop.hbase.shaded.protobuf.generated.RegionServerStatusProtos.RegionStateTransition.TransitionCode;
import org.apache.hadoop.hbase.shaded.protobuf.generated.RegionServerStatusProtos.ReportRegionStateTransitionRequest;
import org.apache.hadoop.hbase.shaded.protobuf.generated.RegionServerStatusProtos.ReportRegionStateTransitionResponse;

/**
 * These tests exercise splitting a region against a running cluster.
 */
@Category({RegionServerTests.class, LargeTests.class})
@SuppressWarnings("deprecation")
public class TestSplitTransactionOnCluster {

  @ClassRule
  public static final HBaseClassTestRule CLASS_RULE =
      HBaseClassTestRule.forClass(TestSplitTransactionOnCluster.class);

  private static final Logger LOG = LoggerFactory.getLogger(TestSplitTransactionOnCluster.class);
  private Admin admin = null;
  private MiniHBaseCluster cluster = null;
  private static final int NB_SERVERS = 3;

  static final HBaseTestingUtility TESTING_UTIL =
    new HBaseTestingUtility();

  @Rule
  public TestName name = new TestName();

  @BeforeClass public static void before() throws Exception {
    TESTING_UTIL.getConfiguration().setInt(HConstants.HBASE_BALANCER_PERIOD, 60000);
    TESTING_UTIL.startMiniCluster(1, NB_SERVERS, null, MyMaster.class, null);
  }

  @AfterClass public static void after() throws Exception {
    TESTING_UTIL.shutdownMiniCluster();
  }

  @Before public void setup() throws IOException {
    TESTING_UTIL.ensureSomeNonStoppedRegionServersAvailable(NB_SERVERS);
    this.admin = TESTING_UTIL.getAdmin();
    this.cluster = TESTING_UTIL.getMiniHBaseCluster();
  }

  @After
  public void tearDown() throws Exception {
    this.admin.close();
    for (HTableDescriptor htd: this.admin.listTables()) {
      LOG.info("Tear down, remove table=" + htd.getTableName());
      TESTING_UTIL.deleteTable(htd.getTableName());
    }
  }

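  /**
   * Assert that the table has exactly one region and wait for that region to be assigned.
   * @return The {@link RegionInfo} of the single region.
   */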
  private RegionInfo getAndCheckSingleTableRegion(final List<HRegion> regions)
      throws IOException, InterruptedException {
    assertEquals(1, regions.size());
    RegionInfo hri = regions.get(0).getRegionInfo();
    AssignmentTestingUtil.waitForAssignment(cluster.getMaster().getAssignmentManager(), hri);
    return hri;
  }

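  /**
   * Ask the master to split the given region at {@code midKey} and wait for the resulting
   * split procedure to finish.
   */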
  private void requestSplitRegion(
      final HRegionServer rsServer,
      final Region region,
      final byte[] midKey) throws IOException {
    long procId = cluster.getMaster().splitRegion(region.getRegionInfo(), midKey, 0, 0);
    // Wait for the split to complete or get interrupted. If the split completes successfully,
    // the procedure will finish; if the split fails, the procedure will throw an exception.
    ProcedureTestingUtility.waitProcedure(cluster.getMaster().getMasterProcedureExecutor(), procId);
  }

  @Test
  public void testRITStateForRollback() throws Exception {
    final TableName tableName = TableName.valueOf(name.getMethodName());
    final HMaster master = cluster.getMaster();
    try {
      // Create table then get the single region for our new table.
      Table t = createTableAndWait(tableName, Bytes.toBytes("cf"));
      final List<HRegion> regions = cluster.getRegions(tableName);
      final RegionInfo hri = getAndCheckSingleTableRegion(regions);
      insertData(tableName, admin, t);
      t.close();

      // Turn off balancer so it doesn't cut in and mess up our placements.
      this.admin.setBalancerRunning(false, true);
      // Turn off the meta scanner so it doesn't remove the parent on us.
      master.setCatalogJanitorEnabled(false);

      // find a splittable region
      final HRegion region = findSplittableRegion(regions);
      assertTrue("not able to find a splittable region", region != null);

      // install master co-processor to fail splits
      master.getMasterCoprocessorHost().load(
        FailingSplitMasterObserver.class,
        Coprocessor.PRIORITY_USER,
        master.getConfiguration());

      // split async
      this.admin.splitRegion(region.getRegionInfo().getRegionName(), new byte[] {42});

      // we have to wait until the SPLITTING state is seen by the master
      FailingSplitMasterObserver observer =
          master.getMasterCoprocessorHost().findCoprocessor(FailingSplitMasterObserver.class);
      assertNotNull(observer);
      observer.latch.await();

      LOG.info("Waiting for region to come out of RIT");
      while (!cluster.getMaster().getAssignmentManager().getRegionStates().isRegionOnline(hri)) {
        Threads.sleep(100);
      }
      assertTrue(cluster.getMaster().getAssignmentManager().getRegionStates().isRegionOnline(hri));
    } finally {
      admin.setBalancerRunning(true, false);
      master.setCatalogJanitorEnabled(true);
      abortAndWaitForMaster();
      TESTING_UTIL.deleteTable(tableName);
    }
  }

  @Test
  public void testSplitFailedCompactionAndSplit() throws Exception {
    final TableName tableName = TableName.valueOf(name.getMethodName());
    // Create table then get the single region for our new table.
    HTableDescriptor htd = new HTableDescriptor(tableName);
    byte[] cf = Bytes.toBytes("cf");
    htd.addFamily(new HColumnDescriptor(cf));
    admin.createTable(htd);

    for (int i = 0; cluster.getRegions(tableName).isEmpty() && i < 100; i++) {
      Thread.sleep(100);
    }
    assertEquals(1, cluster.getRegions(tableName).size());

    HRegion region = cluster.getRegions(tableName).get(0);
    HStore store = region.getStore(cf);
    int regionServerIndex = cluster.getServerWith(region.getRegionInfo().getRegionName());
    HRegionServer regionServer = cluster.getRegionServer(regionServerIndex);

    Table t = TESTING_UTIL.getConnection().getTable(tableName);
    // insert data
    insertData(tableName, admin, t);
    insertData(tableName, admin, t);

    int fileNum = store.getStorefiles().size();
    // 0, Compaction Request
    store.triggerMajorCompaction();
    Optional<CompactionContext> cc = store.requestCompaction();
    assertTrue(cc.isPresent());
    // 1, A timeout split
    // 1.1 close region
    assertEquals(2, region.close(false).get(cf).size());
    // 1.2 rollback and Region initialize again
    region.initialize();

    // 2, Run Compaction cc
    assertFalse(region.compact(cc.get(), store, NoLimitThroughputController.INSTANCE));
    assertTrue(fileNum > store.getStorefiles().size());

    // 3, Split
    requestSplitRegion(regionServer, region, Bytes.toBytes("row3"));
    assertEquals(2, cluster.getRegions(tableName).size());
  }

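  /**
   * Master observer that forces region splits to roll back by throwing an IOException from
   * the pre-split-before-META hook. The latch lets tests wait until a split attempt has
   * actually reached the master.
   */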
  public static class FailingSplitMasterObserver implements MasterCoprocessor, MasterObserver {
    volatile CountDownLatch latch;

    @Override
    public void start(CoprocessorEnvironment e) throws IOException {
      latch = new CountDownLatch(1);
    }

    @Override
    public Optional<MasterObserver> getMasterObserver() {
      return Optional.of(this);
    }

    @Override
    public void preSplitRegionBeforeMETAAction(
        final ObserverContext<MasterCoprocessorEnvironment> ctx,
        final byte[] splitKey,
        final List<Mutation> metaEntries) throws IOException {
      latch.countDown();
      throw new IOException("Causing rollback of region split");
    }
  }

  @Test
  public void testSplitRollbackOnRegionClosing() throws IOException, InterruptedException {
    final TableName tableName = TableName.valueOf(name.getMethodName());

    // Create table then get the single region for our new table.
    Table t = createTableAndWait(tableName, HConstants.CATALOG_FAMILY);
    List<HRegion> regions = cluster.getRegions(tableName);
    RegionInfo hri = getAndCheckSingleTableRegion(regions);

    int tableRegionIndex = ensureTableRegionNotOnSameServerAsMeta(admin, hri);

    RegionStates regionStates = cluster.getMaster().getAssignmentManager().getRegionStates();

    // Turn off balancer so it doesn't cut in and mess up our placements.
    this.admin.setBalancerRunning(false, true);
    // Turn off the meta scanner so it doesn't remove the parent on us.
    cluster.getMaster().setCatalogJanitorEnabled(false);
    try {
      // Add a bit of load to the table so it becomes splittable.
      TESTING_UTIL.loadTable(t, HConstants.CATALOG_FAMILY, false);
      // Get region pre-split.
      HRegionServer server = cluster.getRegionServer(tableRegionIndex);
      printOutRegions(server, "Initial regions: ");
      int regionCount = cluster.getRegions(hri.getTable()).size();
      regionStates.updateRegionState(hri, RegionState.State.CLOSING);

      // Now try splitting; it should fail. We don't roll back here anymore. Instead we
      // fail fast on construction of the split transaction, so catch the exception.
      try {
        this.admin.splitRegion(hri.getRegionName());
        fail();
      } catch (DoNotRetryRegionException e) {
        // Expected
      }
      // Wait around a while and assert count of regions remains constant.
      for (int i = 0; i < 10; i++) {
        Thread.sleep(100);
        assertEquals(regionCount, cluster.getRegions(hri.getTable()).size());
      }
      regionStates.updateRegionState(hri, State.OPEN);
      // Now try splitting and it should work.
      split(hri, server, regionCount);
      // Get daughters
      checkAndGetDaughters(tableName);
      // OK, so split happened after we cleared the blocking node.
    } finally {
      admin.setBalancerRunning(true, false);
      cluster.getMaster().setCatalogJanitorEnabled(true);
      t.close();
    }
  }

  /**
   * Test that if a daughter splits on us, we won't do the shutdown handler fixup
   * just because we can't find the immediate daughter of an offlined parent.
   * @throws IOException
   * @throws InterruptedException
   */
  @Test
  public void testShutdownFixupWhenDaughterHasSplit() throws IOException, InterruptedException {
    final TableName tableName = TableName.valueOf(name.getMethodName());

    // Create table then get the single region for our new table.
    Table t = createTableAndWait(tableName, HConstants.CATALOG_FAMILY);
    List<HRegion> regions = cluster.getRegions(tableName);
    RegionInfo hri = getAndCheckSingleTableRegion(regions);

    int tableRegionIndex = ensureTableRegionNotOnSameServerAsMeta(admin, hri);

    // Turn off balancer so it doesn't cut in and mess up our placements.
    this.admin.setBalancerRunning(false, true);
    // Turn off the meta scanner so it doesn't remove the parent on us.
    cluster.getMaster().setCatalogJanitorEnabled(false);
    try {
      // Add a bit of load to the table so it becomes splittable.
      TESTING_UTIL.loadTable(t, HConstants.CATALOG_FAMILY);
      // Get region pre-split.
      HRegionServer server = cluster.getRegionServer(tableRegionIndex);
      printOutRegions(server, "Initial regions: ");
      int regionCount = cluster.getRegions(hri.getTable()).size();
      // Now split.
      split(hri, server, regionCount);
      // Get daughters
      List<HRegion> daughters = checkAndGetDaughters(tableName);
      // Now split one of the daughters.
      regionCount = cluster.getRegions(hri.getTable()).size();
      RegionInfo daughter = daughters.get(0).getRegionInfo();
      LOG.info("Daughter we are going to split: " + daughter);
      // Compact first to ensure we have cleaned up references -- else the split
      // will fail.
      this.admin.compactRegion(daughter.getRegionName());
      RetryCounter retrier = new RetryCounter(30, 1, TimeUnit.SECONDS);
      while (CompactionState.NONE != admin.getCompactionStateForRegion(daughter.getRegionName())
          && retrier.shouldRetry()) {
        retrier.sleepUntilNextRetry();
      }
      daughters = cluster.getRegions(tableName);
      HRegion daughterRegion = null;
      for (HRegion r : daughters) {
        if (RegionInfo.COMPARATOR.compare(r.getRegionInfo(), daughter) == 0) {
          daughterRegion = r;
          // Archiving the compacted references file
          r.getStores().get(0).closeAndArchiveCompactedFiles();
          LOG.info("Found matching HRI: " + daughterRegion);
          break;
        }
      }
      assertTrue(daughterRegion != null);
      for (int i = 0; i < 100; i++) {
        if (!daughterRegion.hasReferences()) break;
        Threads.sleep(100);
      }
      assertFalse("Waiting for reference to be compacted", daughterRegion.hasReferences());
      LOG.info("Daughter hri before split (has been compacted): " + daughter);
      split(daughter, server, regionCount);
      // Get list of daughters
      daughters = cluster.getRegions(tableName);
      for (HRegion d: daughters) {
        LOG.info("Regions before crash: " + d);
      }
      // Now crash the server
      cluster.abortRegionServer(tableRegionIndex);
      waitUntilRegionServerDead();
      awaitDaughters(tableName, daughters.size());
      // Assert daughters are online and ONLY the original daughters -- that
      // fixup didn't insert one during server shutdown recovery.
      regions = cluster.getRegions(tableName);
      for (HRegion d: daughters) {
        LOG.info("Regions after crash: " + d);
      }
      if (daughters.size() != regions.size()) {
        LOG.info("Daughters=" + daughters.size() + ", regions=" + regions.size());
      }
      assertEquals(daughters.size(), regions.size());
      for (HRegion r: regions) {
        LOG.info("Regions post crash " + r + ", contains=" + daughters.contains(r));
        assertTrue("Missing region post crash " + r, daughters.contains(r));
      }
    } finally {
      LOG.info("EXITING");
      admin.setBalancerRunning(true, false);
      cluster.getMaster().setCatalogJanitorEnabled(true);
      t.close();
    }
  }

  @Test
  public void testSplitShouldNotThrowNPEEvenARegionHasEmptySplitFiles() throws Exception {
    TableName userTableName = TableName.valueOf(name.getMethodName());
    HTableDescriptor htd = new HTableDescriptor(userTableName);
    HColumnDescriptor hcd = new HColumnDescriptor("col");
    htd.addFamily(hcd);
    admin.createTable(htd);
    Table table = TESTING_UTIL.getConnection().getTable(userTableName);
    try {
      for (int i = 0; i <= 5; i++) {
        String row = "row" + i;
        Put p = new Put(row.getBytes());
        String val = "Val" + i;
        p.addColumn("col".getBytes(), "ql".getBytes(), val.getBytes());
        table.put(p);
        admin.flush(userTableName);
        Delete d = new Delete(row.getBytes());
        // Do a normal delete
        table.delete(d);
        admin.flush(userTableName);
      }
      admin.majorCompact(userTableName);
      List<RegionInfo> regionsOfTable =
          cluster.getMaster().getAssignmentManager().getRegionStates()
          .getRegionsOfTable(userTableName);
      assertEquals(1, regionsOfTable.size());
      RegionInfo hRegionInfo = regionsOfTable.get(0);
      Put p = new Put("row6".getBytes());
      p.addColumn("col".getBytes(), "ql".getBytes(), "val".getBytes());
      table.put(p);
      p = new Put("row7".getBytes());
      p.addColumn("col".getBytes(), "ql".getBytes(), "val".getBytes());
      table.put(p);
      p = new Put("row8".getBytes());
      p.addColumn("col".getBytes(), "ql".getBytes(), "val".getBytes());
      table.put(p);
      admin.flush(userTableName);
      admin.splitRegion(hRegionInfo.getRegionName(), "row7".getBytes());
      regionsOfTable = cluster.getMaster()
          .getAssignmentManager().getRegionStates()
          .getRegionsOfTable(userTableName);

      while (regionsOfTable.size() != 2) {
        Thread.sleep(1000);
        regionsOfTable = cluster.getMaster()
            .getAssignmentManager().getRegionStates()
            .getRegionsOfTable(userTableName);
        LOG.debug("waiting 2 regions to be available, got " + regionsOfTable.size() +
          ": " + regionsOfTable);
      }
      Assert.assertEquals(2, regionsOfTable.size());

      Scan s = new Scan();
      ResultScanner scanner = table.getScanner(s);
      int mainTableCount = 0;
      for (Result rr = scanner.next(); rr != null; rr = scanner.next()) {
        mainTableCount++;
      }
      Assert.assertEquals(3, mainTableCount);
    } finally {
      table.close();
    }
  }

  /**
   * Verifies HBASE-5806. The case is that splitting has completed, but the master is killed
   * and restarted before the CatalogJanitor can remove the parent region.
   * @throws IOException
   * @throws InterruptedException
   * @throws NodeExistsException
   * @throws KeeperException
   */
  @Test
  public void testMasterRestartAtRegionSplitPendingCatalogJanitor()
      throws IOException, InterruptedException, NodeExistsException,
      KeeperException, ServiceException {
    final TableName tableName = TableName.valueOf(name.getMethodName());

    // Create table then get the single region for our new table.
    Table t = createTableAndWait(tableName, HConstants.CATALOG_FAMILY);
    List<HRegion> regions = cluster.getRegions(tableName);
    RegionInfo hri = getAndCheckSingleTableRegion(regions);

    int tableRegionIndex = ensureTableRegionNotOnSameServerAsMeta(admin, hri);

    // Turn off balancer so it doesn't cut in and mess up our placements.
    this.admin.setBalancerRunning(false, true);
    // Turn off the meta scanner so it doesn't remove the parent on us.
    cluster.getMaster().setCatalogJanitorEnabled(false);
    try {
      // Add a bit of load to the table so it becomes splittable.
      TESTING_UTIL.loadTable(t, HConstants.CATALOG_FAMILY, false);
      // Get region pre-split.
      HRegionServer server = cluster.getRegionServer(tableRegionIndex);
      printOutRegions(server, "Initial regions: ");
      // Call split.
      this.admin.splitRegion(hri.getRegionName());
      List<HRegion> daughters = checkAndGetDaughters(tableName);

      // Before cleanup, get a new master.
      HMaster master = abortAndWaitForMaster();
      // Now call compact on the daughters and clean up any references.
      for (HRegion daughter : daughters) {
        daughter.compact(true);
        RetryCounter retrier = new RetryCounter(30, 1, TimeUnit.SECONDS);
        while (CompactionState.NONE != admin
            .getCompactionStateForRegion(daughter.getRegionInfo().getRegionName())
            && retrier.shouldRetry()) {
          retrier.sleepUntilNextRetry();
        }
        daughter.getStores().get(0).closeAndArchiveCompactedFiles();
        assertFalse(daughter.hasReferences());
      }
      // BUT calling compact on the daughters is not enough. The CatalogJanitor looks
      // at the filesystem, and the filesystem content is not the same as what the Region
      // is reading from. Compacted-away files are picked up later by the compacted
      // file discharger process, which runs infrequently. Make it run now so the
      // CatalogJanitor doesn't find any references.
      for (RegionServerThread rst: cluster.getRegionServerThreads()) {
        boolean oldSetting = rst.getRegionServer().compactedFileDischarger.setUseExecutor(false);
        rst.getRegionServer().compactedFileDischarger.run();
        rst.getRegionServer().compactedFileDischarger.setUseExecutor(oldSetting);
      }
      cluster.getMaster().setCatalogJanitorEnabled(true);
      ProcedureTestingUtility.waitAllProcedures(cluster.getMaster().getMasterProcedureExecutor());
      LOG.info("Starting run of CatalogJanitor");
      cluster.getMaster().getCatalogJanitor().run();
      ProcedureTestingUtility.waitAllProcedures(cluster.getMaster().getMasterProcedureExecutor());
      RegionStates regionStates = master.getAssignmentManager().getRegionStates();
      ServerName regionServerOfRegion = regionStates.getRegionServerOfRegion(hri);
      assertEquals(null, regionServerOfRegion);
    } finally {
      TESTING_UTIL.getAdmin().setBalancerRunning(true, false);
      cluster.getMaster().setCatalogJanitorEnabled(true);
      t.close();
    }
  }

  @Test
  public void testSplitWithRegionReplicas() throws Exception {
    final TableName tableName = TableName.valueOf(name.getMethodName());
    HTableDescriptor htd = TESTING_UTIL.createTableDescriptor(name.getMethodName());
    htd.setRegionReplication(2);
    htd.addCoprocessor(SlowMeCopro.class.getName());
    // Create table then get the single region for our new table.
    Table t = TESTING_UTIL.createTable(htd, new byte[][]{Bytes.toBytes("cf")}, null);
    List<HRegion> oldRegions;
    do {
      oldRegions = cluster.getRegions(tableName);
      Thread.sleep(10);
    } while (oldRegions.size() != 2);
    for (HRegion h : oldRegions) LOG.debug("OLDREGION " + h.getRegionInfo());
    try {
      int regionServerIndex = cluster.getServerWith(oldRegions.get(0).getRegionInfo()
        .getRegionName());
      HRegionServer regionServer = cluster.getRegionServer(regionServerIndex);
      insertData(tableName, admin, t);
      // Turn off balancer so it doesn't cut in and mess up our placements.
      admin.setBalancerRunning(false, true);
      // Turn off the meta scanner so it doesn't remove the parent on us.
      cluster.getMaster().setCatalogJanitorEnabled(false);
      boolean tableExists = MetaTableAccessor.tableExists(regionServer.getConnection(),
          tableName);
      assertEquals("The specified table should be present.", true, tableExists);
      final HRegion region = findSplittableRegion(oldRegions);
      regionServerIndex = cluster.getServerWith(region.getRegionInfo().getRegionName());
      regionServer = cluster.getRegionServer(regionServerIndex);
      assertTrue("not able to find a splittable region", region != null);
      try {
        requestSplitRegion(regionServer, region, Bytes.toBytes("row2"));
      } catch (IOException e) {
        e.printStackTrace();
        fail("Split execution should have succeeded with no exceptions thrown " + e);
      }
      //TESTING_UTIL.waitUntilAllRegionsAssigned(tableName);
      List<HRegion> newRegions;
      do {
        newRegions = cluster.getRegions(tableName);
        for (HRegion h : newRegions) LOG.debug("NEWREGION " + h.getRegionInfo());
        Thread.sleep(1000);
      } while ((newRegions.contains(oldRegions.get(0)) || newRegions.contains(oldRegions.get(1)))
          || newRegions.size() != 4);
      tableExists = MetaTableAccessor.tableExists(regionServer.getConnection(),
          tableName);
      assertEquals("The specified table should be present.", true, tableExists);
      // exists works on stale and we see the put after the flush
      byte[] b1 = "row1".getBytes();
      Get g = new Get(b1);
      g.setConsistency(Consistency.STRONG);
      // The following GET will make a trip to the meta to get the new location of the 1st daughter
      // In the process it will also get the location of the replica of the daughter (initially
      // pointing to the parent's replica)
      Result r = t.get(g);
      Assert.assertFalse(r.isStale());
      LOG.info("exists stale after flush done");

      SlowMeCopro.getPrimaryCdl().set(new CountDownLatch(1));
      g = new Get(b1);
      g.setConsistency(Consistency.TIMELINE);
      // This will succeed because in the previous GET we get the location of the replica
      r = t.get(g);
      Assert.assertTrue(r.isStale());
      SlowMeCopro.getPrimaryCdl().get().countDown();
    } finally {
      SlowMeCopro.getPrimaryCdl().get().countDown();
      admin.setBalancerRunning(true, false);
      cluster.getMaster().setCatalogJanitorEnabled(true);
      t.close();
    }
  }

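  /**
   * Put four rows (row1 through row4) into column family 'cf' and flush the table.
   */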
  private void insertData(final TableName tableName, Admin admin, Table t) throws IOException,
      InterruptedException {
    Put p = new Put(Bytes.toBytes("row1"));
    p.addColumn(Bytes.toBytes("cf"), Bytes.toBytes("q1"), Bytes.toBytes("1"));
    t.put(p);
    p = new Put(Bytes.toBytes("row2"));
    p.addColumn(Bytes.toBytes("cf"), Bytes.toBytes("q1"), Bytes.toBytes("2"));
    t.put(p);
    p = new Put(Bytes.toBytes("row3"));
    p.addColumn(Bytes.toBytes("cf"), Bytes.toBytes("q1"), Bytes.toBytes("3"));
    t.put(p);
    p = new Put(Bytes.toBytes("row4"));
    p.addColumn(Bytes.toBytes("cf"), Bytes.toBytes("q1"), Bytes.toBytes("4"));
    t.put(p);
    admin.flush(tableName);
  }

  /**
   * If a table has a region with no store files, it should still split successfully
   * into two regions with no store files.
   */
  @Test
  public void testSplitRegionWithNoStoreFiles()
      throws Exception {
    final TableName tableName = TableName.valueOf(name.getMethodName());
    // Create table then get the single region for our new table.
    createTableAndWait(tableName, HConstants.CATALOG_FAMILY);
    List<HRegion> regions = cluster.getRegions(tableName);
    RegionInfo hri = getAndCheckSingleTableRegion(regions);
    ensureTableRegionNotOnSameServerAsMeta(admin, hri);
    int regionServerIndex = cluster.getServerWith(regions.get(0).getRegionInfo()
      .getRegionName());
    HRegionServer regionServer = cluster.getRegionServer(regionServerIndex);
    // Turn off balancer so it doesn't cut in and mess up our placements.
    this.admin.setBalancerRunning(false, true);
    // Turn off the meta scanner so it doesn't remove the parent on us.
    cluster.getMaster().setCatalogJanitorEnabled(false);
    try {
      // Precondition: we created a table with no data, no store files.
      printOutRegions(regionServer, "Initial regions: ");
      Configuration conf = cluster.getConfiguration();
      HBaseFsck.debugLsr(conf, new Path("/"));
      Path rootDir = FSUtils.getRootDir(conf);
      FileSystem fs = TESTING_UTIL.getDFSCluster().getFileSystem();
      Map<String, Path> storefiles =
          FSUtils.getTableStoreFilePathMap(null, fs, rootDir, tableName);
      assertEquals("Expected nothing but found " + storefiles.toString(), 0, storefiles.size());

      // find a splittable region.  Refresh the regions list
      regions = cluster.getRegions(tableName);
      final HRegion region = findSplittableRegion(regions);
      assertTrue("not able to find a splittable region", region != null);

      // Now split.
      try {
        requestSplitRegion(regionServer, region, Bytes.toBytes("row2"));
      } catch (IOException e) {
        fail("Split execution should have succeeded with no exceptions thrown");
      }

      // Postcondition: split the table with no store files into two regions, but still have no
      // store files
      List<HRegion> daughters = cluster.getRegions(tableName);
      assertEquals(2, daughters.size());

      // check dirs
      HBaseFsck.debugLsr(conf, new Path("/"));
      Map<String, Path> storefilesAfter =
          FSUtils.getTableStoreFilePathMap(null, fs, rootDir, tableName);
      assertEquals("Expected nothing but found " + storefilesAfter.toString(), 0,
          storefilesAfter.size());

      hri = region.getRegionInfo(); // split parent
      AssignmentManager am = cluster.getMaster().getAssignmentManager();
      RegionStates regionStates = am.getRegionStates();
      long start = EnvironmentEdgeManager.currentTime();
      while (!regionStates.isRegionInState(hri, State.SPLIT)) {
        LOG.debug("Waiting for SPLIT state on: " + hri);
        assertFalse("Timed out in waiting split parent to be in state SPLIT",
          EnvironmentEdgeManager.currentTime() - start > 60000);
        Thread.sleep(500);
      }
      assertTrue(regionStates.isRegionInState(daughters.get(0).getRegionInfo(), State.OPEN));
      assertTrue(regionStates.isRegionInState(daughters.get(1).getRegionInfo(), State.OPEN));

      // We should not be able to assign it again
      am.assign(hri);
      assertFalse("Split region can't be assigned",
        regionStates.isRegionInTransition(hri));
      assertTrue(regionStates.isRegionInState(hri, State.SPLIT));

      // We should not be able to unassign it either
      try {
        am.unassign(hri);
        fail("Should have thrown exception");
      } catch (UnexpectedStateException e) {
        // Expected
      }
      assertFalse("Split region can't be unassigned",
        regionStates.isRegionInTransition(hri));
      assertTrue(regionStates.isRegionInState(hri, State.SPLIT));
    } finally {
      admin.setBalancerRunning(true, false);
      cluster.getMaster().setCatalogJanitorEnabled(true);
    }
  }

  @Test
  public void testStoreFileReferenceCreationWhenSplitPolicySaysToSkipRangeCheck()
      throws Exception {
    final TableName tableName = TableName.valueOf(name.getMethodName());
    try {
      HTableDescriptor htd = new HTableDescriptor(tableName);
      htd.addFamily(new HColumnDescriptor("f"));
      htd.addFamily(new HColumnDescriptor("i_f"));
      htd.setRegionSplitPolicyClassName(CustomSplitPolicy.class.getName());
      admin.createTable(htd);
      List<HRegion> regions = awaitTableRegions(tableName);
      HRegion region = regions.get(0);
      for (int i = 3; i < 9; i++) {
        Put p = new Put(Bytes.toBytes("row" + i));
        p.addColumn(Bytes.toBytes("f"), Bytes.toBytes("q"), Bytes.toBytes("value" + i));
        p.addColumn(Bytes.toBytes("i_f"), Bytes.toBytes("q"), Bytes.toBytes("value" + i));
        region.put(p);
      }
      region.flush(true);
      HStore store = region.getStore(Bytes.toBytes("f"));
      Collection<HStoreFile> storefiles = store.getStorefiles();
      assertEquals(1, storefiles.size());
      assertFalse(region.hasReferences());
      Path referencePath =
          region.getRegionFileSystem().splitStoreFile(region.getRegionInfo(), "f",
            storefiles.iterator().next(), Bytes.toBytes("row1"), false, region.getSplitPolicy());
      assertNull(referencePath);
      referencePath =
          region.getRegionFileSystem().splitStoreFile(region.getRegionInfo(), "i_f",
            storefiles.iterator().next(), Bytes.toBytes("row1"), false, region.getSplitPolicy());
      assertNotNull(referencePath);
    } finally {
      TESTING_UTIL.deleteTable(tableName);
    }
  }

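  /**
   * Return the first splittable primary region (replica id 0) found in the given list,
   * retrying briefly; returns null if none is found.
   */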
  private HRegion findSplittableRegion(final List<HRegion> regions) throws InterruptedException {
    for (int i = 0; i < 5; ++i) {
      for (HRegion r: regions) {
        if (r.isSplittable() && r.getRegionInfo().getReplicaId() == 0) {
          return r;
        }
      }
      Thread.sleep(100);
    }
    return null;
  }

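  /**
   * Wait up to roughly 10 seconds for at least two regions of the table to be online
   * and return them.
   */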
  private List<HRegion> checkAndGetDaughters(TableName tableName)
      throws InterruptedException {
    List<HRegion> daughters = null;
    // try up to 10s
    for (int i = 0; i < 100; i++) {
      daughters = cluster.getRegions(tableName);
      if (daughters.size() >= 2) break;
      Thread.sleep(100);
    }
    assertTrue(daughters.size() >= 2);
    return daughters;
  }

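  /**
   * Abort the current master, start a new one, and wait for it to become the active master.
   * @return The new active master.
   */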
  private HMaster abortAndWaitForMaster()
      throws IOException, InterruptedException {
    cluster.abortMaster(0);
    cluster.waitOnMaster(0);
    HMaster master = cluster.startMaster().getMaster();
    cluster.waitForActiveAndReadyMaster();
    return master;
  }

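  /**
   * Split the given region via the Admin API and wait until the table has more regions
   * than {@code regionCount}, failing if the split takes too long.
   */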
  private void split(final RegionInfo hri, final HRegionServer server, final int regionCount)
      throws IOException, InterruptedException {
    admin.splitRegion(hri.getRegionName());
    for (int i = 0; cluster.getRegions(hri.getTable()).size() <= regionCount && i < 60; i++) {
      LOG.debug("Waiting on region " + hri.getRegionNameAsString() + " to split");
      Thread.sleep(2000);
    }
    assertFalse("Waited too long for split",
        cluster.getRegions(hri.getTable()).size() <= regionCount);
  }

  /**
   * Ensure the single table region is not on the same server as the single hbase:meta
   * region.
   * @param admin
   * @param hri
   * @return Index of the server hosting the single table region
   * @throws UnknownRegionException
   * @throws MasterNotRunningException
   * @throws org.apache.hadoop.hbase.ZooKeeperConnectionException
   * @throws InterruptedException
   */
  private int ensureTableRegionNotOnSameServerAsMeta(final Admin admin,
      final RegionInfo hri)
      throws IOException, MasterNotRunningException,
      ZooKeeperConnectionException, InterruptedException {
    // Now make sure that the table region is not on the same server as the one hosting
    // hbase:meta. We don't want hbase:meta replay polluting our test when we later crash
    // the server serving the table region.
    int metaServerIndex = cluster.getServerWithMeta();
    boolean tablesOnMaster = LoadBalancer.isTablesOnMaster(TESTING_UTIL.getConfiguration());
    if (tablesOnMaster) {
      // Need to check master is supposed to host meta... perhaps it is not.
      throw new UnsupportedOperationException();
      // TODO: assertTrue(metaServerIndex == -1); // meta is on master now
    }
    HRegionServer metaRegionServer = tablesOnMaster ?
      cluster.getMaster() : cluster.getRegionServer(metaServerIndex);
    int tableRegionIndex = cluster.getServerWith(hri.getRegionName());
    assertTrue(tableRegionIndex != -1);
    HRegionServer tableRegionServer = cluster.getRegionServer(tableRegionIndex);
    LOG.info("MetaRegionServer=" + metaRegionServer.getServerName() +
      ", other=" + tableRegionServer.getServerName());
    if (metaRegionServer.getServerName().equals(tableRegionServer.getServerName())) {
      HRegionServer hrs = getOtherRegionServer(cluster, metaRegionServer);
      assertNotNull(hrs);
      assertNotNull(hri);
      LOG.info("Moving " + hri.getRegionNameAsString() + " from " +
        metaRegionServer.getServerName() + " to " +
        hrs.getServerName() + "; metaServerIndex=" + metaServerIndex);
      admin.move(hri.getEncodedNameAsBytes(), Bytes.toBytes(hrs.getServerName().toString()));
    }
    // Wait till table region is up on the server that is NOT carrying hbase:meta.
    for (int i = 0; i < 100; i++) {
      tableRegionIndex = cluster.getServerWith(hri.getRegionName());
      if (tableRegionIndex != -1 && tableRegionIndex != metaServerIndex) break;
      LOG.debug("Waiting on region move off the hbase:meta server; current index " +
        tableRegionIndex + " and metaServerIndex=" + metaServerIndex);
      Thread.sleep(100);
    }
    assertTrue("Region not moved off hbase:meta server, tableRegionIndex=" + tableRegionIndex,
      tableRegionIndex != -1 && tableRegionIndex != metaServerIndex);
    // Verify for sure table region is not on same server as hbase:meta
    tableRegionIndex = cluster.getServerWith(hri.getRegionName());
    assertTrue(tableRegionIndex != -1);
    assertNotSame(metaServerIndex, tableRegionIndex);
    return tableRegionIndex;
  }

  /**
   * Find a regionserver other than the one passed.
   * We can't rely on indexes into the list of regionservers since crashed servers
   * still occupy an index.
   * @param cluster
   * @param notThisOne
   * @return A regionserver that is not <code>notThisOne</code>, or null if none
   * found
   */
  private HRegionServer getOtherRegionServer(final MiniHBaseCluster cluster,
      final HRegionServer notThisOne) {
    for (RegionServerThread rst: cluster.getRegionServerThreads()) {
      HRegionServer hrs = rst.getRegionServer();
      if (hrs.getServerName().equals(notThisOne.getServerName())) continue;
      if (hrs.isStopping() || hrs.isStopped()) continue;
      return hrs;
    }
    return null;
  }

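  /**
   * Log the regions currently online on the given region server, each prefixed with
   * {@code prefix}.
   */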
  private void printOutRegions(final HRegionServer hrs, final String prefix)
      throws IOException {
    List<RegionInfo> regions = ProtobufUtil.getOnlineRegions(hrs.getRSRpcServices());
    for (RegionInfo region: regions) {
      LOG.info(prefix + region.getRegionNameAsString());
    }
  }

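  /**
   * Wait until the master notices a region server death and the live server count drops
   * back to {@link #NB_SERVERS}, failing if it takes too long.
   */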
  private void waitUntilRegionServerDead() throws InterruptedException, IOException {
    // Wait until the master processes the RS shutdown
    for (int i = 0; (cluster.getMaster().getClusterMetrics()
        .getLiveServerMetrics().size() > NB_SERVERS
        || cluster.getLiveRegionServerThreads().size() > NB_SERVERS) && i < 100; i++) {
      LOG.info("Waiting on server to go down");
      Thread.sleep(100);
    }
    assertFalse("Waited too long for RS to die",
      cluster.getMaster().getClusterMetrics().getLiveServerMetrics().size() > NB_SERVERS
        || cluster.getLiveRegionServerThreads().size() > NB_SERVERS);
  }

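  /**
   * Wait up to a minute for the table to have at least {@code numDaughters} regions online,
   * failing if it does not.
   */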
  private void awaitDaughters(TableName tableName, int numDaughters) throws InterruptedException {
    // Wait till regions are back on line again.
    for (int i = 0; cluster.getRegions(tableName).size() < numDaughters && i < 60; i++) {
      LOG.info("Waiting for repair to happen");
      Thread.sleep(1000);
    }
    if (cluster.getRegions(tableName).size() < numDaughters) {
      fail("Waiting too long for daughter regions");
    }
  }

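  /**
   * Wait up to roughly 10 seconds for the table to have at least one region and return
   * whatever regions are found.
   */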
  private List<HRegion> awaitTableRegions(final TableName tableName) throws InterruptedException {
    List<HRegion> regions = null;
    for (int i = 0; i < 100; i++) {
      regions = cluster.getRegions(tableName);
      if (regions.size() > 0) break;
      Thread.sleep(100);
    }
    return regions;
  }

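  /**
   * Create a table with the given column family and wait until its region is online.
   */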
  private Table createTableAndWait(TableName tableName, byte[] cf) throws IOException,
      InterruptedException {
    Table t = TESTING_UTIL.createTable(tableName, cf);
    awaitTableRegions(tableName);
    assertTrue("Table not online: " + tableName,
      cluster.getRegions(tableName).size() != 0);
    return t;
  }

  // Make it public so that JVMClusterUtil can access it.
  public static class MyMaster extends HMaster {
    public MyMaster(Configuration conf) throws IOException, KeeperException, InterruptedException {
      super(conf);
    }

    @Override
    protected RSRpcServices createRpcServices() throws IOException {
      return new MyMasterRpcServices(this);
    }
  }

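  /**
   * Master RPC services used by {@link MyMaster}. Wraps reportRegionStateTransition so tests
   * can hook READY_TO_SPLIT transitions when {@link #enabled} is set; the hook body is
   * currently a TODO and does nothing.
   */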
  static class MyMasterRpcServices extends MasterRpcServices {
    static AtomicBoolean enabled = new AtomicBoolean(false);

    private HMaster myMaster;

    public MyMasterRpcServices(HMaster master) throws IOException {
      super(master);
      myMaster = master;
    }

    @Override
    public ReportRegionStateTransitionResponse reportRegionStateTransition(RpcController c,
        ReportRegionStateTransitionRequest req) throws ServiceException {
      ReportRegionStateTransitionResponse resp = super.reportRegionStateTransition(c, req);
      if (enabled.get() && req.getTransition(0).getTransitionCode().equals(
          TransitionCode.READY_TO_SPLIT) && !resp.hasErrorMessage()) {
        RegionStates regionStates = myMaster.getAssignmentManager().getRegionStates();
        for (RegionStates.RegionStateNode regionState:
          regionStates.getRegionsInTransition()) {
          /* TODO!!!!
          // Find the merging_new region and remove it
          if (regionState.isSplittingNew()) {
            regionStates.deleteRegion(regionState.getRegion());
          }
          */
        }
      }
      return resp;
    }
  }

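  /**
   * Split policy that always wants to split and skips the store file range check for
   * column families whose name starts with "i_".
   */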
  static class CustomSplitPolicy extends IncreasingToUpperBoundRegionSplitPolicy {

    @Override
    protected boolean shouldSplit() {
      return true;
    }

    @Override
    public boolean skipStoreFileRangeCheck(String familyName) {
      if (familyName.startsWith("i_")) {
        return true;
      } else {
        return false;
      }
    }
  }
}