/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.util;

import static org.apache.hadoop.hbase.master.HMaster.HBASE_MASTER_RSPROC_DISPATCHER_CLASS;

import java.util.List;
import java.util.stream.Collectors;
import java.util.stream.IntStream;
import org.apache.hadoop.hbase.HBaseClassTestRule;
import org.apache.hadoop.hbase.HBaseTestingUtil;
import org.apache.hadoop.hbase.ServerName;
import org.apache.hadoop.hbase.SingleProcessHBaseCluster;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Admin;
import org.apache.hadoop.hbase.client.ColumnFamilyDescriptorBuilder;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Table;
import org.apache.hadoop.hbase.client.TableDescriptor;
import org.apache.hadoop.hbase.client.TableDescriptorBuilder;
import org.apache.hadoop.hbase.master.HMaster;
import org.apache.hadoop.hbase.master.hbck.HbckChore;
import org.apache.hadoop.hbase.master.hbck.HbckReport;
import org.apache.hadoop.hbase.master.procedure.ServerCrashProcedure;
import org.apache.hadoop.hbase.regionserver.HRegion;
import org.apache.hadoop.hbase.regionserver.HRegionServer;
import org.apache.hadoop.hbase.testclassification.LargeTests;
import org.apache.hadoop.hbase.testclassification.MiscTests;
import org.junit.AfterClass;
import org.junit.Assert;
import org.junit.Before;
import org.junit.BeforeClass;
import org.junit.ClassRule;
import org.junit.Rule;
import org.junit.Test;
import org.junit.experimental.categories.Category;
import org.junit.rules.TestName;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import org.apache.hadoop.hbase.shaded.protobuf.generated.ProcedureProtos;

/**
 * Tests the custom RSProcedureDispatcher to ensure a retry limit can be imposed on certain errors.
 */
@Category({ MiscTests.class, LargeTests.class })
public class TestProcDispatcher {

  private static final Logger LOG = LoggerFactory.getLogger(TestProcDispatcher.class);

  @ClassRule
  public static final HBaseClassTestRule CLASS_RULE =
    HBaseClassTestRule.forClass(TestProcDispatcher.class);

  @Rule
  public TestName name = new TestName();

  private static final HBaseTestingUtil TEST_UTIL = new HBaseTestingUtil();
  private static ServerName rs0;

  @BeforeClass
  public static void setUpBeforeClass() throws Exception {
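    // Use the custom RSProcDispatcher as the remote procedure dispatcher and cap the
    // fail-fast retry limit for remote procedure calls at 5.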
    TEST_UTIL.getConfiguration().set(HBASE_MASTER_RSPROC_DISPATCHER_CLASS,
      RSProcDispatcher.class.getName());
    TEST_UTIL.getConfiguration().setInt("hbase.master.rs.remote.proc.fail.fast.limit", 5);
    TEST_UTIL.startMiniCluster(3);
    SingleProcessHBaseCluster cluster = TEST_UTIL.getHBaseCluster();
    rs0 = cluster.getRegionServer(0).getServerName();
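    // Disable the balancer so regions only move when the test explicitly moves them.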
    TEST_UTIL.getAdmin().balancerSwitch(false, true);
  }

  @AfterClass
  public static void tearDownAfterClass() throws Exception {
    TEST_UTIL.shutdownMiniCluster();
  }

  @Before
  public void setUp() throws Exception {
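    // Create a table named after the test method, pre-split into 9 regions.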
    final TableName tableName = TableName.valueOf(name.getMethodName());
    TableDescriptor tableDesc = TableDescriptorBuilder.newBuilder(tableName)
      .setColumnFamily(ColumnFamilyDescriptorBuilder.of("fam1")).build();
    int startKey = 0;
    int endKey = 80000;
    TEST_UTIL.getAdmin().createTable(tableDesc, Bytes.toBytes(startKey), Bytes.toBytes(endKey), 9);
  }


  @Test
  public void testRetryLimitOnConnClosedErrors() throws Exception {
    HbckChore hbckChore = new HbckChore(TEST_UTIL.getHBaseCluster().getMaster());
    final TableName tableName = TableName.valueOf(name.getMethodName());
    SingleProcessHBaseCluster cluster = TEST_UTIL.getHBaseCluster();
    Admin admin = TEST_UTIL.getAdmin();
    Table table = TEST_UTIL.getConnection().getTable(tableName);
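    // Load some rows, then flush and compact so the regions have store files on disk.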
    List<Put> puts = IntStream.range(10, 50000).mapToObj(i -> new Put(Bytes.toBytes(i))
      .addColumn(Bytes.toBytes("fam1"), Bytes.toBytes("q1"), Bytes.toBytes("val_" + i)))
      .collect(Collectors.toList());
    table.put(puts);
    admin.flush(tableName);
    admin.compact(tableName);
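    // Give the requested flush/compaction a moment to complete before capturing region counts.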
    Thread.sleep(3000);
    HRegionServer hRegionServer0 = cluster.getRegionServer(0);
    HRegionServer hRegionServer1 = cluster.getRegionServer(1);
    HRegionServer hRegionServer2 = cluster.getRegionServer(2);
    int numRegions0 = hRegionServer0.getNumberOfOnlineRegions();
    int numRegions1 = hRegionServer1.getNumberOfOnlineRegions();
    int numRegions2 = hRegionServer2.getNumberOfOnlineRegions();

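    // Run the hbck chore and verify the cluster starts out consistent: no inconsistent regions
    // and no orphan regions on the filesystem or on the region servers.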
    hbckChore.choreForTesting();
    HbckReport hbckReport = hbckChore.getLastReport();
    Assert.assertEquals(0, hbckReport.getInconsistentRegions().size());
    Assert.assertEquals(0, hbckReport.getOrphanRegionsOnFS().size());
    Assert.assertEquals(0, hbckReport.getOrphanRegionsOnRS().size());

    HRegion region0 = hRegionServer0.getRegions().get(0);
    // Move all regions from server1 to server0
    for (HRegion region : hRegionServer1.getRegions()) {
      TEST_UTIL.getAdmin().move(region.getRegionInfo().getEncodedNameAsBytes(), rs0);
    }
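    // Also move one region off server0; the single-argument move() lets the master pick a
    // random destination server.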
    TEST_UTIL.getAdmin().move(region0.getRegionInfo().getEncodedNameAsBytes());
    HMaster master = TEST_UTIL.getHBaseCluster().getMaster();

    // Ensure:
    // 1. the total number of online regions before and after the SCPs are scheduled stays the same
    // 2. all procedures, including the SCPs, complete successfully
    // 3. at least one SCP has been scheduled
    TEST_UTIL.waitFor(5000, 1000, () -> {
      LOG.info("numRegions0: {} , numRegions1: {} , numRegions2: {}", numRegions0, numRegions1,
        numRegions2);
      LOG.info("Online regions - server0 : {} , server1: {} , server2: {}",
        cluster.getRegionServer(0).getNumberOfOnlineRegions(),
        cluster.getRegionServer(1).getNumberOfOnlineRegions(),
        cluster.getRegionServer(2).getNumberOfOnlineRegions());
      LOG.info("Num of successfully completed procedures: {} , num of all procedures: {}",
        master.getMasterProcedureExecutor().getProcedures().stream()
          .filter(masterProcedureEnvProcedure -> masterProcedureEnvProcedure.getState()
              == ProcedureProtos.ProcedureState.SUCCESS)
          .count(),
        master.getMasterProcedureExecutor().getProcedures().size());
      LOG.info("Num of SCPs: {}", master.getMasterProcedureExecutor().getProcedures().stream()
        .filter(proc -> proc instanceof ServerCrashProcedure).count());
      return (numRegions0 + numRegions1 + numRegions2)
          == (cluster.getRegionServer(0).getNumberOfOnlineRegions()
            + cluster.getRegionServer(1).getNumberOfOnlineRegions()
            + cluster.getRegionServer(2).getNumberOfOnlineRegions())
        && master.getMasterProcedureExecutor().getProcedures().stream()
          .filter(masterProcedureEnvProcedure -> masterProcedureEnvProcedure.getState()
              == ProcedureProtos.ProcedureState.SUCCESS)
          .count() == master.getMasterProcedureExecutor().getProcedures().size()
        && master.getMasterProcedureExecutor().getProcedures().stream()
          .anyMatch(proc -> proc instanceof ServerCrashProcedure);
    });

    // Ensure we have no inconsistent or orphan regions
    TEST_UTIL.waitFor(5000, 1000, () -> {
      hbckChore.choreForTesting();
      HbckReport report = hbckChore.getLastReport();
      return report.getInconsistentRegions().isEmpty() && report.getOrphanRegionsOnFS().isEmpty()
        && report.getOrphanRegionsOnRS().isEmpty();
    });
  }

}