001/*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018package org.apache.hadoop.hbase.master.balancer;
019
020import static org.junit.Assert.assertEquals;
021import static org.junit.Assert.assertTrue;
022
023import java.util.List;
024import org.apache.hadoop.conf.Configuration;
025import org.apache.hadoop.hbase.HBaseClassTestRule;
026import org.apache.hadoop.hbase.HBaseTestingUtility;
027import org.apache.hadoop.hbase.HConstants;
028import org.apache.hadoop.hbase.MiniHBaseCluster;
029import org.apache.hadoop.hbase.StartMiniClusterOption;
030import org.apache.hadoop.hbase.TableName;
031import org.apache.hadoop.hbase.client.Table;
032import org.apache.hadoop.hbase.master.HMaster;
033import org.apache.hadoop.hbase.master.LoadBalancer;
034import org.apache.hadoop.hbase.regionserver.HRegion;
035import org.apache.hadoop.hbase.testclassification.MediumTests;
036import org.apache.hadoop.hbase.util.JVMClusterUtil;
037import org.apache.hadoop.hbase.util.Threads;
038import org.junit.After;
039import org.junit.Before;
040import org.junit.ClassRule;
041import org.junit.Ignore;
042import org.junit.Rule;
043import org.junit.Test;
044import org.junit.experimental.categories.Category;
045import org.junit.rules.TestName;
046import org.slf4j.Logger;
047import org.slf4j.LoggerFactory;
048
049/**
050 * Test options for regions on master; none, system, or any (i.e. master is like any other
051 * regionserver). Checks how regions are deployed when each of the options are enabled. It then does
052 * kill combinations to make sure the distribution is more than just for startup. NOTE: Regions on
053 * Master does not work well. See HBASE-19828. Until addressed, disabling this test.
054 */
055@Ignore
056@Category({ MediumTests.class })
057public class TestRegionsOnMasterOptions {
058
059  @ClassRule
060  public static final HBaseClassTestRule CLASS_RULE =
061    HBaseClassTestRule.forClass(TestRegionsOnMasterOptions.class);
062
063  private static final Logger LOG = LoggerFactory.getLogger(TestRegionsOnMasterOptions.class);
064  @Rule
065  public TestName name = new TestName();
066  private final static HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility();
067  private Configuration c;
068  private String tablesOnMasterOldValue;
069  private String systemTablesOnMasterOldValue;
070  private static final int SLAVES = 3;
071  private static final int MASTERS = 2;
072  // Make the count of REGIONS high enough so I can distingush case where master is only carrying
073  // system regions from the case where it is carrying any region; i.e. 2 system regions vs more
074  // if user + system.
075  private static final int REGIONS = 12;
076  private static final int SYSTEM_REGIONS = 2; // ns and meta -- no acl unless enabled.
077
078  @Before
079  public void setup() {
080    this.c = TEST_UTIL.getConfiguration();
081    this.tablesOnMasterOldValue = c.get(LoadBalancer.TABLES_ON_MASTER);
082    this.systemTablesOnMasterOldValue = c.get(LoadBalancer.SYSTEM_TABLES_ON_MASTER);
083  }
084
085  @After
086  public void tearDown() {
087    unset(LoadBalancer.TABLES_ON_MASTER, this.tablesOnMasterOldValue);
088    unset(LoadBalancer.SYSTEM_TABLES_ON_MASTER, this.systemTablesOnMasterOldValue);
089  }
090
091  private void unset(final String key, final String value) {
092    if (value == null) {
093      c.unset(key);
094    } else {
095      c.set(key, value);
096    }
097  }
098
099  @Test
100  public void testRegionsOnAllServers() throws Exception {
101    c.setBoolean(LoadBalancer.TABLES_ON_MASTER, true);
102    c.setBoolean(LoadBalancer.SYSTEM_TABLES_ON_MASTER, false);
103    int rsCount = (REGIONS + SYSTEM_REGIONS) / (SLAVES + 1/* Master */);
104    checkBalance(rsCount, rsCount);
105  }
106
107  @Test
108  public void testNoRegionOnMaster() throws Exception {
109    c.setBoolean(LoadBalancer.TABLES_ON_MASTER, false);
110    c.setBoolean(LoadBalancer.SYSTEM_TABLES_ON_MASTER, false);
111    int rsCount = (REGIONS + SYSTEM_REGIONS) / SLAVES;
112    checkBalance(0, rsCount);
113  }
114
115  @Ignore // Fix this. The Master startup doesn't allow Master reporting as a RegionServer, not
116  // until way late after the Master startup finishes. Needs more work.
117  @Test
118  public void testSystemTablesOnMaster() throws Exception {
119    c.setBoolean(LoadBalancer.TABLES_ON_MASTER, true);
120    c.setBoolean(LoadBalancer.SYSTEM_TABLES_ON_MASTER, true);
121    // IS THIS SHORT-CIRCUIT RPC? Yes. Here is how it looks currently if I have an exception
122    // thrown in doBatchMutate inside a Region.
123    //
124    // java.lang.Exception
125    // at org.apache.hadoop.hbase.regionserver.HRegion.doBatchMutate(HRegion.java:3845)
126    // at org.apache.hadoop.hbase.regionserver.HRegion.put(HRegion.java:2972)
127    // at org.apache.hadoop.hbase.regionserver.RSRpcServices.mutate(RSRpcServices.java:2751)
128    // at
129    // org.apache.hadoop.hbase.client.ClientServiceCallable.doMutate(ClientServiceCallable.java:55)
130    // at org.apache.hadoop.hbase.client.HTable$3.rpcCall(HTable.java:585)
131    // at org.apache.hadoop.hbase.client.HTable$3.rpcCall(HTable.java:579)
132    // at org.apache.hadoop.hbase.client.RegionServerCallable.call(RegionServerCallable.java:126)
133    // at
134    // org.apache.hadoop.hbase.client.RpcRetryingCallerImpl.callWithRetries(RpcRetryingCallerImpl.java:106)
135    // at org.apache.hadoop.hbase.client.HTable.put(HTable.java:589)
136    // at
137    // org.apache.hadoop.hbase.master.TableNamespaceManager.insertIntoNSTable(TableNamespaceManager.java:156)
138    // at
139    // org.apache.hadoop.hbase.master.procedure.CreateNamespaceProcedure.insertIntoNSTable(CreateNamespaceProcedure.java:222)
140    // at
141    // org.apache.hadoop.hbase.master.procedure.CreateNamespaceProcedure.executeFromState(CreateNamespaceProcedure.java:76)
142    // at
143    // org.apache.hadoop.hbase.master.procedure.CreateNamespaceProcedure.executeFromState(CreateNamespaceProcedure.java:40)
144    // at
145    // org.apache.hadoop.hbase.procedure2.StateMachineProcedure.execute(StateMachineProcedure.java:181)
146    // at org.apache.hadoop.hbase.procedure2.Procedure.doExecute(Procedure.java:847)
147    // at
148    // org.apache.hadoop.hbase.procedure2.ProcedureExecutor.execProcedure(ProcedureExecutor.java:1440)
149    // at
150    // org.apache.hadoop.hbase.procedure2.ProcedureExecutor.executeProcedure(ProcedureExecutor.java:1209)
151    // at org.apache.hadoop.hbase.procedure2.ProcedureExecutor.access$800(ProcedureExecutor.java:79)
152    // at
153    // org.apache.hadoop.hbase.procedure2.ProcedureExecutor$WorkerThread.run(ProcedureExecutor.java:1719)
154    //
155    // If I comment out the ConnectionUtils ConnectionImplementation content, I see this:
156    //
157    // java.lang.Exception
158    // at org.apache.hadoop.hbase.regionserver.HRegion.doBatchMutate(HRegion.java:3845)
159    // at org.apache.hadoop.hbase.regionserver.HRegion.put(HRegion.java:2972)
160    // at org.apache.hadoop.hbase.regionserver.RSRpcServices.mutate(RSRpcServices.java:2751)
161    // at
162    // org.apache.hadoop.hbase.shaded.protobuf.generated.ClientProtos$ClientService$2.callBlockingMethod(ClientProtos.java:41546)
163    // at org.apache.hadoop.hbase.ipc.RpcServer.call(RpcServer.java:406)
164    // at org.apache.hadoop.hbase.ipc.CallRunner.run(CallRunner.java:133)
165    // at org.apache.hadoop.hbase.ipc.RpcExecutor$Handler.run(RpcExecutor.java:278)
166    // at org.apache.hadoop.hbase.ipc.RpcExecutor$Handler.run(RpcExecutor.java:258)
167
168    checkBalance(SYSTEM_REGIONS, REGIONS / SLAVES);
169  }
170
171  private void checkBalance(int masterCount, int rsCount) throws Exception {
172    StartMiniClusterOption option = StartMiniClusterOption.builder().numMasters(MASTERS)
173      .numRegionServers(SLAVES).numDataNodes(SLAVES).build();
174    MiniHBaseCluster cluster = TEST_UTIL.startMiniCluster(option);
175    TableName tn = TableName.valueOf(this.name.getMethodName());
176    try {
177      Table t = TEST_UTIL.createMultiRegionTable(tn, HConstants.CATALOG_FAMILY, REGIONS);
178      LOG.info("Server: " + cluster.getMaster().getServerManager().getOnlineServersList());
179      List<HRegion> regions = cluster.getMaster().getRegions();
180      int mActualCount = regions.size();
181      if (masterCount == 0 || masterCount == SYSTEM_REGIONS) {
182        // 0 means no regions on master.
183        assertEquals(masterCount, mActualCount);
184      } else {
185        // This is master as a regionserver scenario.
186        checkCount(masterCount, mActualCount);
187      }
188      // Allow that balance is not exact. FYI, getRegionServerThreads does not include master
189      // thread though it is a regionserver so we have to check master and then below the
190      // regionservers.
191      for (JVMClusterUtil.RegionServerThread rst : cluster.getRegionServerThreads()) {
192        regions = rst.getRegionServer().getRegions();
193        int rsActualCount = regions.size();
194        checkCount(rsActualCount, rsCount);
195      }
196      HMaster oldMaster = cluster.getMaster();
197      cluster.killMaster(oldMaster.getServerName());
198      oldMaster.join();
199      while (
200        cluster.getMaster() == null
201          || cluster.getMaster().getServerName().equals(oldMaster.getServerName())
202      ) {
203        Threads.sleep(10);
204      }
205      while (!cluster.getMaster().isInitialized()) {
206        Threads.sleep(10);
207      }
208      while (
209        cluster.getMaster().getAssignmentManager().computeRegionInTransitionStat().getTotalRITs()
210            > 0
211      ) {
212        Threads.sleep(100);
213        LOG.info("Waiting on RIT to go to zero before calling balancer...");
214      }
215      LOG.info("Cluster is up; running balancer");
216      cluster.getMaster().balance();
217      regions = cluster.getMaster().getRegions();
218      int mNewActualCount = regions.size();
219      if (masterCount == 0 || masterCount == SYSTEM_REGIONS) {
220        // 0 means no regions on master. After crash, should still be no regions on master.
221        // If masterCount == SYSTEM_REGIONS, means master only carrying system regions and should
222        // still only carry system regions post crash.
223        assertEquals(masterCount, mNewActualCount);
224      }
225    } finally {
226      LOG.info("Running shutdown of cluster");
227      TEST_UTIL.shutdownMiniCluster();
228    }
229  }
230
231  private void checkCount(int actual, int expected) {
232    assertTrue("Actual=" + actual + ", expected=" + expected,
233      actual >= (expected - 2) && actual <= (expected + 2)); // Lots of slop +/- 2
234  }
235}