001/**
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018package org.apache.hadoop.hbase.master.balancer;
019
020import static org.junit.Assert.assertEquals;
021import static org.junit.Assert.assertTrue;
022
023import java.util.List;
024import org.apache.hadoop.conf.Configuration;
025import org.apache.hadoop.hbase.HBaseClassTestRule;
026import org.apache.hadoop.hbase.HBaseTestingUtility;
027import org.apache.hadoop.hbase.HConstants;
028import org.apache.hadoop.hbase.MiniHBaseCluster;
029import org.apache.hadoop.hbase.StartMiniClusterOption;
030import org.apache.hadoop.hbase.TableName;
031import org.apache.hadoop.hbase.client.Table;
032import org.apache.hadoop.hbase.master.HMaster;
033import org.apache.hadoop.hbase.master.LoadBalancer;
034import org.apache.hadoop.hbase.regionserver.HRegion;
035import org.apache.hadoop.hbase.testclassification.MediumTests;
036import org.apache.hadoop.hbase.util.JVMClusterUtil;
037import org.apache.hadoop.hbase.util.Threads;
038import org.junit.After;
039import org.junit.Before;
040import org.junit.ClassRule;
041import org.junit.Ignore;
042import org.junit.Rule;
043import org.junit.Test;
044import org.junit.experimental.categories.Category;
045import org.junit.rules.TestName;
046import org.slf4j.Logger;
047import org.slf4j.LoggerFactory;
048
049/**
050 * Test options for regions on master; none, system, or any (i.e. master is like any other
051 * regionserver). Checks how regions are deployed when each of the options are enabled.
052 * It then does kill combinations to make sure the distribution is more than just for startup.
053 * NOTE: Regions on Master does not work well. See HBASE-19828. Until addressed, disabling this
054 * test.
055 */
056@Ignore
057@Category({MediumTests.class})
058public class TestRegionsOnMasterOptions {
059
060  @ClassRule
061  public static final HBaseClassTestRule CLASS_RULE =
062      HBaseClassTestRule.forClass(TestRegionsOnMasterOptions.class);
063
064  private static final Logger LOG = LoggerFactory.getLogger(TestRegionsOnMasterOptions.class);
065  @Rule public TestName name = new TestName();
066  private final static HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility();
067  private Configuration c;
068  private String tablesOnMasterOldValue;
069  private String systemTablesOnMasterOldValue;
070  private static final int SLAVES = 3;
071  private static final int MASTERS = 2;
072  // Make the count of REGIONS high enough so I can distingush case where master is only carrying
073  // system regions from the case where it is carrying any region; i.e. 2 system regions vs more
074  // if user + system.
075  private static final int REGIONS = 12;
076  private static final int SYSTEM_REGIONS = 2; // ns and meta -- no acl unless enabled.
077
078  @Before
079  public void setup() {
080    this.c = TEST_UTIL.getConfiguration();
081    this.tablesOnMasterOldValue = c.get(LoadBalancer.TABLES_ON_MASTER);
082    this.systemTablesOnMasterOldValue = c.get(LoadBalancer.SYSTEM_TABLES_ON_MASTER);
083  }
084
085  @After
086  public void tearDown() {
087    unset(LoadBalancer.TABLES_ON_MASTER, this.tablesOnMasterOldValue);
088    unset(LoadBalancer.SYSTEM_TABLES_ON_MASTER, this.systemTablesOnMasterOldValue);
089  }
090
091  private void unset(final String key, final String value) {
092    if (value == null) {
093      c.unset(key);
094    } else {
095      c.set(key, value);
096    }
097  }
098
099  @Test
100  public void testRegionsOnAllServers() throws Exception {
101    c.setBoolean(LoadBalancer.TABLES_ON_MASTER, true);
102    c.setBoolean(LoadBalancer.SYSTEM_TABLES_ON_MASTER, false);
103    int rsCount = (REGIONS + SYSTEM_REGIONS)/(SLAVES + 1/*Master*/);
104    checkBalance(rsCount, rsCount);
105  }
106
107  @Test
108  public void testNoRegionOnMaster() throws Exception {
109    c.setBoolean(LoadBalancer.TABLES_ON_MASTER, false);
110    c.setBoolean(LoadBalancer.SYSTEM_TABLES_ON_MASTER, false);
111    int rsCount = (REGIONS + SYSTEM_REGIONS)/SLAVES;
112    checkBalance(0, rsCount);
113  }
114
115  @Ignore // Fix this. The Master startup doesn't allow Master reporting as a RegionServer, not
116  // until way late after the Master startup finishes. Needs more work.
117  @Test
118  public void testSystemTablesOnMaster() throws Exception {
119    c.setBoolean(LoadBalancer.TABLES_ON_MASTER, true);
120    c.setBoolean(LoadBalancer.SYSTEM_TABLES_ON_MASTER, true);
121    // IS THIS SHORT-CIRCUIT RPC? Yes. Here is how it looks currently if I have an exception
122    // thrown in doBatchMutate inside a Region.
123    //
124    //    java.lang.Exception
125    //    at org.apache.hadoop.hbase.regionserver.HRegion.doBatchMutate(HRegion.java:3845)
126    //    at org.apache.hadoop.hbase.regionserver.HRegion.put(HRegion.java:2972)
127    //    at org.apache.hadoop.hbase.regionserver.RSRpcServices.mutate(RSRpcServices.java:2751)
128    //    at org.apache.hadoop.hbase.client.ClientServiceCallable.doMutate(ClientServiceCallable.java:55)
129    //    at org.apache.hadoop.hbase.client.HTable$3.rpcCall(HTable.java:585)
130    //    at org.apache.hadoop.hbase.client.HTable$3.rpcCall(HTable.java:579)
131    //    at org.apache.hadoop.hbase.client.RegionServerCallable.call(RegionServerCallable.java:126)
132    //    at org.apache.hadoop.hbase.client.RpcRetryingCallerImpl.callWithRetries(RpcRetryingCallerImpl.java:106)
133    //    at org.apache.hadoop.hbase.client.HTable.put(HTable.java:589)
134    //    at org.apache.hadoop.hbase.master.TableNamespaceManager.insertIntoNSTable(TableNamespaceManager.java:156)
135    //    at org.apache.hadoop.hbase.master.procedure.CreateNamespaceProcedure.insertIntoNSTable(CreateNamespaceProcedure.java:222)
136    //    at org.apache.hadoop.hbase.master.procedure.CreateNamespaceProcedure.executeFromState(CreateNamespaceProcedure.java:76)
137    //    at org.apache.hadoop.hbase.master.procedure.CreateNamespaceProcedure.executeFromState(CreateNamespaceProcedure.java:40)
138    //    at org.apache.hadoop.hbase.procedure2.StateMachineProcedure.execute(StateMachineProcedure.java:181)
139    //    at org.apache.hadoop.hbase.procedure2.Procedure.doExecute(Procedure.java:847)
140    //    at org.apache.hadoop.hbase.procedure2.ProcedureExecutor.execProcedure(ProcedureExecutor.java:1440)
141    //    at org.apache.hadoop.hbase.procedure2.ProcedureExecutor.executeProcedure(ProcedureExecutor.java:1209)
142    //    at org.apache.hadoop.hbase.procedure2.ProcedureExecutor.access$800(ProcedureExecutor.java:79)
143    //    at org.apache.hadoop.hbase.procedure2.ProcedureExecutor$WorkerThread.run(ProcedureExecutor.java:1719)
144    //
145    // If I comment out the ConnectionUtils ConnectionImplementation content, I see this:
146    //
147    //    java.lang.Exception
148    //    at org.apache.hadoop.hbase.regionserver.HRegion.doBatchMutate(HRegion.java:3845)
149    //    at org.apache.hadoop.hbase.regionserver.HRegion.put(HRegion.java:2972)
150    //    at org.apache.hadoop.hbase.regionserver.RSRpcServices.mutate(RSRpcServices.java:2751)
151    //    at org.apache.hadoop.hbase.shaded.protobuf.generated.ClientProtos$ClientService$2.callBlockingMethod(ClientProtos.java:41546)
152    //    at org.apache.hadoop.hbase.ipc.RpcServer.call(RpcServer.java:406)
153    //    at org.apache.hadoop.hbase.ipc.CallRunner.run(CallRunner.java:133)
154    //    at org.apache.hadoop.hbase.ipc.RpcExecutor$Handler.run(RpcExecutor.java:278)
155    //    at org.apache.hadoop.hbase.ipc.RpcExecutor$Handler.run(RpcExecutor.java:258)
156
157    checkBalance(SYSTEM_REGIONS, REGIONS/SLAVES);
158  }
159
160  private void checkBalance(int masterCount, int rsCount) throws Exception {
161    StartMiniClusterOption option = StartMiniClusterOption.builder()
162        .numMasters(MASTERS).numRegionServers(SLAVES).numDataNodes(SLAVES).build();
163    MiniHBaseCluster cluster = TEST_UTIL.startMiniCluster(option);
164    TableName tn = TableName.valueOf(this.name.getMethodName());
165    try {
166      Table t = TEST_UTIL.createMultiRegionTable(tn, HConstants.CATALOG_FAMILY, REGIONS);
167      LOG.info("Server: " + cluster.getMaster().getServerManager().getOnlineServersList());
168      List<HRegion> regions = cluster.getMaster().getRegions();
169      int mActualCount = regions.size();
170      if (masterCount == 0 || masterCount == SYSTEM_REGIONS) {
171        // 0 means no regions on master.
172        assertEquals(masterCount, mActualCount);
173      } else {
174        // This is master as a regionserver scenario.
175        checkCount(masterCount, mActualCount);
176      }
177      // Allow that balance is not exact. FYI, getRegionServerThreads does not include master
178      // thread though it is a regionserver so we have to check master and then below the
179      // regionservers.
180      for (JVMClusterUtil.RegionServerThread rst: cluster.getRegionServerThreads()) {
181        regions = rst.getRegionServer().getRegions();
182        int rsActualCount = regions.size();
183        checkCount(rsActualCount, rsCount);
184      }
185      HMaster oldMaster = cluster.getMaster();
186      cluster.killMaster(oldMaster.getServerName());
187      oldMaster.join();
188      while (cluster.getMaster() == null ||
189          cluster.getMaster().getServerName().equals(oldMaster.getServerName())) {
190        Threads.sleep(10);
191      }
192      while (!cluster.getMaster().isInitialized()) {
193        Threads.sleep(10);
194      }
195      while (cluster.getMaster().getAssignmentManager().
196          computeRegionInTransitionStat().getTotalRITs() > 0) {
197        Threads.sleep(100);
198        LOG.info("Waiting on RIT to go to zero before calling balancer...");
199      }
200      LOG.info("Cluster is up; running balancer");
201      cluster.getMaster().balance();
202      regions = cluster.getMaster().getRegions();
203      int mNewActualCount = regions.size();
204      if (masterCount == 0 || masterCount == SYSTEM_REGIONS) {
205        // 0 means no regions on master. After crash, should still be no regions on master.
206        // If masterCount == SYSTEM_REGIONS, means master only carrying system regions and should
207        // still only carry system regions post crash.
208        assertEquals(masterCount, mNewActualCount);
209      }
210    } finally {
211      LOG.info("Running shutdown of cluster");
212      TEST_UTIL.shutdownMiniCluster();
213    }
214  }
215
216  private void checkCount(int actual, int expected) {
217    assertTrue("Actual=" + actual + ", expected=" + expected,
218    actual >= (expected - 2) && actual <= (expected + 2)); // Lots of slop +/- 2
219  }
220}