001/**
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018package org.apache.hadoop.hbase.master.balancer;
019
020import static org.junit.Assert.assertEquals;
021import static org.junit.Assert.assertTrue;
022
023import java.util.List;
024import org.apache.hadoop.conf.Configuration;
025import org.apache.hadoop.hbase.HBaseClassTestRule;
026import org.apache.hadoop.hbase.HBaseTestingUtility;
027import org.apache.hadoop.hbase.HConstants;
028import org.apache.hadoop.hbase.MiniHBaseCluster;
029import org.apache.hadoop.hbase.TableName;
030import org.apache.hadoop.hbase.client.Table;
031import org.apache.hadoop.hbase.master.HMaster;
032import org.apache.hadoop.hbase.master.LoadBalancer;
033import org.apache.hadoop.hbase.regionserver.HRegion;
034import org.apache.hadoop.hbase.testclassification.MediumTests;
035import org.apache.hadoop.hbase.util.JVMClusterUtil;
036import org.apache.hadoop.hbase.util.Threads;
037import org.junit.After;
038import org.junit.Before;
039import org.junit.ClassRule;
040import org.junit.Ignore;
041import org.junit.Rule;
042import org.junit.Test;
043import org.junit.experimental.categories.Category;
044import org.junit.rules.TestName;
045import org.slf4j.Logger;
046import org.slf4j.LoggerFactory;
047
048/**
049 * Test options for regions on master; none, system, or any (i.e. master is like any other
050 * regionserver). Checks how regions are deployed when each of the options are enabled.
051 * It then does kill combinations to make sure the distribution is more than just for startup.
052 * NOTE: Regions on Master does not work well. See HBASE-19828. Until addressed, disabling this
053 * test.
054 */
055@Ignore
056@Category({MediumTests.class})
057public class TestRegionsOnMasterOptions {
058
059  @ClassRule
060  public static final HBaseClassTestRule CLASS_RULE =
061      HBaseClassTestRule.forClass(TestRegionsOnMasterOptions.class);
062
063  private static final Logger LOG = LoggerFactory.getLogger(TestRegionsOnMasterOptions.class);
064  @Rule public TestName name = new TestName();
065  private final static HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility();
066  private Configuration c;
067  private String tablesOnMasterOldValue;
068  private String systemTablesOnMasterOldValue;
069  private static final int SLAVES = 3;
070  private static final int MASTERS = 2;
071  // Make the count of REGIONS high enough so I can distingush case where master is only carrying
072  // system regions from the case where it is carrying any region; i.e. 2 system regions vs more
073  // if user + system.
074  private static final int REGIONS = 12;
075  private static final int SYSTEM_REGIONS = 2; // ns and meta -- no acl unless enabled.
076
077  @Before
078  public void setup() {
079    this.c = TEST_UTIL.getConfiguration();
080    this.tablesOnMasterOldValue = c.get(LoadBalancer.TABLES_ON_MASTER);
081    this.systemTablesOnMasterOldValue = c.get(LoadBalancer.SYSTEM_TABLES_ON_MASTER);
082  }
083
084  @After
085  public void tearDown() {
086    unset(LoadBalancer.TABLES_ON_MASTER, this.tablesOnMasterOldValue);
087    unset(LoadBalancer.SYSTEM_TABLES_ON_MASTER, this.systemTablesOnMasterOldValue);
088  }
089
090  private void unset(final String key, final String value) {
091    if (value == null) {
092      c.unset(key);
093    } else {
094      c.set(key, value);
095    }
096  }
097
098  @Test
099  public void testRegionsOnAllServers() throws Exception {
100    c.setBoolean(LoadBalancer.TABLES_ON_MASTER, true);
101    c.setBoolean(LoadBalancer.SYSTEM_TABLES_ON_MASTER, false);
102    int rsCount = (REGIONS + SYSTEM_REGIONS)/(SLAVES + 1/*Master*/);
103    checkBalance(rsCount, rsCount);
104  }
105
106  @Test
107  public void testNoRegionOnMaster() throws Exception {
108    c.setBoolean(LoadBalancer.TABLES_ON_MASTER, false);
109    c.setBoolean(LoadBalancer.SYSTEM_TABLES_ON_MASTER, false);
110    int rsCount = (REGIONS + SYSTEM_REGIONS)/SLAVES;
111    checkBalance(0, rsCount);
112  }
113
114  @Ignore // Fix this. The Master startup doesn't allow Master reporting as a RegionServer, not
115  // until way late after the Master startup finishes. Needs more work.
116  @Test
117  public void testSystemTablesOnMaster() throws Exception {
118    c.setBoolean(LoadBalancer.TABLES_ON_MASTER, true);
119    c.setBoolean(LoadBalancer.SYSTEM_TABLES_ON_MASTER, true);
120    // IS THIS SHORT-CIRCUIT RPC? Yes. Here is how it looks currently if I have an exception
121    // thrown in doBatchMutate inside a Region.
122    //
123    //    java.lang.Exception
124    //    at org.apache.hadoop.hbase.regionserver.HRegion.doBatchMutate(HRegion.java:3845)
125    //    at org.apache.hadoop.hbase.regionserver.HRegion.put(HRegion.java:2972)
126    //    at org.apache.hadoop.hbase.regionserver.RSRpcServices.mutate(RSRpcServices.java:2751)
127    //    at org.apache.hadoop.hbase.client.ClientServiceCallable.doMutate(ClientServiceCallable.java:55)
128    //    at org.apache.hadoop.hbase.client.HTable$3.rpcCall(HTable.java:585)
129    //    at org.apache.hadoop.hbase.client.HTable$3.rpcCall(HTable.java:579)
130    //    at org.apache.hadoop.hbase.client.RegionServerCallable.call(RegionServerCallable.java:126)
131    //    at org.apache.hadoop.hbase.client.RpcRetryingCallerImpl.callWithRetries(RpcRetryingCallerImpl.java:106)
132    //    at org.apache.hadoop.hbase.client.HTable.put(HTable.java:589)
133    //    at org.apache.hadoop.hbase.master.TableNamespaceManager.insertIntoNSTable(TableNamespaceManager.java:156)
134    //    at org.apache.hadoop.hbase.master.procedure.CreateNamespaceProcedure.insertIntoNSTable(CreateNamespaceProcedure.java:222)
135    //    at org.apache.hadoop.hbase.master.procedure.CreateNamespaceProcedure.executeFromState(CreateNamespaceProcedure.java:76)
136    //    at org.apache.hadoop.hbase.master.procedure.CreateNamespaceProcedure.executeFromState(CreateNamespaceProcedure.java:40)
137    //    at org.apache.hadoop.hbase.procedure2.StateMachineProcedure.execute(StateMachineProcedure.java:181)
138    //    at org.apache.hadoop.hbase.procedure2.Procedure.doExecute(Procedure.java:847)
139    //    at org.apache.hadoop.hbase.procedure2.ProcedureExecutor.execProcedure(ProcedureExecutor.java:1440)
140    //    at org.apache.hadoop.hbase.procedure2.ProcedureExecutor.executeProcedure(ProcedureExecutor.java:1209)
141    //    at org.apache.hadoop.hbase.procedure2.ProcedureExecutor.access$800(ProcedureExecutor.java:79)
142    //    at org.apache.hadoop.hbase.procedure2.ProcedureExecutor$WorkerThread.run(ProcedureExecutor.java:1719)
143    //
144    // If I comment out the ConnectionUtils ConnectionImplementation content, I see this:
145    //
146    //    java.lang.Exception
147    //    at org.apache.hadoop.hbase.regionserver.HRegion.doBatchMutate(HRegion.java:3845)
148    //    at org.apache.hadoop.hbase.regionserver.HRegion.put(HRegion.java:2972)
149    //    at org.apache.hadoop.hbase.regionserver.RSRpcServices.mutate(RSRpcServices.java:2751)
150    //    at org.apache.hadoop.hbase.shaded.protobuf.generated.ClientProtos$ClientService$2.callBlockingMethod(ClientProtos.java:41546)
151    //    at org.apache.hadoop.hbase.ipc.RpcServer.call(RpcServer.java:406)
152    //    at org.apache.hadoop.hbase.ipc.CallRunner.run(CallRunner.java:133)
153    //    at org.apache.hadoop.hbase.ipc.RpcExecutor$Handler.run(RpcExecutor.java:278)
154    //    at org.apache.hadoop.hbase.ipc.RpcExecutor$Handler.run(RpcExecutor.java:258)
155
156    checkBalance(SYSTEM_REGIONS, REGIONS/SLAVES);
157  }
158
159  private void checkBalance(int masterCount, int rsCount) throws Exception {
160    MiniHBaseCluster cluster = TEST_UTIL.startMiniCluster(MASTERS, SLAVES);
161    TableName tn = TableName.valueOf(this.name.getMethodName());
162    try {
163      Table t = TEST_UTIL.createMultiRegionTable(tn, HConstants.CATALOG_FAMILY, REGIONS);
164      LOG.info("Server: " + cluster.getMaster().getServerManager().getOnlineServersList());
165      List<HRegion> regions = cluster.getMaster().getRegions();
166      int mActualCount = regions.size();
167      if (masterCount == 0 || masterCount == SYSTEM_REGIONS) {
168        // 0 means no regions on master.
169        assertEquals(masterCount, mActualCount);
170      } else {
171        // This is master as a regionserver scenario.
172        checkCount(masterCount, mActualCount);
173      }
174      // Allow that balance is not exact. FYI, getRegionServerThreads does not include master
175      // thread though it is a regionserver so we have to check master and then below the
176      // regionservers.
177      for (JVMClusterUtil.RegionServerThread rst: cluster.getRegionServerThreads()) {
178        regions = rst.getRegionServer().getRegions();
179        int rsActualCount = regions.size();
180        checkCount(rsActualCount, rsCount);
181      }
182      HMaster oldMaster = cluster.getMaster();
183      cluster.killMaster(oldMaster.getServerName());
184      oldMaster.join();
185      while (cluster.getMaster() == null ||
186          cluster.getMaster().getServerName().equals(oldMaster.getServerName())) {
187        Threads.sleep(10);
188      }
189      while (!cluster.getMaster().isInitialized()) {
190        Threads.sleep(10);
191      }
192      while (cluster.getMaster().getAssignmentManager().
193          computeRegionInTransitionStat().getTotalRITs() > 0) {
194        Threads.sleep(100);
195        LOG.info("Waiting on RIT to go to zero before calling balancer...");
196      }
197      LOG.info("Cluster is up; running balancer");
198      cluster.getMaster().balance();
199      regions = cluster.getMaster().getRegions();
200      int mNewActualCount = regions.size();
201      if (masterCount == 0 || masterCount == SYSTEM_REGIONS) {
202        // 0 means no regions on master. After crash, should still be no regions on master.
203        // If masterCount == SYSTEM_REGIONS, means master only carrying system regions and should
204        // still only carry system regions post crash.
205        assertEquals(masterCount, mNewActualCount);
206      }
207    } finally {
208      LOG.info("Running shutdown of cluster");
209      TEST_UTIL.shutdownMiniCluster();
210    }
211  }
212
213  private void checkCount(int actual, int expected) {
214    assertTrue("Actual=" + actual + ", expected=" + expected,
215    actual >= (expected - 2) && actual <= (expected + 2)); // Lots of slop +/- 2
216  }
217}