001/** 002 * Licensed to the Apache Software Foundation (ASF) under one 003 * or more contributor license agreements. See the NOTICE file 004 * distributed with this work for additional information 005 * regarding copyright ownership. The ASF licenses this file 006 * to you under the Apache License, Version 2.0 (the 007 * "License"); you may not use this file except in compliance 008 * with the License. You may obtain a copy of the License at 009 * 010 * http://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, software 013 * distributed under the License is distributed on an "AS IS" BASIS, 014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 015 * See the License for the specific language governing permissions and 016 * limitations under the License. 017 */ 018package org.apache.hadoop.hbase.master.balancer; 019 020import static org.junit.Assert.assertEquals; 021import static org.junit.Assert.assertTrue; 022 023import java.util.List; 024import org.apache.hadoop.conf.Configuration; 025import org.apache.hadoop.hbase.HBaseClassTestRule; 026import org.apache.hadoop.hbase.HBaseTestingUtility; 027import org.apache.hadoop.hbase.HConstants; 028import org.apache.hadoop.hbase.MiniHBaseCluster; 029import org.apache.hadoop.hbase.StartMiniClusterOption; 030import org.apache.hadoop.hbase.TableName; 031import org.apache.hadoop.hbase.client.Table; 032import org.apache.hadoop.hbase.master.HMaster; 033import org.apache.hadoop.hbase.master.LoadBalancer; 034import org.apache.hadoop.hbase.regionserver.HRegion; 035import org.apache.hadoop.hbase.testclassification.MediumTests; 036import org.apache.hadoop.hbase.util.JVMClusterUtil; 037import org.apache.hadoop.hbase.util.Threads; 038import org.junit.After; 039import org.junit.Before; 040import org.junit.ClassRule; 041import org.junit.Ignore; 042import org.junit.Rule; 043import org.junit.Test; 044import org.junit.experimental.categories.Category; 045import org.junit.rules.TestName; 046import org.slf4j.Logger; 047import org.slf4j.LoggerFactory; 048 049/** 050 * Test options for regions on master; none, system, or any (i.e. master is like any other 051 * regionserver). Checks how regions are deployed when each of the options are enabled. 052 * It then does kill combinations to make sure the distribution is more than just for startup. 053 * NOTE: Regions on Master does not work well. See HBASE-19828. Until addressed, disabling this 054 * test. 055 */ 056@Ignore 057@Category({MediumTests.class}) 058public class TestRegionsOnMasterOptions { 059 060 @ClassRule 061 public static final HBaseClassTestRule CLASS_RULE = 062 HBaseClassTestRule.forClass(TestRegionsOnMasterOptions.class); 063 064 private static final Logger LOG = LoggerFactory.getLogger(TestRegionsOnMasterOptions.class); 065 @Rule public TestName name = new TestName(); 066 private final static HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility(); 067 private Configuration c; 068 private String tablesOnMasterOldValue; 069 private String systemTablesOnMasterOldValue; 070 private static final int SLAVES = 3; 071 private static final int MASTERS = 2; 072 // Make the count of REGIONS high enough so I can distingush case where master is only carrying 073 // system regions from the case where it is carrying any region; i.e. 2 system regions vs more 074 // if user + system. 075 private static final int REGIONS = 12; 076 private static final int SYSTEM_REGIONS = 2; // ns and meta -- no acl unless enabled. 077 078 @Before 079 public void setup() { 080 this.c = TEST_UTIL.getConfiguration(); 081 this.tablesOnMasterOldValue = c.get(LoadBalancer.TABLES_ON_MASTER); 082 this.systemTablesOnMasterOldValue = c.get(LoadBalancer.SYSTEM_TABLES_ON_MASTER); 083 } 084 085 @After 086 public void tearDown() { 087 unset(LoadBalancer.TABLES_ON_MASTER, this.tablesOnMasterOldValue); 088 unset(LoadBalancer.SYSTEM_TABLES_ON_MASTER, this.systemTablesOnMasterOldValue); 089 } 090 091 private void unset(final String key, final String value) { 092 if (value == null) { 093 c.unset(key); 094 } else { 095 c.set(key, value); 096 } 097 } 098 099 @Test 100 public void testRegionsOnAllServers() throws Exception { 101 c.setBoolean(LoadBalancer.TABLES_ON_MASTER, true); 102 c.setBoolean(LoadBalancer.SYSTEM_TABLES_ON_MASTER, false); 103 int rsCount = (REGIONS + SYSTEM_REGIONS)/(SLAVES + 1/*Master*/); 104 checkBalance(rsCount, rsCount); 105 } 106 107 @Test 108 public void testNoRegionOnMaster() throws Exception { 109 c.setBoolean(LoadBalancer.TABLES_ON_MASTER, false); 110 c.setBoolean(LoadBalancer.SYSTEM_TABLES_ON_MASTER, false); 111 int rsCount = (REGIONS + SYSTEM_REGIONS)/SLAVES; 112 checkBalance(0, rsCount); 113 } 114 115 @Ignore // Fix this. The Master startup doesn't allow Master reporting as a RegionServer, not 116 // until way late after the Master startup finishes. Needs more work. 117 @Test 118 public void testSystemTablesOnMaster() throws Exception { 119 c.setBoolean(LoadBalancer.TABLES_ON_MASTER, true); 120 c.setBoolean(LoadBalancer.SYSTEM_TABLES_ON_MASTER, true); 121 // IS THIS SHORT-CIRCUIT RPC? Yes. Here is how it looks currently if I have an exception 122 // thrown in doBatchMutate inside a Region. 123 // 124 // java.lang.Exception 125 // at org.apache.hadoop.hbase.regionserver.HRegion.doBatchMutate(HRegion.java:3845) 126 // at org.apache.hadoop.hbase.regionserver.HRegion.put(HRegion.java:2972) 127 // at org.apache.hadoop.hbase.regionserver.RSRpcServices.mutate(RSRpcServices.java:2751) 128 // at org.apache.hadoop.hbase.client.ClientServiceCallable.doMutate(ClientServiceCallable.java:55) 129 // at org.apache.hadoop.hbase.client.HTable$3.rpcCall(HTable.java:585) 130 // at org.apache.hadoop.hbase.client.HTable$3.rpcCall(HTable.java:579) 131 // at org.apache.hadoop.hbase.client.RegionServerCallable.call(RegionServerCallable.java:126) 132 // at org.apache.hadoop.hbase.client.RpcRetryingCallerImpl.callWithRetries(RpcRetryingCallerImpl.java:106) 133 // at org.apache.hadoop.hbase.client.HTable.put(HTable.java:589) 134 // at org.apache.hadoop.hbase.master.TableNamespaceManager.insertIntoNSTable(TableNamespaceManager.java:156) 135 // at org.apache.hadoop.hbase.master.procedure.CreateNamespaceProcedure.insertIntoNSTable(CreateNamespaceProcedure.java:222) 136 // at org.apache.hadoop.hbase.master.procedure.CreateNamespaceProcedure.executeFromState(CreateNamespaceProcedure.java:76) 137 // at org.apache.hadoop.hbase.master.procedure.CreateNamespaceProcedure.executeFromState(CreateNamespaceProcedure.java:40) 138 // at org.apache.hadoop.hbase.procedure2.StateMachineProcedure.execute(StateMachineProcedure.java:181) 139 // at org.apache.hadoop.hbase.procedure2.Procedure.doExecute(Procedure.java:847) 140 // at org.apache.hadoop.hbase.procedure2.ProcedureExecutor.execProcedure(ProcedureExecutor.java:1440) 141 // at org.apache.hadoop.hbase.procedure2.ProcedureExecutor.executeProcedure(ProcedureExecutor.java:1209) 142 // at org.apache.hadoop.hbase.procedure2.ProcedureExecutor.access$800(ProcedureExecutor.java:79) 143 // at org.apache.hadoop.hbase.procedure2.ProcedureExecutor$WorkerThread.run(ProcedureExecutor.java:1719) 144 // 145 // If I comment out the ConnectionUtils ConnectionImplementation content, I see this: 146 // 147 // java.lang.Exception 148 // at org.apache.hadoop.hbase.regionserver.HRegion.doBatchMutate(HRegion.java:3845) 149 // at org.apache.hadoop.hbase.regionserver.HRegion.put(HRegion.java:2972) 150 // at org.apache.hadoop.hbase.regionserver.RSRpcServices.mutate(RSRpcServices.java:2751) 151 // at org.apache.hadoop.hbase.shaded.protobuf.generated.ClientProtos$ClientService$2.callBlockingMethod(ClientProtos.java:41546) 152 // at org.apache.hadoop.hbase.ipc.RpcServer.call(RpcServer.java:406) 153 // at org.apache.hadoop.hbase.ipc.CallRunner.run(CallRunner.java:133) 154 // at org.apache.hadoop.hbase.ipc.RpcExecutor$Handler.run(RpcExecutor.java:278) 155 // at org.apache.hadoop.hbase.ipc.RpcExecutor$Handler.run(RpcExecutor.java:258) 156 157 checkBalance(SYSTEM_REGIONS, REGIONS/SLAVES); 158 } 159 160 private void checkBalance(int masterCount, int rsCount) throws Exception { 161 StartMiniClusterOption option = StartMiniClusterOption.builder() 162 .numMasters(MASTERS).numRegionServers(SLAVES).numDataNodes(SLAVES).build(); 163 MiniHBaseCluster cluster = TEST_UTIL.startMiniCluster(option); 164 TableName tn = TableName.valueOf(this.name.getMethodName()); 165 try { 166 Table t = TEST_UTIL.createMultiRegionTable(tn, HConstants.CATALOG_FAMILY, REGIONS); 167 LOG.info("Server: " + cluster.getMaster().getServerManager().getOnlineServersList()); 168 List<HRegion> regions = cluster.getMaster().getRegions(); 169 int mActualCount = regions.size(); 170 if (masterCount == 0 || masterCount == SYSTEM_REGIONS) { 171 // 0 means no regions on master. 172 assertEquals(masterCount, mActualCount); 173 } else { 174 // This is master as a regionserver scenario. 175 checkCount(masterCount, mActualCount); 176 } 177 // Allow that balance is not exact. FYI, getRegionServerThreads does not include master 178 // thread though it is a regionserver so we have to check master and then below the 179 // regionservers. 180 for (JVMClusterUtil.RegionServerThread rst: cluster.getRegionServerThreads()) { 181 regions = rst.getRegionServer().getRegions(); 182 int rsActualCount = regions.size(); 183 checkCount(rsActualCount, rsCount); 184 } 185 HMaster oldMaster = cluster.getMaster(); 186 cluster.killMaster(oldMaster.getServerName()); 187 oldMaster.join(); 188 while (cluster.getMaster() == null || 189 cluster.getMaster().getServerName().equals(oldMaster.getServerName())) { 190 Threads.sleep(10); 191 } 192 while (!cluster.getMaster().isInitialized()) { 193 Threads.sleep(10); 194 } 195 while (cluster.getMaster().getAssignmentManager(). 196 computeRegionInTransitionStat().getTotalRITs() > 0) { 197 Threads.sleep(100); 198 LOG.info("Waiting on RIT to go to zero before calling balancer..."); 199 } 200 LOG.info("Cluster is up; running balancer"); 201 cluster.getMaster().balance(); 202 regions = cluster.getMaster().getRegions(); 203 int mNewActualCount = regions.size(); 204 if (masterCount == 0 || masterCount == SYSTEM_REGIONS) { 205 // 0 means no regions on master. After crash, should still be no regions on master. 206 // If masterCount == SYSTEM_REGIONS, means master only carrying system regions and should 207 // still only carry system regions post crash. 208 assertEquals(masterCount, mNewActualCount); 209 } 210 } finally { 211 LOG.info("Running shutdown of cluster"); 212 TEST_UTIL.shutdownMiniCluster(); 213 } 214 } 215 216 private void checkCount(int actual, int expected) { 217 assertTrue("Actual=" + actual + ", expected=" + expected, 218 actual >= (expected - 2) && actual <= (expected + 2)); // Lots of slop +/- 2 219 } 220}