001/* 002 * Licensed to the Apache Software Foundation (ASF) under one 003 * or more contributor license agreements. See the NOTICE file 004 * distributed with this work for additional information 005 * regarding copyright ownership. The ASF licenses this file 006 * to you under the Apache License, Version 2.0 (the 007 * "License"); you may not use this file except in compliance 008 * with the License. You may obtain a copy of the License at 009 * 010 * http://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, software 013 * distributed under the License is distributed on an "AS IS" BASIS, 014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 015 * See the License for the specific language governing permissions and 016 * limitations under the License. 017 */ 018package org.apache.hadoop.hbase.master.balancer; 019 020import static org.junit.Assert.assertEquals; 021import static org.junit.Assert.assertTrue; 022 023import java.util.List; 024import org.apache.hadoop.conf.Configuration; 025import org.apache.hadoop.hbase.HBaseClassTestRule; 026import org.apache.hadoop.hbase.HBaseTestingUtility; 027import org.apache.hadoop.hbase.HConstants; 028import org.apache.hadoop.hbase.MiniHBaseCluster; 029import org.apache.hadoop.hbase.StartMiniClusterOption; 030import org.apache.hadoop.hbase.TableName; 031import org.apache.hadoop.hbase.client.Table; 032import org.apache.hadoop.hbase.master.HMaster; 033import org.apache.hadoop.hbase.master.LoadBalancer; 034import org.apache.hadoop.hbase.regionserver.HRegion; 035import org.apache.hadoop.hbase.testclassification.MediumTests; 036import org.apache.hadoop.hbase.util.JVMClusterUtil; 037import org.apache.hadoop.hbase.util.Threads; 038import org.junit.After; 039import org.junit.Before; 040import org.junit.ClassRule; 041import org.junit.Ignore; 042import org.junit.Rule; 043import org.junit.Test; 044import org.junit.experimental.categories.Category; 045import org.junit.rules.TestName; 046import org.slf4j.Logger; 047import org.slf4j.LoggerFactory; 048 049/** 050 * Test options for regions on master; none, system, or any (i.e. master is like any other 051 * regionserver). Checks how regions are deployed when each of the options are enabled. It then does 052 * kill combinations to make sure the distribution is more than just for startup. NOTE: Regions on 053 * Master does not work well. See HBASE-19828. Until addressed, disabling this test. 054 */ 055@Ignore 056@Category({ MediumTests.class }) 057public class TestRegionsOnMasterOptions { 058 059 @ClassRule 060 public static final HBaseClassTestRule CLASS_RULE = 061 HBaseClassTestRule.forClass(TestRegionsOnMasterOptions.class); 062 063 private static final Logger LOG = LoggerFactory.getLogger(TestRegionsOnMasterOptions.class); 064 @Rule 065 public TestName name = new TestName(); 066 private final static HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility(); 067 private Configuration c; 068 private String tablesOnMasterOldValue; 069 private String systemTablesOnMasterOldValue; 070 private static final int SLAVES = 3; 071 private static final int MASTERS = 2; 072 // Make the count of REGIONS high enough so I can distingush case where master is only carrying 073 // system regions from the case where it is carrying any region; i.e. 2 system regions vs more 074 // if user + system. 075 private static final int REGIONS = 12; 076 private static final int SYSTEM_REGIONS = 2; // ns and meta -- no acl unless enabled. 077 078 @Before 079 public void setup() { 080 this.c = TEST_UTIL.getConfiguration(); 081 this.tablesOnMasterOldValue = c.get(LoadBalancer.TABLES_ON_MASTER); 082 this.systemTablesOnMasterOldValue = c.get(LoadBalancer.SYSTEM_TABLES_ON_MASTER); 083 } 084 085 @After 086 public void tearDown() { 087 unset(LoadBalancer.TABLES_ON_MASTER, this.tablesOnMasterOldValue); 088 unset(LoadBalancer.SYSTEM_TABLES_ON_MASTER, this.systemTablesOnMasterOldValue); 089 } 090 091 private void unset(final String key, final String value) { 092 if (value == null) { 093 c.unset(key); 094 } else { 095 c.set(key, value); 096 } 097 } 098 099 @Test 100 public void testRegionsOnAllServers() throws Exception { 101 c.setBoolean(LoadBalancer.TABLES_ON_MASTER, true); 102 c.setBoolean(LoadBalancer.SYSTEM_TABLES_ON_MASTER, false); 103 int rsCount = (REGIONS + SYSTEM_REGIONS) / (SLAVES + 1/* Master */); 104 checkBalance(rsCount, rsCount); 105 } 106 107 @Test 108 public void testNoRegionOnMaster() throws Exception { 109 c.setBoolean(LoadBalancer.TABLES_ON_MASTER, false); 110 c.setBoolean(LoadBalancer.SYSTEM_TABLES_ON_MASTER, false); 111 int rsCount = (REGIONS + SYSTEM_REGIONS) / SLAVES; 112 checkBalance(0, rsCount); 113 } 114 115 @Ignore // Fix this. The Master startup doesn't allow Master reporting as a RegionServer, not 116 // until way late after the Master startup finishes. Needs more work. 117 @Test 118 public void testSystemTablesOnMaster() throws Exception { 119 c.setBoolean(LoadBalancer.TABLES_ON_MASTER, true); 120 c.setBoolean(LoadBalancer.SYSTEM_TABLES_ON_MASTER, true); 121 // IS THIS SHORT-CIRCUIT RPC? Yes. Here is how it looks currently if I have an exception 122 // thrown in doBatchMutate inside a Region. 123 // 124 // java.lang.Exception 125 // at org.apache.hadoop.hbase.regionserver.HRegion.doBatchMutate(HRegion.java:3845) 126 // at org.apache.hadoop.hbase.regionserver.HRegion.put(HRegion.java:2972) 127 // at org.apache.hadoop.hbase.regionserver.RSRpcServices.mutate(RSRpcServices.java:2751) 128 // at 129 // org.apache.hadoop.hbase.client.ClientServiceCallable.doMutate(ClientServiceCallable.java:55) 130 // at org.apache.hadoop.hbase.client.HTable$3.rpcCall(HTable.java:585) 131 // at org.apache.hadoop.hbase.client.HTable$3.rpcCall(HTable.java:579) 132 // at org.apache.hadoop.hbase.client.RegionServerCallable.call(RegionServerCallable.java:126) 133 // at 134 // org.apache.hadoop.hbase.client.RpcRetryingCallerImpl.callWithRetries(RpcRetryingCallerImpl.java:106) 135 // at org.apache.hadoop.hbase.client.HTable.put(HTable.java:589) 136 // at 137 // org.apache.hadoop.hbase.master.TableNamespaceManager.insertIntoNSTable(TableNamespaceManager.java:156) 138 // at 139 // org.apache.hadoop.hbase.master.procedure.CreateNamespaceProcedure.insertIntoNSTable(CreateNamespaceProcedure.java:222) 140 // at 141 // org.apache.hadoop.hbase.master.procedure.CreateNamespaceProcedure.executeFromState(CreateNamespaceProcedure.java:76) 142 // at 143 // org.apache.hadoop.hbase.master.procedure.CreateNamespaceProcedure.executeFromState(CreateNamespaceProcedure.java:40) 144 // at 145 // org.apache.hadoop.hbase.procedure2.StateMachineProcedure.execute(StateMachineProcedure.java:181) 146 // at org.apache.hadoop.hbase.procedure2.Procedure.doExecute(Procedure.java:847) 147 // at 148 // org.apache.hadoop.hbase.procedure2.ProcedureExecutor.execProcedure(ProcedureExecutor.java:1440) 149 // at 150 // org.apache.hadoop.hbase.procedure2.ProcedureExecutor.executeProcedure(ProcedureExecutor.java:1209) 151 // at org.apache.hadoop.hbase.procedure2.ProcedureExecutor.access$800(ProcedureExecutor.java:79) 152 // at 153 // org.apache.hadoop.hbase.procedure2.ProcedureExecutor$WorkerThread.run(ProcedureExecutor.java:1719) 154 // 155 // If I comment out the ConnectionUtils ConnectionImplementation content, I see this: 156 // 157 // java.lang.Exception 158 // at org.apache.hadoop.hbase.regionserver.HRegion.doBatchMutate(HRegion.java:3845) 159 // at org.apache.hadoop.hbase.regionserver.HRegion.put(HRegion.java:2972) 160 // at org.apache.hadoop.hbase.regionserver.RSRpcServices.mutate(RSRpcServices.java:2751) 161 // at 162 // org.apache.hadoop.hbase.shaded.protobuf.generated.ClientProtos$ClientService$2.callBlockingMethod(ClientProtos.java:41546) 163 // at org.apache.hadoop.hbase.ipc.RpcServer.call(RpcServer.java:406) 164 // at org.apache.hadoop.hbase.ipc.CallRunner.run(CallRunner.java:133) 165 // at org.apache.hadoop.hbase.ipc.RpcExecutor$Handler.run(RpcExecutor.java:278) 166 // at org.apache.hadoop.hbase.ipc.RpcExecutor$Handler.run(RpcExecutor.java:258) 167 168 checkBalance(SYSTEM_REGIONS, REGIONS / SLAVES); 169 } 170 171 private void checkBalance(int masterCount, int rsCount) throws Exception { 172 StartMiniClusterOption option = StartMiniClusterOption.builder().numMasters(MASTERS) 173 .numRegionServers(SLAVES).numDataNodes(SLAVES).build(); 174 MiniHBaseCluster cluster = TEST_UTIL.startMiniCluster(option); 175 TableName tn = TableName.valueOf(this.name.getMethodName()); 176 try { 177 Table t = TEST_UTIL.createMultiRegionTable(tn, HConstants.CATALOG_FAMILY, REGIONS); 178 LOG.info("Server: " + cluster.getMaster().getServerManager().getOnlineServersList()); 179 List<HRegion> regions = cluster.getMaster().getRegions(); 180 int mActualCount = regions.size(); 181 if (masterCount == 0 || masterCount == SYSTEM_REGIONS) { 182 // 0 means no regions on master. 183 assertEquals(masterCount, mActualCount); 184 } else { 185 // This is master as a regionserver scenario. 186 checkCount(masterCount, mActualCount); 187 } 188 // Allow that balance is not exact. FYI, getRegionServerThreads does not include master 189 // thread though it is a regionserver so we have to check master and then below the 190 // regionservers. 191 for (JVMClusterUtil.RegionServerThread rst : cluster.getRegionServerThreads()) { 192 regions = rst.getRegionServer().getRegions(); 193 int rsActualCount = regions.size(); 194 checkCount(rsActualCount, rsCount); 195 } 196 HMaster oldMaster = cluster.getMaster(); 197 cluster.killMaster(oldMaster.getServerName()); 198 oldMaster.join(); 199 while ( 200 cluster.getMaster() == null 201 || cluster.getMaster().getServerName().equals(oldMaster.getServerName()) 202 ) { 203 Threads.sleep(10); 204 } 205 while (!cluster.getMaster().isInitialized()) { 206 Threads.sleep(10); 207 } 208 while ( 209 cluster.getMaster().getAssignmentManager().computeRegionInTransitionStat().getTotalRITs() 210 > 0 211 ) { 212 Threads.sleep(100); 213 LOG.info("Waiting on RIT to go to zero before calling balancer..."); 214 } 215 LOG.info("Cluster is up; running balancer"); 216 cluster.getMaster().balance(); 217 regions = cluster.getMaster().getRegions(); 218 int mNewActualCount = regions.size(); 219 if (masterCount == 0 || masterCount == SYSTEM_REGIONS) { 220 // 0 means no regions on master. After crash, should still be no regions on master. 221 // If masterCount == SYSTEM_REGIONS, means master only carrying system regions and should 222 // still only carry system regions post crash. 223 assertEquals(masterCount, mNewActualCount); 224 } 225 } finally { 226 LOG.info("Running shutdown of cluster"); 227 TEST_UTIL.shutdownMiniCluster(); 228 } 229 } 230 231 private void checkCount(int actual, int expected) { 232 assertTrue("Actual=" + actual + ", expected=" + expected, 233 actual >= (expected - 2) && actual <= (expected + 2)); // Lots of slop +/- 2 234 } 235}