001/** 002 * Licensed to the Apache Software Foundation (ASF) under one 003 * or more contributor license agreements. See the NOTICE file 004 * distributed with this work for additional information 005 * regarding copyright ownership. The ASF licenses this file 006 * to you under the Apache License, Version 2.0 (the 007 * "License"); you may not use this file except in compliance 008 * with the License. You may obtain a copy of the License at 009 * 010 * http://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, software 013 * distributed under the License is distributed on an "AS IS" BASIS, 014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 015 * See the License for the specific language governing permissions and 016 * limitations under the License. 017 */ 018package org.apache.hadoop.hbase.master.balancer; 019 020import static org.junit.Assert.assertEquals; 021import static org.junit.Assert.assertTrue; 022 023import java.util.List; 024import org.apache.hadoop.conf.Configuration; 025import org.apache.hadoop.hbase.HBaseClassTestRule; 026import org.apache.hadoop.hbase.HBaseTestingUtility; 027import org.apache.hadoop.hbase.HConstants; 028import org.apache.hadoop.hbase.MiniHBaseCluster; 029import org.apache.hadoop.hbase.TableName; 030import org.apache.hadoop.hbase.client.Table; 031import org.apache.hadoop.hbase.master.HMaster; 032import org.apache.hadoop.hbase.master.LoadBalancer; 033import org.apache.hadoop.hbase.regionserver.HRegion; 034import org.apache.hadoop.hbase.testclassification.MediumTests; 035import org.apache.hadoop.hbase.util.JVMClusterUtil; 036import org.apache.hadoop.hbase.util.Threads; 037import org.junit.After; 038import org.junit.Before; 039import org.junit.ClassRule; 040import org.junit.Ignore; 041import org.junit.Rule; 042import org.junit.Test; 043import org.junit.experimental.categories.Category; 044import org.junit.rules.TestName; 045import org.slf4j.Logger; 046import org.slf4j.LoggerFactory; 047 048/** 049 * Test options for regions on master; none, system, or any (i.e. master is like any other 050 * regionserver). Checks how regions are deployed when each of the options are enabled. 051 * It then does kill combinations to make sure the distribution is more than just for startup. 052 * NOTE: Regions on Master does not work well. See HBASE-19828. Until addressed, disabling this 053 * test. 054 */ 055@Ignore 056@Category({MediumTests.class}) 057public class TestRegionsOnMasterOptions { 058 059 @ClassRule 060 public static final HBaseClassTestRule CLASS_RULE = 061 HBaseClassTestRule.forClass(TestRegionsOnMasterOptions.class); 062 063 private static final Logger LOG = LoggerFactory.getLogger(TestRegionsOnMasterOptions.class); 064 @Rule public TestName name = new TestName(); 065 private final static HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility(); 066 private Configuration c; 067 private String tablesOnMasterOldValue; 068 private String systemTablesOnMasterOldValue; 069 private static final int SLAVES = 3; 070 private static final int MASTERS = 2; 071 // Make the count of REGIONS high enough so I can distingush case where master is only carrying 072 // system regions from the case where it is carrying any region; i.e. 2 system regions vs more 073 // if user + system. 074 private static final int REGIONS = 12; 075 private static final int SYSTEM_REGIONS = 2; // ns and meta -- no acl unless enabled. 076 077 @Before 078 public void setup() { 079 this.c = TEST_UTIL.getConfiguration(); 080 this.tablesOnMasterOldValue = c.get(LoadBalancer.TABLES_ON_MASTER); 081 this.systemTablesOnMasterOldValue = c.get(LoadBalancer.SYSTEM_TABLES_ON_MASTER); 082 } 083 084 @After 085 public void tearDown() { 086 unset(LoadBalancer.TABLES_ON_MASTER, this.tablesOnMasterOldValue); 087 unset(LoadBalancer.SYSTEM_TABLES_ON_MASTER, this.systemTablesOnMasterOldValue); 088 } 089 090 private void unset(final String key, final String value) { 091 if (value == null) { 092 c.unset(key); 093 } else { 094 c.set(key, value); 095 } 096 } 097 098 @Test 099 public void testRegionsOnAllServers() throws Exception { 100 c.setBoolean(LoadBalancer.TABLES_ON_MASTER, true); 101 c.setBoolean(LoadBalancer.SYSTEM_TABLES_ON_MASTER, false); 102 int rsCount = (REGIONS + SYSTEM_REGIONS)/(SLAVES + 1/*Master*/); 103 checkBalance(rsCount, rsCount); 104 } 105 106 @Test 107 public void testNoRegionOnMaster() throws Exception { 108 c.setBoolean(LoadBalancer.TABLES_ON_MASTER, false); 109 c.setBoolean(LoadBalancer.SYSTEM_TABLES_ON_MASTER, false); 110 int rsCount = (REGIONS + SYSTEM_REGIONS)/SLAVES; 111 checkBalance(0, rsCount); 112 } 113 114 @Ignore // Fix this. The Master startup doesn't allow Master reporting as a RegionServer, not 115 // until way late after the Master startup finishes. Needs more work. 116 @Test 117 public void testSystemTablesOnMaster() throws Exception { 118 c.setBoolean(LoadBalancer.TABLES_ON_MASTER, true); 119 c.setBoolean(LoadBalancer.SYSTEM_TABLES_ON_MASTER, true); 120 // IS THIS SHORT-CIRCUIT RPC? Yes. Here is how it looks currently if I have an exception 121 // thrown in doBatchMutate inside a Region. 122 // 123 // java.lang.Exception 124 // at org.apache.hadoop.hbase.regionserver.HRegion.doBatchMutate(HRegion.java:3845) 125 // at org.apache.hadoop.hbase.regionserver.HRegion.put(HRegion.java:2972) 126 // at org.apache.hadoop.hbase.regionserver.RSRpcServices.mutate(RSRpcServices.java:2751) 127 // at org.apache.hadoop.hbase.client.ClientServiceCallable.doMutate(ClientServiceCallable.java:55) 128 // at org.apache.hadoop.hbase.client.HTable$3.rpcCall(HTable.java:585) 129 // at org.apache.hadoop.hbase.client.HTable$3.rpcCall(HTable.java:579) 130 // at org.apache.hadoop.hbase.client.RegionServerCallable.call(RegionServerCallable.java:126) 131 // at org.apache.hadoop.hbase.client.RpcRetryingCallerImpl.callWithRetries(RpcRetryingCallerImpl.java:106) 132 // at org.apache.hadoop.hbase.client.HTable.put(HTable.java:589) 133 // at org.apache.hadoop.hbase.master.TableNamespaceManager.insertIntoNSTable(TableNamespaceManager.java:156) 134 // at org.apache.hadoop.hbase.master.procedure.CreateNamespaceProcedure.insertIntoNSTable(CreateNamespaceProcedure.java:222) 135 // at org.apache.hadoop.hbase.master.procedure.CreateNamespaceProcedure.executeFromState(CreateNamespaceProcedure.java:76) 136 // at org.apache.hadoop.hbase.master.procedure.CreateNamespaceProcedure.executeFromState(CreateNamespaceProcedure.java:40) 137 // at org.apache.hadoop.hbase.procedure2.StateMachineProcedure.execute(StateMachineProcedure.java:181) 138 // at org.apache.hadoop.hbase.procedure2.Procedure.doExecute(Procedure.java:847) 139 // at org.apache.hadoop.hbase.procedure2.ProcedureExecutor.execProcedure(ProcedureExecutor.java:1440) 140 // at org.apache.hadoop.hbase.procedure2.ProcedureExecutor.executeProcedure(ProcedureExecutor.java:1209) 141 // at org.apache.hadoop.hbase.procedure2.ProcedureExecutor.access$800(ProcedureExecutor.java:79) 142 // at org.apache.hadoop.hbase.procedure2.ProcedureExecutor$WorkerThread.run(ProcedureExecutor.java:1719) 143 // 144 // If I comment out the ConnectionUtils ConnectionImplementation content, I see this: 145 // 146 // java.lang.Exception 147 // at org.apache.hadoop.hbase.regionserver.HRegion.doBatchMutate(HRegion.java:3845) 148 // at org.apache.hadoop.hbase.regionserver.HRegion.put(HRegion.java:2972) 149 // at org.apache.hadoop.hbase.regionserver.RSRpcServices.mutate(RSRpcServices.java:2751) 150 // at org.apache.hadoop.hbase.shaded.protobuf.generated.ClientProtos$ClientService$2.callBlockingMethod(ClientProtos.java:41546) 151 // at org.apache.hadoop.hbase.ipc.RpcServer.call(RpcServer.java:406) 152 // at org.apache.hadoop.hbase.ipc.CallRunner.run(CallRunner.java:133) 153 // at org.apache.hadoop.hbase.ipc.RpcExecutor$Handler.run(RpcExecutor.java:278) 154 // at org.apache.hadoop.hbase.ipc.RpcExecutor$Handler.run(RpcExecutor.java:258) 155 156 checkBalance(SYSTEM_REGIONS, REGIONS/SLAVES); 157 } 158 159 private void checkBalance(int masterCount, int rsCount) throws Exception { 160 MiniHBaseCluster cluster = TEST_UTIL.startMiniCluster(MASTERS, SLAVES); 161 TableName tn = TableName.valueOf(this.name.getMethodName()); 162 try { 163 Table t = TEST_UTIL.createMultiRegionTable(tn, HConstants.CATALOG_FAMILY, REGIONS); 164 LOG.info("Server: " + cluster.getMaster().getServerManager().getOnlineServersList()); 165 List<HRegion> regions = cluster.getMaster().getRegions(); 166 int mActualCount = regions.size(); 167 if (masterCount == 0 || masterCount == SYSTEM_REGIONS) { 168 // 0 means no regions on master. 169 assertEquals(masterCount, mActualCount); 170 } else { 171 // This is master as a regionserver scenario. 172 checkCount(masterCount, mActualCount); 173 } 174 // Allow that balance is not exact. FYI, getRegionServerThreads does not include master 175 // thread though it is a regionserver so we have to check master and then below the 176 // regionservers. 177 for (JVMClusterUtil.RegionServerThread rst: cluster.getRegionServerThreads()) { 178 regions = rst.getRegionServer().getRegions(); 179 int rsActualCount = regions.size(); 180 checkCount(rsActualCount, rsCount); 181 } 182 HMaster oldMaster = cluster.getMaster(); 183 cluster.killMaster(oldMaster.getServerName()); 184 oldMaster.join(); 185 while (cluster.getMaster() == null || 186 cluster.getMaster().getServerName().equals(oldMaster.getServerName())) { 187 Threads.sleep(10); 188 } 189 while (!cluster.getMaster().isInitialized()) { 190 Threads.sleep(10); 191 } 192 while (cluster.getMaster().getAssignmentManager(). 193 computeRegionInTransitionStat().getTotalRITs() > 0) { 194 Threads.sleep(100); 195 LOG.info("Waiting on RIT to go to zero before calling balancer..."); 196 } 197 LOG.info("Cluster is up; running balancer"); 198 cluster.getMaster().balance(); 199 regions = cluster.getMaster().getRegions(); 200 int mNewActualCount = regions.size(); 201 if (masterCount == 0 || masterCount == SYSTEM_REGIONS) { 202 // 0 means no regions on master. After crash, should still be no regions on master. 203 // If masterCount == SYSTEM_REGIONS, means master only carrying system regions and should 204 // still only carry system regions post crash. 205 assertEquals(masterCount, mNewActualCount); 206 } 207 } finally { 208 LOG.info("Running shutdown of cluster"); 209 TEST_UTIL.shutdownMiniCluster(); 210 } 211 } 212 213 private void checkCount(int actual, int expected) { 214 assertTrue("Actual=" + actual + ", expected=" + expected, 215 actual >= (expected - 2) && actual <= (expected + 2)); // Lots of slop +/- 2 216 } 217}