001/** 002 * Licensed to the Apache Software Foundation (ASF) under one 003 * or more contributor license agreements. See the NOTICE file 004 * distributed with this work for additional information 005 * regarding copyright ownership. The ASF licenses this file 006 * to you under the Apache License, Version 2.0 (the 007 * "License"); you may not use this file except in compliance 008 * with the License. You may obtain a copy of the License at 009 * 010 * http://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, software 013 * distributed under the License is distributed on an "AS IS" BASIS, 014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 015 * See the License for the specific language governing permissions and 016 * limitations under the License. 017 */ 018package org.apache.hadoop.hbase.master.assignment; 019 020import static org.junit.Assert.fail; 021 022import java.io.IOException; 023import java.util.Iterator; 024import java.util.List; 025import org.apache.hadoop.hbase.HBaseClassTestRule; 026import org.apache.hadoop.hbase.HBaseTestingUtility; 027import org.apache.hadoop.hbase.TableName; 028import org.apache.hadoop.hbase.client.Admin; 029import org.apache.hadoop.hbase.client.RegionInfo; 030import org.apache.hadoop.hbase.master.HMaster; 031import org.apache.hadoop.hbase.testclassification.MasterTests; 032import org.apache.hadoop.hbase.testclassification.MediumTests; 033import org.apache.hadoop.hbase.util.Bytes; 034import org.apache.hadoop.hbase.util.Threads; 035import org.junit.AfterClass; 036import org.junit.Before; 037import org.junit.BeforeClass; 038import org.junit.ClassRule; 039import org.junit.Rule; 040import org.junit.Test; 041import org.junit.experimental.categories.Category; 042import org.junit.rules.TestName; 043import org.slf4j.Logger; 044import org.slf4j.LoggerFactory; 045 046import org.apache.hbase.thirdparty.com.google.gson.JsonArray; 047import org.apache.hbase.thirdparty.com.google.gson.JsonElement; 048import org.apache.hbase.thirdparty.com.google.gson.JsonObject; 049import org.apache.hbase.thirdparty.com.google.gson.JsonParser; 050 051/** 052 * Tests for HBASE-18408 "AM consumes CPU and fills up the logs really fast when there is no RS to 053 * assign". If an {@link org.apache.hadoop.hbase.exceptions.UnexpectedStateException}, we'd spin on 054 * the ProcedureExecutor consuming CPU and filling logs. Test new back-off facility. 055 */ 056@Category({MasterTests.class, MediumTests.class}) 057public class TestUnexpectedStateException { 058 @ClassRule 059 public static final HBaseClassTestRule CLASS_RULE = 060 HBaseClassTestRule.forClass(TestUnexpectedStateException.class); 061 @Rule public final TestName name = new TestName(); 062 063 private static final Logger LOG = LoggerFactory.getLogger(TestUnexpectedStateException.class); 064 private static final HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility(); 065 private static final byte [] FAMILY = Bytes.toBytes("family"); 066 private TableName tableName; 067 private static final int REGIONS = 10; 068 069 @BeforeClass 070 public static void beforeClass() throws Exception { 071 TEST_UTIL.startMiniCluster(); 072 } 073 074 @AfterClass 075 public static void afterClass() throws Exception { 076 TEST_UTIL.shutdownMiniCluster(); 077 } 078 079 @Before 080 public void before() throws IOException { 081 this.tableName = TableName.valueOf(this.name.getMethodName()); 082 TEST_UTIL.createMultiRegionTable(this.tableName, FAMILY, REGIONS); 083 } 084 085 private RegionInfo pickArbitraryRegion(Admin admin) throws IOException { 086 List<RegionInfo> regions = admin.getRegions(this.tableName); 087 return regions.get(3); 088 } 089 090 /** 091 * Manufacture a state that will throw UnexpectedStateException. 092 * Change an assigned region's 'state' to be OPENING. That'll mess up a subsequent unassign 093 * causing it to throw UnexpectedStateException. We can easily manufacture this infinite retry 094 * state in UnassignProcedure because it has no startTransition. AssignProcedure does where it 095 * squashes whatever the current region state is making it OFFLINE. That makes it harder to mess 096 * it up. Make do with UnassignProcedure for now. 097 */ 098 @Test 099 public void testUnableToAssign() throws Exception { 100 try (Admin admin = TEST_UTIL.getAdmin()) { 101 // Pick a random region from this tests' table to play with. Get its RegionStateNode. 102 // Clone it because the original will be changed by the system. We need clone to fake out 103 // a state. 104 final RegionInfo region = pickArbitraryRegion(admin); 105 AssignmentManager am = TEST_UTIL.getHBaseCluster().getMaster().getAssignmentManager(); 106 RegionStates.RegionStateNode rsn = am.getRegionStates().getRegionStateNode(region); 107 // Now force region to be in OPENING state. 108 am.markRegionAsOpening(rsn); 109 // Now the 'region' is in an artificially bad state, try an unassign again. 110 // Run unassign in a thread because it is blocking. 111 Runnable unassign = () -> { 112 try { 113 admin.unassign(region.getRegionName(), true); 114 } catch (IOException ioe) { 115 fail("Failed assign"); 116 } 117 }; 118 Thread t = new Thread(unassign, "unassign"); 119 t.start(); 120 while(!t.isAlive()) { 121 Threads.sleep(100); 122 } 123 Threads.sleep(1000); 124 // Unassign should be running and failing. Look for incrementing timeout as evidence that 125 // Unassign is stuck and doing backoff. 126 // Now fix the condition we were waiting on so the unassign can complete. 127 JsonParser parser = new JsonParser(); 128 long oldTimeout = 0; 129 int timeoutIncrements = 0; 130 while (true) { 131 long timeout = getUnassignTimeout(parser, admin.getProcedures()); 132 if (timeout > oldTimeout) { 133 LOG.info("Timeout incremented, was {}, now is {}, increments={}", 134 timeout, oldTimeout, timeoutIncrements); 135 oldTimeout = timeout; 136 timeoutIncrements++; 137 if (timeoutIncrements > 3) { 138 // If we incremented at least twice, break; the backoff is working. 139 break; 140 } 141 } 142 Thread.sleep(1000); 143 } 144 TEST_UTIL.getMiniHBaseCluster().stopMaster(0).join(); 145 HMaster master = TEST_UTIL.getMiniHBaseCluster().startMaster().getMaster(); 146 TEST_UTIL.waitFor(30000, () -> master.isInitialized()); 147 am = master.getAssignmentManager(); 148 rsn = am.getRegionStates().getRegionStateNode(region); 149 am.markRegionAsOpened(rsn); 150 t.join(); 151 } 152 } 153 154 /** 155 * @param proceduresAsJSON This is String returned by admin.getProcedures call... an array of 156 * Procedures as JSON. 157 * @return The Procedure timeout value parsed from the Unassign Procedure. 158 * @Exception Thrown if we do not find UnassignProcedure or fail to parse timeout. 159 */ 160 private long getUnassignTimeout(JsonParser parser, String proceduresAsJSON) throws Exception { 161 JsonArray array = parser.parse(proceduresAsJSON).getAsJsonArray(); 162 Iterator<JsonElement> iterator = array.iterator(); 163 while (iterator.hasNext()) { 164 JsonElement element = iterator.next(); 165 JsonObject obj = element.getAsJsonObject(); 166 String className = obj.get("className").getAsString(); 167 String actualClassName = UnassignProcedure.class.getName(); 168 if (className.equals(actualClassName)) { 169 return obj.get("timeout").getAsLong(); 170 } 171 } 172 throw new Exception("Failed to find UnassignProcedure or timeout in " + proceduresAsJSON); 173 } 174}