001/**
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018package org.apache.hadoop.hbase.master.assignment;
019
020import static org.junit.Assert.fail;
021
022import java.io.IOException;
023import java.util.Iterator;
024import java.util.List;
025import org.apache.hadoop.hbase.HBaseClassTestRule;
026import org.apache.hadoop.hbase.HBaseTestingUtility;
027import org.apache.hadoop.hbase.TableName;
028import org.apache.hadoop.hbase.client.Admin;
029import org.apache.hadoop.hbase.client.RegionInfo;
030import org.apache.hadoop.hbase.master.HMaster;
031import org.apache.hadoop.hbase.testclassification.MasterTests;
032import org.apache.hadoop.hbase.testclassification.MediumTests;
033import org.apache.hadoop.hbase.util.Bytes;
034import org.apache.hadoop.hbase.util.Threads;
035import org.junit.AfterClass;
036import org.junit.Before;
037import org.junit.BeforeClass;
038import org.junit.ClassRule;
039import org.junit.Rule;
040import org.junit.Test;
041import org.junit.experimental.categories.Category;
042import org.junit.rules.TestName;
043import org.slf4j.Logger;
044import org.slf4j.LoggerFactory;
045
046import org.apache.hbase.thirdparty.com.google.gson.JsonArray;
047import org.apache.hbase.thirdparty.com.google.gson.JsonElement;
048import org.apache.hbase.thirdparty.com.google.gson.JsonObject;
049import org.apache.hbase.thirdparty.com.google.gson.JsonParser;
050
051/**
052 * Tests for HBASE-18408 "AM consumes CPU and fills up the logs really fast when there is no RS to
053 * assign". If an {@link org.apache.hadoop.hbase.exceptions.UnexpectedStateException}, we'd spin on
054 * the ProcedureExecutor consuming CPU and filling logs. Test new back-off facility.
055 */
056@Category({MasterTests.class, MediumTests.class})
057public class TestUnexpectedStateException {
058  @ClassRule
059  public static final HBaseClassTestRule CLASS_RULE =
060      HBaseClassTestRule.forClass(TestUnexpectedStateException.class);
061  @Rule public final TestName name = new TestName();
062
063  private static final Logger LOG = LoggerFactory.getLogger(TestUnexpectedStateException.class);
064  private static final HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility();
065  private static final byte [] FAMILY = Bytes.toBytes("family");
066  private TableName tableName;
067  private static final int REGIONS = 10;
068
069  @BeforeClass
070  public static void beforeClass() throws Exception {
071    TEST_UTIL.startMiniCluster();
072  }
073
074  @AfterClass
075  public static void afterClass() throws Exception {
076    TEST_UTIL.shutdownMiniCluster();
077  }
078
079  @Before
080  public void before() throws IOException {
081    this.tableName = TableName.valueOf(this.name.getMethodName());
082    TEST_UTIL.createMultiRegionTable(this.tableName, FAMILY, REGIONS);
083  }
084
085  private RegionInfo pickArbitraryRegion(Admin admin) throws IOException {
086    List<RegionInfo> regions = admin.getRegions(this.tableName);
087    return regions.get(3);
088  }
089
090  /**
091   * Manufacture a state that will throw UnexpectedStateException.
092   * Change an assigned region's 'state' to be OPENING. That'll mess up a subsequent unassign
093   * causing it to throw UnexpectedStateException. We can easily manufacture this infinite retry
094   * state in UnassignProcedure because it has no startTransition. AssignProcedure does where it
095   * squashes whatever the current region state is making it OFFLINE. That makes it harder to mess
096   * it up. Make do with UnassignProcedure for now.
097   */
098  @Test
099  public void testUnableToAssign() throws Exception {
100    try (Admin admin = TEST_UTIL.getAdmin()) {
101      // Pick a random region from this tests' table to play with. Get its RegionStateNode.
102      // Clone it because the original will be changed by the system. We need clone to fake out
103      // a state.
104      final RegionInfo region = pickArbitraryRegion(admin);
105      AssignmentManager am = TEST_UTIL.getHBaseCluster().getMaster().getAssignmentManager();
106      RegionStates.RegionStateNode rsn =  am.getRegionStates().getRegionStateNode(region);
107      // Now force region to be in OPENING state.
108      am.markRegionAsOpening(rsn);
109      // Now the 'region' is in an artificially bad state, try an unassign again.
110      // Run unassign in a thread because it is blocking.
111      Runnable unassign = () -> {
112        try {
113          admin.unassign(region.getRegionName(), true);
114        } catch (IOException ioe) {
115          fail("Failed assign");
116        }
117      };
118      Thread t = new Thread(unassign, "unassign");
119      t.start();
120      while(!t.isAlive()) {
121        Threads.sleep(100);
122      }
123      Threads.sleep(1000);
124      // Unassign should be running and failing. Look for incrementing timeout as evidence that
125      // Unassign is stuck and doing backoff.
126      // Now fix the condition we were waiting on so the unassign can complete.
127      JsonParser parser = new JsonParser();
128      long oldTimeout = 0;
129      int timeoutIncrements = 0;
130      while (true) {
131        long timeout = getUnassignTimeout(parser, admin.getProcedures());
132        if (timeout > oldTimeout) {
133          LOG.info("Timeout incremented, was {}, now is {}, increments={}",
134              timeout, oldTimeout, timeoutIncrements);
135          oldTimeout = timeout;
136          timeoutIncrements++;
137          if (timeoutIncrements > 3) {
138            // If we incremented at least twice, break; the backoff is working.
139            break;
140          }
141        }
142        Thread.sleep(1000);
143      }
144      TEST_UTIL.getMiniHBaseCluster().stopMaster(0).join();
145      HMaster master = TEST_UTIL.getMiniHBaseCluster().startMaster().getMaster();
146      TEST_UTIL.waitFor(30000, () -> master.isInitialized());
147      am = master.getAssignmentManager();
148      rsn = am.getRegionStates().getRegionStateNode(region);
149      am.markRegionAsOpened(rsn);
150      t.join();
151    }
152  }
153
154  /**
155   * @param proceduresAsJSON This is String returned by admin.getProcedures call... an array of
156   *                         Procedures as JSON.
157   * @return The Procedure timeout value parsed from the Unassign Procedure.
158   * @Exception Thrown if we do not find UnassignProcedure or fail to parse timeout.
159   */
160  private long getUnassignTimeout(JsonParser parser, String proceduresAsJSON) throws Exception {
161    JsonArray array = parser.parse(proceduresAsJSON).getAsJsonArray();
162    Iterator<JsonElement> iterator = array.iterator();
163    while (iterator.hasNext()) {
164      JsonElement element = iterator.next();
165      JsonObject obj = element.getAsJsonObject();
166      String className = obj.get("className").getAsString();
167      String actualClassName = UnassignProcedure.class.getName();
168      if (className.equals(actualClassName)) {
169        return obj.get("timeout").getAsLong();
170      }
171    }
172    throw new Exception("Failed to find UnassignProcedure or timeout in " + proceduresAsJSON);
173  }
174}