/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.master.procedure;

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.Optional;
import java.util.concurrent.Future;
import java.util.concurrent.Semaphore;
import java.util.concurrent.TimeUnit;
import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.HBaseClassTestRule;
import org.apache.hadoop.hbase.HBaseTestingUtil;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.Waiter.ExplainingPredicate;
import org.apache.hadoop.hbase.client.AsyncAdmin;
import org.apache.hadoop.hbase.client.BalanceRequest;
import org.apache.hadoop.hbase.client.ColumnFamilyDescriptorBuilder;
import org.apache.hadoop.hbase.client.Durability;
import org.apache.hadoop.hbase.client.Get;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Table;
import org.apache.hadoop.hbase.client.TableDescriptorBuilder;
import org.apache.hadoop.hbase.coprocessor.CoprocessorHost;
import org.apache.hadoop.hbase.coprocessor.ObserverContext;
import org.apache.hadoop.hbase.coprocessor.RegionCoprocessor;
import org.apache.hadoop.hbase.coprocessor.RegionCoprocessorEnvironment;
import org.apache.hadoop.hbase.coprocessor.RegionObserver;
import org.apache.hadoop.hbase.procedure2.ProcedureExecutor;
import org.apache.hadoop.hbase.regionserver.HRegionServer;
import org.apache.hadoop.hbase.testclassification.LargeTests;
import org.apache.hadoop.hbase.testclassification.MasterTests;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.JVMClusterUtil.RegionServerThread;
import org.apache.hadoop.hbase.wal.WALEdit;
import org.junit.AfterClass;
import org.junit.BeforeClass;
import org.junit.ClassRule;
import org.junit.Test;
import org.junit.experimental.categories.Category;

/**
 * Test to ensure that the priority for procedures and stuck checker can partially solve the problem
 * described in HBASE-19976, that is, RecoverMetaProcedure can finally be executed within a certain
 * period of time.
 */
@Category({ MasterTests.class, LargeTests.class })
public class TestProcedurePriority {

  @ClassRule
  public static final HBaseClassTestRule CLASS_RULE =
    HBaseClassTestRule.forClass(TestProcedurePriority.class);

  private static final HBaseTestingUtil UTIL = new HBaseTestingUtil();

  /** Every test table is named with this prefix followed by its index. */
  private static final String TABLE_NAME_PREFIX = "TestProcedurePriority-";

  private static final byte[] CF = Bytes.toBytes("cf");

  private static final byte[] CQ = Bytes.toBytes("cq");

  /** Core pool size of the master procedure executor, captured once in {@link #setUp()}. */
  private static int CORE_POOL_SIZE;

  /** Number of tables created in {@link #setUp()}: 50 per core procedure worker. */
  private static int TABLE_COUNT;

  /**
   * Fault-injection switch read by {@link MyCP}. It is volatile because it is written by the test
   * thread and read from coprocessor hooks.
   */
  private static volatile boolean FAIL = false;

  /**
   * Region coprocessor that rejects every get and put against the meta region with an
   * {@link IOException} while {@link #FAIL} is set. Requests against other regions are not
   * affected.
   */
  public static final class MyCP implements RegionObserver, RegionCoprocessor {

    @Override
    public Optional<RegionObserver> getRegionObserver() {
      return Optional.of(this);
    }

    @Override
    public void preGetOp(ObserverContext<RegionCoprocessorEnvironment> c, Get get,
      List<Cell> result) throws IOException {
      if (FAIL && c.getEnvironment().getRegionInfo().isMetaRegion()) {
        throw new IOException("Inject error");
      }
    }

    @Override
    public void prePut(ObserverContext<RegionCoprocessorEnvironment> c, Put put, WALEdit edit,
      Durability durability) throws IOException {
      if (FAIL && c.getEnvironment().getRegionInfo().isMetaRegion()) {
        throw new IOException("Inject error");
      }
    }
  }

  /**
   * Starts the mini cluster with {@link MyCP} installed as a region coprocessor, creates
   * {@link #TABLE_COUNT} single-family tables, then balances the cluster and waits until no region
   * is in transition.
   */
  @BeforeClass
  public static void setUp() throws Exception {
    UTIL.getConfiguration().setLong(ProcedureExecutor.WORKER_KEEP_ALIVE_TIME_CONF_KEY, 5000);
    UTIL.getConfiguration().setInt(MasterProcedureConstants.MASTER_PROCEDURE_THREADS, 4);
    UTIL.getConfiguration().set(CoprocessorHost.REGION_COPROCESSOR_CONF_KEY, MyCP.class.getName());
    UTIL.startMiniCluster(3);
    CORE_POOL_SIZE =
      UTIL.getMiniHBaseCluster().getMaster().getMasterProcedureExecutor().getCorePoolSize();
    TABLE_COUNT = 50 * CORE_POOL_SIZE;
    List<Future<?>> futures = new ArrayList<>();
    AsyncAdmin admin = UTIL.getAsyncConnection().getAdmin();
    // Allow at most 10 create-table requests in flight; each completion frees one permit.
    Semaphore concurrency = new Semaphore(10);
    for (int i = 0; i < TABLE_COUNT; i++) {
      concurrency.acquire();
      futures.add(admin
        .createTable(TableDescriptorBuilder.newBuilder(TableName.valueOf(TABLE_NAME_PREFIX + i))
          .setColumnFamily(ColumnFamilyDescriptorBuilder.of(CF)).build())
        .whenComplete((r, e) -> concurrency.release()));
    }
    for (Future<?> future : futures) {
      future.get(3, TimeUnit.MINUTES);
    }
    UTIL.getAdmin().balance(BalanceRequest.newBuilder().setIgnoreRegionsInTransition(true).build());
    UTIL.waitUntilNoRegionsInTransition();
  }

  @AfterClass
  public static void tearDown() throws Exception {
    UTIL.shutdownMiniCluster();
  }

  /**
   * Kills a region server that does not host meta while meta access is failing, waits for the
   * procedure executor to grow beyond its core pool size, then kills the region server hosting
   * meta. After fault injection is turned off, the test verifies that the cluster recovers, that
   * every table accepts a put, and that the worker count returns to the core pool size.
   */
  @Test
  public void test() throws Exception {
    RegionServerThread rsWithMetaThread = UTIL.getMiniHBaseCluster().getRegionServerThreads()
      .stream().filter(t -> !t.getRegionServer().getRegions(TableName.META_TABLE_NAME).isEmpty())
      .findAny()
      .orElseThrow(() -> new IllegalStateException("No region server is hosting the meta region"));
    HRegionServer rsNoMeta = UTIL.getOtherRegionServer(rsWithMetaThread.getRegionServer());
    ProcedureExecutor<?> executor =
      UTIL.getMiniHBaseCluster().getMaster().getMasterProcedureExecutor();
    FAIL = true;
    try {
      UTIL.getMiniHBaseCluster().killRegionServer(rsNoMeta.getServerName());
      // wait until all the worker threads are stuck, which means that the stuck checker will start
      // to add new worker threads.
      UTIL.waitFor(60000, new ExplainingPredicate<Exception>() {

        @Override
        public boolean evaluate() throws Exception {
          return executor.getWorkerThreadCount() > CORE_POOL_SIZE;
        }

        @Override
        public String explainFailure() throws Exception {
          return "Stuck checker does not add new worker thread";
        }
      });
      UTIL.getMiniHBaseCluster()
        .killRegionServer(rsWithMetaThread.getRegionServer().getServerName());
      rsWithMetaThread.join();
    } finally {
      // Always switch fault injection off, so that a failure above does not leave the coprocessor
      // rejecting meta requests while the cluster is being shut down.
      FAIL = false;
    }
    // verify that the cluster is back
    UTIL.waitUntilNoRegionsInTransition(480000);
    for (int i = 0; i < TABLE_COUNT; i++) {
      try (Table table = UTIL.getConnection().getTable(TableName.valueOf(TABLE_NAME_PREFIX + i))) {
        table.put(new Put(Bytes.toBytes(i)).addColumn(CF, CQ, Bytes.toBytes(i)));
      }
    }
    UTIL.waitFor(60000, new ExplainingPredicate<Exception>() {

      @Override
      public boolean evaluate() throws Exception {
        return executor.getWorkerThreadCount() == CORE_POOL_SIZE;
      }

      @Override
      public String explainFailure() throws Exception {
        return "The new workers do not timeout";
      }
    });
  }
}