001/**
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018package org.apache.hadoop.hbase.master.procedure;
019
020import java.io.IOException;
021import java.util.ArrayList;
022import java.util.List;
023import java.util.Optional;
024import java.util.concurrent.Future;
025import java.util.concurrent.TimeUnit;
026import org.apache.hadoop.hbase.Cell;
027import org.apache.hadoop.hbase.HBaseClassTestRule;
028import org.apache.hadoop.hbase.HBaseTestingUtility;
029import org.apache.hadoop.hbase.TableName;
030import org.apache.hadoop.hbase.Waiter.ExplainingPredicate;
031import org.apache.hadoop.hbase.client.ColumnFamilyDescriptorBuilder;
032import org.apache.hadoop.hbase.client.Durability;
033import org.apache.hadoop.hbase.client.Get;
034import org.apache.hadoop.hbase.client.Put;
035import org.apache.hadoop.hbase.client.Table;
036import org.apache.hadoop.hbase.client.TableDescriptorBuilder;
037import org.apache.hadoop.hbase.coprocessor.CoprocessorHost;
038import org.apache.hadoop.hbase.coprocessor.ObserverContext;
039import org.apache.hadoop.hbase.coprocessor.RegionCoprocessor;
040import org.apache.hadoop.hbase.coprocessor.RegionCoprocessorEnvironment;
041import org.apache.hadoop.hbase.coprocessor.RegionObserver;
042import org.apache.hadoop.hbase.procedure2.ProcedureExecutor;
043import org.apache.hadoop.hbase.regionserver.HRegionServer;
044import org.apache.hadoop.hbase.testclassification.LargeTests;
045import org.apache.hadoop.hbase.testclassification.MasterTests;
046import org.apache.hadoop.hbase.util.Bytes;
047import org.apache.hadoop.hbase.util.JVMClusterUtil.RegionServerThread;
048import org.apache.hadoop.hbase.wal.WALEdit;
049import org.junit.AfterClass;
050import org.junit.BeforeClass;
051import org.junit.ClassRule;
052import org.junit.Test;
053import org.junit.experimental.categories.Category;
054
055/**
056 * Test to ensure that the priority for procedures and stuck checker can partially solve the problem
057 * describe in HBASE-19976, that is, RecoverMetaProcedure can finally be executed within a certain
058 * period of time.
059 */
060@Category({ MasterTests.class, LargeTests.class })
061public class TestProcedurePriority {
062
063  @ClassRule
064  public static final HBaseClassTestRule CLASS_RULE =
065    HBaseClassTestRule.forClass(TestProcedurePriority.class);
066
067  private static final HBaseTestingUtility UTIL = new HBaseTestingUtility();
068
069  private static String TABLE_NAME_PREFIX = "TestProcedurePriority-";
070
071  private static byte[] CF = Bytes.toBytes("cf");
072
073  private static byte[] CQ = Bytes.toBytes("cq");
074
075  private static int CORE_POOL_SIZE;
076
077  private static int TABLE_COUNT;
078
079  private static volatile boolean FAIL = false;
080
081  public static final class MyCP implements RegionObserver, RegionCoprocessor {
082
083    @Override
084    public Optional<RegionObserver> getRegionObserver() {
085      return Optional.of(this);
086    }
087
088    @Override
089    public void preGetOp(ObserverContext<RegionCoprocessorEnvironment> c, Get get,
090        List<Cell> result) throws IOException {
091      if (FAIL && c.getEnvironment().getRegionInfo().isMetaRegion()) {
092        throw new IOException("Inject error");
093      }
094    }
095
096    @Override
097    public void prePut(ObserverContext<RegionCoprocessorEnvironment> c, Put put, WALEdit edit,
098        Durability durability) throws IOException {
099      if (FAIL && c.getEnvironment().getRegionInfo().isMetaRegion()) {
100        throw new IOException("Inject error");
101      }
102    }
103  }
104
105  @BeforeClass
106  public static void setUp() throws Exception {
107    UTIL.getConfiguration().setLong(ProcedureExecutor.WORKER_KEEP_ALIVE_TIME_CONF_KEY, 5000);
108    UTIL.getConfiguration().setInt(MasterProcedureConstants.MASTER_PROCEDURE_THREADS, 4);
109    UTIL.getConfiguration().setInt(MasterProcedureConstants.MASTER_URGENT_PROCEDURE_THREADS, 0);
110    UTIL.getConfiguration().set(CoprocessorHost.REGION_COPROCESSOR_CONF_KEY, MyCP.class.getName());
111    UTIL.startMiniCluster(3);
112    CORE_POOL_SIZE =
113      UTIL.getMiniHBaseCluster().getMaster().getMasterProcedureExecutor().getCorePoolSize();
114    TABLE_COUNT = 50 * CORE_POOL_SIZE;
115    List<Future<?>> futures = new ArrayList<>();
116    for (int i = 0; i < TABLE_COUNT; i++) {
117      futures.add(UTIL.getAdmin().createTableAsync(
118        TableDescriptorBuilder.newBuilder(TableName.valueOf(TABLE_NAME_PREFIX + i))
119          .setColumnFamily(ColumnFamilyDescriptorBuilder.of(CF)).build(),
120        null));
121    }
122    for (Future<?> future : futures) {
123      future.get(1, TimeUnit.MINUTES);
124    }
125    UTIL.getAdmin().balance(true);
126    UTIL.waitUntilNoRegionsInTransition();
127  }
128
129  @AfterClass
130  public static void tearDown() throws Exception {
131    UTIL.shutdownMiniCluster();
132  }
133
134  @Test
135  public void test() throws Exception {
136    RegionServerThread rsWithMetaThread = UTIL.getMiniHBaseCluster().getRegionServerThreads()
137      .stream().filter(t -> !t.getRegionServer().getRegions(TableName.META_TABLE_NAME).isEmpty())
138      .findAny().get();
139    HRegionServer rsNoMeta = UTIL.getOtherRegionServer(rsWithMetaThread.getRegionServer());
140    // wait for NS table initialization to avoid our error inject affecting master initialization
141    UTIL.waitTableAvailable(TableName.NAMESPACE_TABLE_NAME);
142    FAIL = true;
143    UTIL.getMiniHBaseCluster().killRegionServer(rsNoMeta.getServerName());
144    // wait until all the worker thread are stuck, which means that the stuck checker will start to
145    // add new worker thread.
146    ProcedureExecutor<?> executor =
147      UTIL.getMiniHBaseCluster().getMaster().getMasterProcedureExecutor();
148    UTIL.waitFor(60000, new ExplainingPredicate<Exception>() {
149
150      @Override
151      public boolean evaluate() throws Exception {
152        return executor.getWorkerThreadCount() > CORE_POOL_SIZE;
153      }
154
155      @Override
156      public String explainFailure() throws Exception {
157        return "Stuck checker does not add new worker thread";
158      }
159    });
160    UTIL.getMiniHBaseCluster().killRegionServer(rsWithMetaThread.getRegionServer().getServerName());
161    rsWithMetaThread.join();
162    FAIL = false;
163    // verify that the cluster is back
164    UTIL.waitUntilNoRegionsInTransition(480000);
165    for (int i = 0; i < TABLE_COUNT; i++) {
166      try (Table table = UTIL.getConnection().getTable(TableName.valueOf(TABLE_NAME_PREFIX + i))) {
167        table.put(new Put(Bytes.toBytes(i)).addColumn(CF, CQ, Bytes.toBytes(i)));
168      }
169    }
170    UTIL.waitFor(60000, new ExplainingPredicate<Exception>() {
171
172      @Override
173      public boolean evaluate() throws Exception {
174        return executor.getWorkerThreadCount() == CORE_POOL_SIZE;
175      }
176
177      @Override
178      public String explainFailure() throws Exception {
179        return "The new workers do not timeout";
180      }
181    });
182  }
183}