001/**
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018package org.apache.hadoop.hbase.master.procedure;
019
020import java.io.IOException;
021import java.util.ArrayList;
022import java.util.List;
023import java.util.Optional;
024import java.util.concurrent.Future;
025import java.util.concurrent.TimeUnit;
026import org.apache.hadoop.hbase.Cell;
027import org.apache.hadoop.hbase.HBaseClassTestRule;
028import org.apache.hadoop.hbase.HBaseTestingUtility;
029import org.apache.hadoop.hbase.TableName;
030import org.apache.hadoop.hbase.Waiter.ExplainingPredicate;
031import org.apache.hadoop.hbase.client.ColumnFamilyDescriptorBuilder;
032import org.apache.hadoop.hbase.client.Durability;
033import org.apache.hadoop.hbase.client.Get;
034import org.apache.hadoop.hbase.client.Put;
035import org.apache.hadoop.hbase.client.Table;
036import org.apache.hadoop.hbase.client.TableDescriptorBuilder;
037import org.apache.hadoop.hbase.coprocessor.CoprocessorHost;
038import org.apache.hadoop.hbase.coprocessor.ObserverContext;
039import org.apache.hadoop.hbase.coprocessor.RegionCoprocessor;
040import org.apache.hadoop.hbase.coprocessor.RegionCoprocessorEnvironment;
041import org.apache.hadoop.hbase.coprocessor.RegionObserver;
042import org.apache.hadoop.hbase.procedure2.ProcedureExecutor;
043import org.apache.hadoop.hbase.regionserver.HRegionServer;
044import org.apache.hadoop.hbase.testclassification.LargeTests;
045import org.apache.hadoop.hbase.testclassification.MasterTests;
046import org.apache.hadoop.hbase.util.Bytes;
047import org.apache.hadoop.hbase.util.JVMClusterUtil.RegionServerThread;
048import org.apache.hadoop.hbase.wal.WALEdit;
049import org.junit.AfterClass;
050import org.junit.BeforeClass;
051import org.junit.ClassRule;
052import org.junit.Test;
053import org.junit.experimental.categories.Category;
054
055/**
056 * Test to ensure that the priority for procedures and stuck checker can partially solve the problem
057 * describe in HBASE-19976, that is, RecoverMetaProcedure can finally be executed within a certain
058 * period of time.
059 */
060@Category({ MasterTests.class, LargeTests.class })
061public class TestProcedurePriority {
062
063  @ClassRule
064  public static final HBaseClassTestRule CLASS_RULE =
065    HBaseClassTestRule.forClass(TestProcedurePriority.class);
066
067  private static final HBaseTestingUtility UTIL = new HBaseTestingUtility();
068
069  private static String TABLE_NAME_PREFIX = "TestProcedurePriority-";
070
071  private static byte[] CF = Bytes.toBytes("cf");
072
073  private static byte[] CQ = Bytes.toBytes("cq");
074
075  private static int CORE_POOL_SIZE;
076
077  private static int TABLE_COUNT;
078
079  private static volatile boolean FAIL = false;
080
081  public static final class MyCP implements RegionObserver, RegionCoprocessor {
082
083    @Override
084    public Optional<RegionObserver> getRegionObserver() {
085      return Optional.of(this);
086    }
087
088    @Override
089    public void preGetOp(ObserverContext<RegionCoprocessorEnvironment> c, Get get,
090        List<Cell> result) throws IOException {
091      if (FAIL && c.getEnvironment().getRegionInfo().isMetaRegion()) {
092        throw new IOException("Inject error");
093      }
094    }
095
096    @Override
097    public void prePut(ObserverContext<RegionCoprocessorEnvironment> c, Put put, WALEdit edit,
098        Durability durability) throws IOException {
099      if (FAIL && c.getEnvironment().getRegionInfo().isMetaRegion()) {
100        throw new IOException("Inject error");
101      }
102    }
103  }
104
105  @BeforeClass
106  public static void setUp() throws Exception {
107    UTIL.getConfiguration().setLong(ProcedureExecutor.WORKER_KEEP_ALIVE_TIME_CONF_KEY, 5000);
108    UTIL.getConfiguration().setInt(MasterProcedureConstants.MASTER_PROCEDURE_THREADS, 4);
109    UTIL.getConfiguration().set(CoprocessorHost.REGION_COPROCESSOR_CONF_KEY, MyCP.class.getName());
110    UTIL.startMiniCluster(3);
111    CORE_POOL_SIZE =
112      UTIL.getMiniHBaseCluster().getMaster().getMasterProcedureExecutor().getCorePoolSize();
113    TABLE_COUNT = 50 * CORE_POOL_SIZE;
114    List<Future<?>> futures = new ArrayList<>();
115    for (int i = 0; i < TABLE_COUNT; i++) {
116      futures.add(UTIL.getAdmin().createTableAsync(
117        TableDescriptorBuilder.newBuilder(TableName.valueOf(TABLE_NAME_PREFIX + i))
118          .setColumnFamily(ColumnFamilyDescriptorBuilder.of(CF)).build()));
119    }
120    for (Future<?> future : futures) {
121      future.get(1, TimeUnit.MINUTES);
122    }
123    UTIL.getAdmin().balance(true);
124    UTIL.waitUntilNoRegionsInTransition();
125  }
126
127  @AfterClass
128  public static void tearDown() throws Exception {
129    UTIL.shutdownMiniCluster();
130  }
131
132  @Test
133  public void test() throws Exception {
134    RegionServerThread rsWithMetaThread = UTIL.getMiniHBaseCluster().getRegionServerThreads()
135      .stream().filter(t -> !t.getRegionServer().getRegions(TableName.META_TABLE_NAME).isEmpty())
136      .findAny().get();
137    HRegionServer rsNoMeta = UTIL.getOtherRegionServer(rsWithMetaThread.getRegionServer());
138    // wait for NS table initialization to avoid our error inject affecting master initialization
139    UTIL.waitTableAvailable(TableName.NAMESPACE_TABLE_NAME);
140    FAIL = true;
141    UTIL.getMiniHBaseCluster().killRegionServer(rsNoMeta.getServerName());
142    // wait until all the worker thread are stuck, which means that the stuck checker will start to
143    // add new worker thread.
144    ProcedureExecutor<?> executor =
145      UTIL.getMiniHBaseCluster().getMaster().getMasterProcedureExecutor();
146    UTIL.waitFor(60000, new ExplainingPredicate<Exception>() {
147
148      @Override
149      public boolean evaluate() throws Exception {
150        return executor.getWorkerThreadCount() > CORE_POOL_SIZE;
151      }
152
153      @Override
154      public String explainFailure() throws Exception {
155        return "Stuck checker does not add new worker thread";
156      }
157    });
158    UTIL.getMiniHBaseCluster().killRegionServer(rsWithMetaThread.getRegionServer().getServerName());
159    rsWithMetaThread.join();
160    FAIL = false;
161    // verify that the cluster is back
162    UTIL.waitUntilNoRegionsInTransition(480000);
163    for (int i = 0; i < TABLE_COUNT; i++) {
164      try (Table table = UTIL.getConnection().getTable(TableName.valueOf(TABLE_NAME_PREFIX + i))) {
165        table.put(new Put(Bytes.toBytes(i)).addColumn(CF, CQ, Bytes.toBytes(i)));
166      }
167    }
168    UTIL.waitFor(60000, new ExplainingPredicate<Exception>() {
169
170      @Override
171      public boolean evaluate() throws Exception {
172        return executor.getWorkerThreadCount() == CORE_POOL_SIZE;
173      }
174
175      @Override
176      public String explainFailure() throws Exception {
177        return "The new workers do not timeout";
178      }
179    });
180  }
181}