001/*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018package org.apache.hadoop.hbase.master.procedure;
019
020import java.io.IOException;
021import java.util.ArrayList;
022import java.util.List;
023import java.util.Optional;
024import java.util.concurrent.Future;
025import java.util.concurrent.TimeUnit;
026import org.apache.hadoop.hbase.Cell;
027import org.apache.hadoop.hbase.HBaseClassTestRule;
028import org.apache.hadoop.hbase.HBaseTestingUtility;
029import org.apache.hadoop.hbase.TableName;
030import org.apache.hadoop.hbase.Waiter.ExplainingPredicate;
031import org.apache.hadoop.hbase.client.BalanceRequest;
032import org.apache.hadoop.hbase.client.ColumnFamilyDescriptorBuilder;
033import org.apache.hadoop.hbase.client.Durability;
034import org.apache.hadoop.hbase.client.Get;
035import org.apache.hadoop.hbase.client.Put;
036import org.apache.hadoop.hbase.client.Table;
037import org.apache.hadoop.hbase.client.TableDescriptorBuilder;
038import org.apache.hadoop.hbase.coprocessor.CoprocessorHost;
039import org.apache.hadoop.hbase.coprocessor.ObserverContext;
040import org.apache.hadoop.hbase.coprocessor.RegionCoprocessor;
041import org.apache.hadoop.hbase.coprocessor.RegionCoprocessorEnvironment;
042import org.apache.hadoop.hbase.coprocessor.RegionObserver;
043import org.apache.hadoop.hbase.procedure2.ProcedureExecutor;
044import org.apache.hadoop.hbase.regionserver.HRegionServer;
045import org.apache.hadoop.hbase.testclassification.LargeTests;
046import org.apache.hadoop.hbase.testclassification.MasterTests;
047import org.apache.hadoop.hbase.util.Bytes;
048import org.apache.hadoop.hbase.util.JVMClusterUtil.RegionServerThread;
049import org.apache.hadoop.hbase.wal.WALEdit;
050import org.junit.AfterClass;
051import org.junit.BeforeClass;
052import org.junit.ClassRule;
053import org.junit.Test;
054import org.junit.experimental.categories.Category;
055
056/**
057 * Test to ensure that the priority for procedures and stuck checker can partially solve the problem
058 * describe in HBASE-19976, that is, RecoverMetaProcedure can finally be executed within a certain
059 * period of time.
060 */
061@Category({ MasterTests.class, LargeTests.class })
062public class TestProcedurePriority {
063
064  @ClassRule
065  public static final HBaseClassTestRule CLASS_RULE =
066    HBaseClassTestRule.forClass(TestProcedurePriority.class);
067
068  private static final HBaseTestingUtility UTIL = new HBaseTestingUtility();
069
070  private static String TABLE_NAME_PREFIX = "TestProcedurePriority-";
071
072  private static byte[] CF = Bytes.toBytes("cf");
073
074  private static byte[] CQ = Bytes.toBytes("cq");
075
076  private static int CORE_POOL_SIZE;
077
078  private static int TABLE_COUNT;
079
080  private static volatile boolean FAIL = false;
081
082  public static final class MyCP implements RegionObserver, RegionCoprocessor {
083
084    @Override
085    public Optional<RegionObserver> getRegionObserver() {
086      return Optional.of(this);
087    }
088
089    @Override
090    public void preGetOp(ObserverContext<RegionCoprocessorEnvironment> c, Get get,
091      List<Cell> result) throws IOException {
092      if (FAIL && c.getEnvironment().getRegionInfo().isMetaRegion()) {
093        throw new IOException("Inject error");
094      }
095    }
096
097    @Override
098    public void prePut(ObserverContext<RegionCoprocessorEnvironment> c, Put put, WALEdit edit,
099      Durability durability) throws IOException {
100      if (FAIL && c.getEnvironment().getRegionInfo().isMetaRegion()) {
101        throw new IOException("Inject error");
102      }
103    }
104  }
105
106  @BeforeClass
107  public static void setUp() throws Exception {
108    UTIL.getConfiguration().setLong(ProcedureExecutor.WORKER_KEEP_ALIVE_TIME_CONF_KEY, 5000);
109    UTIL.getConfiguration().setInt(MasterProcedureConstants.MASTER_PROCEDURE_THREADS, 4);
110    UTIL.getConfiguration().set(CoprocessorHost.REGION_COPROCESSOR_CONF_KEY, MyCP.class.getName());
111    UTIL.startMiniCluster(3);
112    CORE_POOL_SIZE =
113      UTIL.getMiniHBaseCluster().getMaster().getMasterProcedureExecutor().getCorePoolSize();
114    TABLE_COUNT = 50 * CORE_POOL_SIZE;
115    List<Future<?>> futures = new ArrayList<>();
116    for (int i = 0; i < TABLE_COUNT; i++) {
117      futures.add(UTIL.getAdmin().createTableAsync(
118        TableDescriptorBuilder.newBuilder(TableName.valueOf(TABLE_NAME_PREFIX + i))
119          .setColumnFamily(ColumnFamilyDescriptorBuilder.of(CF)).build()));
120    }
121    for (Future<?> future : futures) {
122      future.get(1, TimeUnit.MINUTES);
123    }
124    UTIL.getAdmin().balance(BalanceRequest.newBuilder().setIgnoreRegionsInTransition(true).build());
125    UTIL.waitUntilNoRegionsInTransition();
126  }
127
128  @AfterClass
129  public static void tearDown() throws Exception {
130    UTIL.shutdownMiniCluster();
131  }
132
133  @Test
134  public void test() throws Exception {
135    RegionServerThread rsWithMetaThread = UTIL.getMiniHBaseCluster().getRegionServerThreads()
136      .stream().filter(t -> !t.getRegionServer().getRegions(TableName.META_TABLE_NAME).isEmpty())
137      .findAny().get();
138    HRegionServer rsNoMeta = UTIL.getOtherRegionServer(rsWithMetaThread.getRegionServer());
139    // wait for NS table initialization to avoid our error inject affecting master initialization
140    UTIL.waitTableAvailable(TableName.NAMESPACE_TABLE_NAME);
141    FAIL = true;
142    UTIL.getMiniHBaseCluster().killRegionServer(rsNoMeta.getServerName());
143    // wait until all the worker thread are stuck, which means that the stuck checker will start to
144    // add new worker thread.
145    ProcedureExecutor<?> executor =
146      UTIL.getMiniHBaseCluster().getMaster().getMasterProcedureExecutor();
147    UTIL.waitFor(60000, new ExplainingPredicate<Exception>() {
148
149      @Override
150      public boolean evaluate() throws Exception {
151        return executor.getWorkerThreadCount() > CORE_POOL_SIZE;
152      }
153
154      @Override
155      public String explainFailure() throws Exception {
156        return "Stuck checker does not add new worker thread";
157      }
158    });
159    UTIL.getMiniHBaseCluster().killRegionServer(rsWithMetaThread.getRegionServer().getServerName());
160    rsWithMetaThread.join();
161    FAIL = false;
162    // verify that the cluster is back
163    UTIL.waitUntilNoRegionsInTransition(480000);
164    for (int i = 0; i < TABLE_COUNT; i++) {
165      try (Table table = UTIL.getConnection().getTable(TableName.valueOf(TABLE_NAME_PREFIX + i))) {
166        table.put(new Put(Bytes.toBytes(i)).addColumn(CF, CQ, Bytes.toBytes(i)));
167      }
168    }
169    UTIL.waitFor(60000, new ExplainingPredicate<Exception>() {
170
171      @Override
172      public boolean evaluate() throws Exception {
173        return executor.getWorkerThreadCount() == CORE_POOL_SIZE;
174      }
175
176      @Override
177      public String explainFailure() throws Exception {
178        return "The new workers do not timeout";
179      }
180    });
181  }
182}