001/*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018package org.apache.hadoop.hbase.client;
019
020import static org.junit.jupiter.api.Assertions.assertEquals;
021import static org.junit.jupiter.api.Assertions.assertNotEquals;
022import static org.junit.jupiter.api.Assertions.assertNotNull;
023import static org.junit.jupiter.api.Assertions.assertTrue;
024
025import java.io.IOException;
026import java.time.Instant;
027import java.util.Arrays;
028import java.util.HashMap;
029import java.util.List;
030import java.util.Map;
031import java.util.Optional;
032import java.util.concurrent.Callable;
033import java.util.concurrent.CountDownLatch;
034import java.util.concurrent.TimeUnit;
035import java.util.stream.Collectors;
036import java.util.stream.Stream;
037import org.apache.hadoop.hbase.Coprocessor;
038import org.apache.hadoop.hbase.CoprocessorEnvironment;
039import org.apache.hadoop.hbase.HBaseParameterizedTestTemplate;
040import org.apache.hadoop.hbase.HBaseTestingUtil;
041import org.apache.hadoop.hbase.ServerName;
042import org.apache.hadoop.hbase.TableName;
043import org.apache.hadoop.hbase.coprocessor.MasterCoprocessor;
044import org.apache.hadoop.hbase.coprocessor.MasterCoprocessorEnvironment;
045import org.apache.hadoop.hbase.coprocessor.MasterObserver;
046import org.apache.hadoop.hbase.coprocessor.ObserverContext;
047import org.apache.hadoop.hbase.master.HMaster;
048import org.apache.hadoop.hbase.master.RegionState;
049import org.apache.hadoop.hbase.master.assignment.AssignmentManager;
050import org.apache.hadoop.hbase.master.hbck.HbckChore;
051import org.apache.hadoop.hbase.master.hbck.HbckReport;
052import org.apache.hadoop.hbase.master.procedure.MasterProcedureEnv;
053import org.apache.hadoop.hbase.master.procedure.TableProcedureInterface;
054import org.apache.hadoop.hbase.procedure2.Procedure;
055import org.apache.hadoop.hbase.procedure2.ProcedureExecutor;
056import org.apache.hadoop.hbase.procedure2.ProcedureSuspendedException;
057import org.apache.hadoop.hbase.procedure2.ProcedureTestingUtility;
058import org.apache.hadoop.hbase.regionserver.HRegionServer;
059import org.apache.hadoop.hbase.testclassification.ClientTests;
060import org.apache.hadoop.hbase.testclassification.LargeTests;
061import org.apache.hadoop.hbase.util.Bytes;
062import org.junit.jupiter.api.AfterAll;
063import org.junit.jupiter.api.BeforeAll;
064import org.junit.jupiter.api.BeforeEach;
065import org.junit.jupiter.api.Tag;
066import org.junit.jupiter.api.TestTemplate;
067import org.junit.jupiter.params.provider.Arguments;
068import org.slf4j.Logger;
069import org.slf4j.LoggerFactory;
070
071import org.apache.hbase.thirdparty.com.google.common.io.Closeables;
072
073/**
074 * Class to test HBaseHbck. Spins up the minicluster once at test start and then takes it down
075 * afterward. Add any testing of HBaseHbck functionality here.
076 */
077@Tag(LargeTests.TAG)
078@Tag(ClientTests.TAG)
079@HBaseParameterizedTestTemplate(name = "{index}: async={0}")
080public class TestHbck {
081
082  private static final Logger LOG = LoggerFactory.getLogger(TestHbck.class);
083  private final static HBaseTestingUtil TEST_UTIL = new HBaseTestingUtil();
084
085  private final boolean async;
086
087  private static final TableName TABLE_NAME = TableName.valueOf(TestHbck.class.getSimpleName());
088
089  private static ProcedureExecutor<MasterProcedureEnv> procExec;
090
091  private static AsyncConnection ASYNC_CONN;
092
093  public static Stream<Arguments> parameters() {
094    return Stream.of(Arguments.of(false), Arguments.of(true));
095  }
096
  /**
   * Stores the test parameter.
   * @param async kept as-is; {@link #getHbck()} reads it to decide where the Hbck comes from
   */
  public TestHbck(boolean async) {
    this.async = async;
  }
100
101  private Hbck getHbck() throws Exception {
102    if (async) {
103      return ASYNC_CONN.getHbck().get();
104    } else {
105      return TEST_UTIL.getHbck();
106    }
107  }
108
109  @BeforeAll
110  public static void setUpBeforeClass() throws Exception {
111    TEST_UTIL.startMiniCluster(3);
112    TEST_UTIL.createMultiRegionTable(TABLE_NAME, 3, new byte[][] { Bytes.toBytes("family1") });
113    procExec = TEST_UTIL.getMiniHBaseCluster().getMaster().getMasterProcedureExecutor();
114    ASYNC_CONN = ConnectionFactory.createAsyncConnection(TEST_UTIL.getConfiguration()).get();
115    TEST_UTIL.getHBaseCluster().getMaster().getMasterCoprocessorHost().load(
116      FailingMergeAfterMetaUpdatedMasterObserver.class, Coprocessor.PRIORITY_USER,
117      TEST_UTIL.getHBaseCluster().getMaster().getConfiguration());
118    TEST_UTIL.getHBaseCluster().getMaster().getMasterCoprocessorHost().load(
119      FailingSplitAfterMetaUpdatedMasterObserver.class, Coprocessor.PRIORITY_USER,
120      TEST_UTIL.getHBaseCluster().getMaster().getConfiguration());
121  }
122
123  @AfterAll
124  public static void tearDownAfterClass() throws Exception {
125    Closeables.close(ASYNC_CONN, true);
126    TEST_UTIL.shutdownMiniCluster();
127  }
128
129  @BeforeEach
130  public void setUp() throws IOException {
131    TEST_UTIL.ensureSomeRegionServersAvailable(3);
132  }
133
134  public static class SuspendProcedure extends
135    ProcedureTestingUtility.NoopProcedure<MasterProcedureEnv> implements TableProcedureInterface {
136    public SuspendProcedure() {
137      super();
138    }
139
140    @SuppressWarnings({ "rawtypes", "unchecked" })
141    @Override
142    protected Procedure[] execute(final MasterProcedureEnv env) throws ProcedureSuspendedException {
143      // Always suspend the procedure
144      throw new ProcedureSuspendedException();
145    }
146
147    @Override
148    public TableName getTableName() {
149      return TABLE_NAME;
150    }
151
152    @Override
153    public TableOperationType getTableOperationType() {
154      return TableOperationType.READ;
155    }
156  }
157
158  @TestTemplate
159  public void testBypassProcedure() throws Exception {
160    // SuspendProcedure
161    final SuspendProcedure proc = new SuspendProcedure();
162    long procId = procExec.submitProcedure(proc);
163    Thread.sleep(500);
164
165    // bypass the procedure
166    List<Long> pids = Arrays.<Long> asList(procId);
167    List<Boolean> results = getHbck().bypassProcedure(pids, 30000, false, false);
168    assertTrue(results.get(0), "Failed to by pass procedure!");
169    TEST_UTIL.waitFor(5000, () -> proc.isSuccess() && proc.isBypass());
170    LOG.info("{} finished", proc);
171  }
172
173  @TestTemplate
174  public void testSetTableStateInMeta() throws Exception {
175    Hbck hbck = getHbck();
176    // set table state to DISABLED
177    hbck.setTableStateInMeta(new TableState(TABLE_NAME, TableState.State.DISABLED));
178    // Method {@link Hbck#setTableStateInMeta()} returns previous state, which in this case
179    // will be DISABLED
180    TableState prevState =
181      hbck.setTableStateInMeta(new TableState(TABLE_NAME, TableState.State.ENABLED));
182    assertTrue(prevState.isDisabled(),
183      "Incorrect previous state! expected=DISABLED, found=" + prevState.getState());
184  }
185
186  @TestTemplate
187  public void testSetRegionStateInMeta() throws Exception {
188    Hbck hbck = getHbck();
189    Admin admin = TEST_UTIL.getAdmin();
190    TEST_UTIL.waitUntilAllRegionsAssigned(TABLE_NAME);
191    final List<RegionInfo> regions = admin.getRegions(TABLE_NAME);
192    final AssignmentManager am = TEST_UTIL.getHBaseCluster().getMaster().getAssignmentManager();
193    final Map<String, RegionState.State> beforeStates = new HashMap<>();
194    final Map<String, RegionState.State> requestStates = new HashMap<>();
195    regions.forEach(r -> {
196      RegionState beforeState = am.getRegionStates().getRegionState(r);
197      beforeStates.put(r.getEncodedName(), beforeState.getState());
198      LOG.debug("Before test: {} ; {}", r, beforeState.getState());
199      requestStates.put(r.getEncodedName(), RegionState.State.CLOSED);
200    });
201    final Callable<Void> doTest = () -> {
202      // run the entire test with the ProcedureExecution environment paused. This prevents
203      // background operations from modifying AM internal state between the assertions this test
204      // relies upon.
205      Map<String, RegionState.State> result = hbck.setRegionStateInMeta(requestStates);
206      result.forEach((k, v) -> {
207        RegionState.State beforeState = beforeStates.get(k);
208        assertEquals(beforeState, v, "response state should match before state; " + k);
209      });
210      regions.forEach(r -> {
211        RegionState afterState = am.getRegionStates().getRegionState(r.getEncodedName());
212        RegionState.State expectedState = requestStates.get(r.getEncodedName());
213        LOG.debug("After test: {}, {}", r, afterState);
214        assertEquals(expectedState, afterState.getState(),
215          "state in AM should match requested state ; " + r);
216      });
217      return null;
218    };
219    ProcedureTestingUtility.restart(procExec, true, true, null, doTest, null, false, true);
220    // restore the table as we found it -- fragile?
221    hbck.setRegionStateInMeta(beforeStates);
222  }
223
224  @TestTemplate
225  public void testAssigns() throws Exception {
226    Hbck hbck = getHbck();
227    final AssignmentManager am = TEST_UTIL.getHBaseCluster().getMaster().getAssignmentManager();
228    try (Admin admin = TEST_UTIL.getConnection().getAdmin()) {
229      List<RegionInfo> regions = admin.getRegions(TABLE_NAME).stream()
230        .filter(ri -> ri.getReplicaId() == RegionInfo.DEFAULT_REPLICA_ID).peek(ri -> {
231          final RegionState rs = am.getRegionStates().getRegionState(ri.getEncodedName());
232          LOG.info("RS: {}", rs);
233        }).collect(Collectors.toList());
234      List<Long> pids = hbck
235        .unassigns(regions.stream().map(RegionInfo::getEncodedName).collect(Collectors.toList()));
236      waitOnPids(pids);
237      // Rerun the unassign. Should fail for all Regions since they already unassigned; failed
238      // unassign will manifest as all pids being -1 (ever since HBASE-24885).
239      pids = hbck
240        .unassigns(regions.stream().map(RegionInfo::getEncodedName).collect(Collectors.toList()));
241      waitOnPids(pids);
242      for (long pid : pids) {
243        assertEquals(Procedure.NO_PROC_ID, pid);
244      }
245      // Rerun the unassign with override. Should fail for all Regions since they already
246      // unassigned; failed
247      // unassign will manifest as all pids being -1 (ever since HBASE-24885).
248      pids = hbck.unassigns(
249        regions.stream().map(RegionInfo::getEncodedName).collect(Collectors.toList()), true, false);
250      waitOnPids(pids);
251      for (long pid : pids) {
252        assertEquals(Procedure.NO_PROC_ID, pid);
253      }
254      // If we pass force, then we should be able to unassign EVEN THOUGH Regions already
255      // unassigned.... makes for a mess but operator might want to do this at an extreme when
256      // doing fixup of broke cluster.
257      pids = hbck.unassigns(
258        regions.stream().map(RegionInfo::getEncodedName).collect(Collectors.toList()), true, true);
259      waitOnPids(pids);
260      for (long pid : pids) {
261        assertNotEquals(Procedure.NO_PROC_ID, pid);
262      }
263      // Clean-up by bypassing all the unassigns we just made so tests can continue.
264      hbck.bypassProcedure(pids, 10000, true, true);
265      for (RegionInfo ri : regions) {
266        RegionState rs = TEST_UTIL.getHBaseCluster().getMaster().getAssignmentManager()
267          .getRegionStates().getRegionState(ri.getEncodedName());
268        LOG.info("RS: {}", rs.toString());
269        assertTrue(rs.isClosed(), rs.toString());
270      }
271      pids =
272        hbck.assigns(regions.stream().map(RegionInfo::getEncodedName).collect(Collectors.toList()));
273      waitOnPids(pids);
274      // Rerun the assign. Should fail for all Regions since they already assigned; failed
275      // assign will manifest as all pids being -1 (ever since HBASE-24885).
276      pids =
277        hbck.assigns(regions.stream().map(RegionInfo::getEncodedName).collect(Collectors.toList()));
278      for (long pid : pids) {
279        assertEquals(Procedure.NO_PROC_ID, pid);
280      }
281      for (RegionInfo ri : regions) {
282        RegionState rs = TEST_UTIL.getHBaseCluster().getMaster().getAssignmentManager()
283          .getRegionStates().getRegionState(ri.getEncodedName());
284        LOG.info("RS: {}", rs.toString());
285        assertTrue(rs.isOpened(), rs.toString());
286      }
287      // Rerun the assign with override. Should fail for all Regions since they already assigned
288      pids = hbck.assigns(
289        regions.stream().map(RegionInfo::getEncodedName).collect(Collectors.toList()), true, false);
290      for (long pid : pids) {
291        assertEquals(Procedure.NO_PROC_ID, pid);
292      }
293      // What happens if crappy region list passed?
294      pids = hbck.assigns(
295        Arrays.stream(new String[] { "a", "some rubbish name" }).collect(Collectors.toList()));
296      for (long pid : pids) {
297        assertEquals(Procedure.NO_PROC_ID, pid);
298      }
299    }
300  }
301
302  @TestTemplate
303  public void testScheduleSCP() throws Exception {
304    HRegionServer testRs = TEST_UTIL.getRSForFirstRegionInTable(TABLE_NAME);
305    try (final Table t = TEST_UTIL.getConnection().getTable(TABLE_NAME)) {
306      TEST_UTIL.loadTable(t, Bytes.toBytes("family1"), true);
307    }
308    ServerName serverName = testRs.getServerName();
309    Hbck hbck = getHbck();
310    List<Long> pids = hbck.scheduleServerCrashProcedures(Arrays.asList(serverName));
311    assertEquals(1, pids.size());
312    assertNotEquals((Long) Procedure.NO_PROC_ID, pids.get(0));
313    LOG.debug("SCP pid is {}", pids.get(0));
314
315    List<Long> newPids = hbck.scheduleServerCrashProcedures(Arrays.asList(serverName));
316    assertEquals(1, pids.size());
317    assertEquals((Long) Procedure.NO_PROC_ID, newPids.get(0));
318    waitOnPids(pids);
319  }
320
321  @TestTemplate
322  public void testRunHbckChore() throws Exception {
323    HMaster master = TEST_UTIL.getMiniHBaseCluster().getMaster();
324    HbckChore hbckChore = master.getHbckChore();
325    Instant endTimestamp = Optional.ofNullable(hbckChore.getLastReport())
326      .map(HbckReport::getCheckingEndTimestamp).orElse(Instant.EPOCH);
327    Hbck hbck = getHbck();
328    TEST_UTIL.waitFor(TimeUnit.MINUTES.toMillis(5), hbck::runHbckChore);
329    HbckReport report = hbckChore.getLastReport();
330    assertNotNull(report);
331    assertTrue(report.getCheckingEndTimestamp().isAfter(endTimestamp));
332  }
333
334  public static class FailingSplitAfterMetaUpdatedMasterObserver
335    implements MasterCoprocessor, MasterObserver {
336    @SuppressWarnings("checkstyle:VisibilityModifier")
337    public volatile CountDownLatch latch;
338
339    @Override
340    public void start(CoprocessorEnvironment e) throws IOException {
341      resetLatch();
342    }
343
344    @Override
345    public Optional<MasterObserver> getMasterObserver() {
346      return Optional.of(this);
347    }
348
349    @Override
350    public void preSplitRegionAfterMETAAction(ObserverContext<MasterCoprocessorEnvironment> ctx)
351      throws IOException {
352      LOG.info("I'm here");
353      latch.countDown();
354      throw new IOException("this procedure will fail at here forever");
355    }
356
357    public void resetLatch() {
358      this.latch = new CountDownLatch(1);
359    }
360  }
361
362  public static class FailingMergeAfterMetaUpdatedMasterObserver
363    implements MasterCoprocessor, MasterObserver {
364    @SuppressWarnings("checkstyle:VisibilityModifier")
365    public volatile CountDownLatch latch;
366
367    @Override
368    public void start(CoprocessorEnvironment e) throws IOException {
369      resetLatch();
370    }
371
372    @Override
373    public Optional<MasterObserver> getMasterObserver() {
374      return Optional.of(this);
375    }
376
377    public void resetLatch() {
378      this.latch = new CountDownLatch(1);
379    }
380
381    @Override
382    public void postMergeRegionsCommitAction(
383      final ObserverContext<MasterCoprocessorEnvironment> ctx, final RegionInfo[] regionsToMerge,
384      final RegionInfo mergedRegion) throws IOException {
385      latch.countDown();
386      throw new IOException("this procedure will fail at here forever");
387    }
388  }
389
390  private void waitOnPids(List<Long> pids) {
391    TEST_UTIL.waitFor(60000, () -> pids.stream().allMatch(procExec::isFinished));
392  }
393}