001/*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018package org.apache.hadoop.hbase.client;
019
020import static org.junit.Assert.assertEquals;
021import static org.junit.Assert.assertNotEquals;
022import static org.junit.Assert.assertNotNull;
023import static org.junit.Assert.assertTrue;
024
025import java.io.IOException;
026import java.time.Instant;
027import java.util.Arrays;
028import java.util.HashMap;
029import java.util.List;
030import java.util.Map;
031import java.util.Optional;
032import java.util.concurrent.Callable;
033import java.util.concurrent.CountDownLatch;
034import java.util.concurrent.TimeUnit;
035import java.util.stream.Collectors;
036import org.apache.hadoop.hbase.Coprocessor;
037import org.apache.hadoop.hbase.CoprocessorEnvironment;
038import org.apache.hadoop.hbase.HBaseClassTestRule;
039import org.apache.hadoop.hbase.HBaseTestingUtil;
040import org.apache.hadoop.hbase.ServerName;
041import org.apache.hadoop.hbase.TableName;
042import org.apache.hadoop.hbase.coprocessor.MasterCoprocessor;
043import org.apache.hadoop.hbase.coprocessor.MasterCoprocessorEnvironment;
044import org.apache.hadoop.hbase.coprocessor.MasterObserver;
045import org.apache.hadoop.hbase.coprocessor.ObserverContext;
046import org.apache.hadoop.hbase.master.HMaster;
047import org.apache.hadoop.hbase.master.RegionState;
048import org.apache.hadoop.hbase.master.assignment.AssignmentManager;
049import org.apache.hadoop.hbase.master.hbck.HbckChore;
050import org.apache.hadoop.hbase.master.hbck.HbckReport;
051import org.apache.hadoop.hbase.master.procedure.MasterProcedureEnv;
052import org.apache.hadoop.hbase.master.procedure.TableProcedureInterface;
053import org.apache.hadoop.hbase.procedure2.Procedure;
054import org.apache.hadoop.hbase.procedure2.ProcedureExecutor;
055import org.apache.hadoop.hbase.procedure2.ProcedureSuspendedException;
056import org.apache.hadoop.hbase.procedure2.ProcedureTestingUtility;
057import org.apache.hadoop.hbase.regionserver.HRegionServer;
058import org.apache.hadoop.hbase.testclassification.ClientTests;
059import org.apache.hadoop.hbase.testclassification.LargeTests;
060import org.apache.hadoop.hbase.util.Bytes;
061import org.junit.AfterClass;
062import org.junit.Before;
063import org.junit.BeforeClass;
064import org.junit.ClassRule;
065import org.junit.Rule;
066import org.junit.Test;
067import org.junit.experimental.categories.Category;
068import org.junit.rules.TestName;
069import org.junit.runner.RunWith;
070import org.junit.runners.Parameterized;
071import org.junit.runners.Parameterized.Parameter;
072import org.junit.runners.Parameterized.Parameters;
073import org.slf4j.Logger;
074import org.slf4j.LoggerFactory;
075
076import org.apache.hbase.thirdparty.com.google.common.io.Closeables;
077
078/**
079 * Class to test HBaseHbck. Spins up the minicluster once at test start and then takes it down
080 * afterward. Add any testing of HBaseHbck functionality here.
081 */
082@RunWith(Parameterized.class)
083@Category({ LargeTests.class, ClientTests.class })
084public class TestHbck {
085  @ClassRule
086  public static final HBaseClassTestRule CLASS_RULE = HBaseClassTestRule.forClass(TestHbck.class);
087
088  private static final Logger LOG = LoggerFactory.getLogger(TestHbck.class);
089  private final static HBaseTestingUtil TEST_UTIL = new HBaseTestingUtil();
090
091  @Rule
092  public TestName name = new TestName();
093
094  @SuppressWarnings("checkstyle:VisibilityModifier")
095  @Parameter
096  public boolean async;
097
098  private static final TableName TABLE_NAME = TableName.valueOf(TestHbck.class.getSimpleName());
099
100  private static ProcedureExecutor<MasterProcedureEnv> procExec;
101
102  private static AsyncConnection ASYNC_CONN;
103
104  @Parameters(name = "{index}: async={0}")
105  public static List<Object[]> params() {
106    return Arrays.asList(new Object[] { false }, new Object[] { true });
107  }
108
109  private Hbck getHbck() throws Exception {
110    if (async) {
111      return ASYNC_CONN.getHbck().get();
112    } else {
113      return TEST_UTIL.getHbck();
114    }
115  }
116
117  @BeforeClass
118  public static void setUpBeforeClass() throws Exception {
119    TEST_UTIL.startMiniCluster(3);
120    TEST_UTIL.createMultiRegionTable(TABLE_NAME, 3, new byte[][] { Bytes.toBytes("family1") });
121    procExec = TEST_UTIL.getMiniHBaseCluster().getMaster().getMasterProcedureExecutor();
122    ASYNC_CONN = ConnectionFactory.createAsyncConnection(TEST_UTIL.getConfiguration()).get();
123    TEST_UTIL.getHBaseCluster().getMaster().getMasterCoprocessorHost().load(
124      FailingMergeAfterMetaUpdatedMasterObserver.class, Coprocessor.PRIORITY_USER,
125      TEST_UTIL.getHBaseCluster().getMaster().getConfiguration());
126    TEST_UTIL.getHBaseCluster().getMaster().getMasterCoprocessorHost().load(
127      FailingSplitAfterMetaUpdatedMasterObserver.class, Coprocessor.PRIORITY_USER,
128      TEST_UTIL.getHBaseCluster().getMaster().getConfiguration());
129  }
130
131  @AfterClass
132  public static void tearDownAfterClass() throws Exception {
133    Closeables.close(ASYNC_CONN, true);
134    TEST_UTIL.shutdownMiniCluster();
135  }
136
137  @Before
138  public void setUp() throws IOException {
139    TEST_UTIL.ensureSomeRegionServersAvailable(3);
140  }
141
142  public static class SuspendProcedure extends
143    ProcedureTestingUtility.NoopProcedure<MasterProcedureEnv> implements TableProcedureInterface {
144    public SuspendProcedure() {
145      super();
146    }
147
148    @SuppressWarnings({ "rawtypes", "unchecked" })
149    @Override
150    protected Procedure[] execute(final MasterProcedureEnv env) throws ProcedureSuspendedException {
151      // Always suspend the procedure
152      throw new ProcedureSuspendedException();
153    }
154
155    @Override
156    public TableName getTableName() {
157      return TABLE_NAME;
158    }
159
160    @Override
161    public TableOperationType getTableOperationType() {
162      return TableOperationType.READ;
163    }
164  }
165
166  @Test
167  public void testBypassProcedure() throws Exception {
168    // SuspendProcedure
169    final SuspendProcedure proc = new SuspendProcedure();
170    long procId = procExec.submitProcedure(proc);
171    Thread.sleep(500);
172
173    // bypass the procedure
174    List<Long> pids = Arrays.<Long> asList(procId);
175    List<Boolean> results = getHbck().bypassProcedure(pids, 30000, false, false);
176    assertTrue("Failed to by pass procedure!", results.get(0));
177    TEST_UTIL.waitFor(5000, () -> proc.isSuccess() && proc.isBypass());
178    LOG.info("{} finished", proc);
179  }
180
181  @Test
182  public void testSetTableStateInMeta() throws Exception {
183    Hbck hbck = getHbck();
184    // set table state to DISABLED
185    hbck.setTableStateInMeta(new TableState(TABLE_NAME, TableState.State.DISABLED));
186    // Method {@link Hbck#setTableStateInMeta()} returns previous state, which in this case
187    // will be DISABLED
188    TableState prevState =
189      hbck.setTableStateInMeta(new TableState(TABLE_NAME, TableState.State.ENABLED));
190    assertTrue("Incorrect previous state! expected=DISABLED, found=" + prevState.getState(),
191      prevState.isDisabled());
192  }
193
194  @Test
195  public void testSetRegionStateInMeta() throws Exception {
196    Hbck hbck = getHbck();
197    Admin admin = TEST_UTIL.getAdmin();
198    TEST_UTIL.waitUntilAllRegionsAssigned(TABLE_NAME);
199    final List<RegionInfo> regions = admin.getRegions(TABLE_NAME);
200    final AssignmentManager am = TEST_UTIL.getHBaseCluster().getMaster().getAssignmentManager();
201    final Map<String, RegionState.State> beforeStates = new HashMap<>();
202    final Map<String, RegionState.State> requestStates = new HashMap<>();
203    regions.forEach(r -> {
204      RegionState beforeState = am.getRegionStates().getRegionState(r);
205      beforeStates.put(r.getEncodedName(), beforeState.getState());
206      LOG.debug("Before test: {} ; {}", r, beforeState.getState());
207      requestStates.put(r.getEncodedName(), RegionState.State.CLOSED);
208    });
209    final Callable<Void> doTest = () -> {
210      // run the entire test with the ProcedureExecution environment paused. This prevents
211      // background operations from modifying AM internal state between the assertions this test
212      // relies upon.
213      Map<String, RegionState.State> result = hbck.setRegionStateInMeta(requestStates);
214      result.forEach((k, v) -> {
215        RegionState.State beforeState = beforeStates.get(k);
216        assertEquals("response state should match before state; " + k, beforeState, v);
217      });
218      regions.forEach(r -> {
219        RegionState afterState = am.getRegionStates().getRegionState(r.getEncodedName());
220        RegionState.State expectedState = requestStates.get(r.getEncodedName());
221        LOG.debug("After test: {}, {}", r, afterState);
222        assertEquals("state in AM should match requested state ; " + r, expectedState,
223          afterState.getState());
224      });
225      return null;
226    };
227    ProcedureTestingUtility.restart(procExec, true, true, null, doTest, null, false, true);
228    // restore the table as we found it -- fragile?
229    hbck.setRegionStateInMeta(beforeStates);
230  }
231
232  @Test
233  public void testAssigns() throws Exception {
234    Hbck hbck = getHbck();
235    final AssignmentManager am = TEST_UTIL.getHBaseCluster().getMaster().getAssignmentManager();
236    try (Admin admin = TEST_UTIL.getConnection().getAdmin()) {
237      List<RegionInfo> regions = admin.getRegions(TABLE_NAME).stream()
238        .filter(ri -> ri.getReplicaId() == RegionInfo.DEFAULT_REPLICA_ID).peek(ri -> {
239          final RegionState rs = am.getRegionStates().getRegionState(ri.getEncodedName());
240          LOG.info("RS: {}", rs);
241        }).collect(Collectors.toList());
242      List<Long> pids = hbck
243        .unassigns(regions.stream().map(RegionInfo::getEncodedName).collect(Collectors.toList()));
244      waitOnPids(pids);
245      // Rerun the unassign. Should fail for all Regions since they already unassigned; failed
246      // unassign will manifest as all pids being -1 (ever since HBASE-24885).
247      pids = hbck
248        .unassigns(regions.stream().map(RegionInfo::getEncodedName).collect(Collectors.toList()));
249      waitOnPids(pids);
250      for (long pid : pids) {
251        assertEquals(Procedure.NO_PROC_ID, pid);
252      }
253      // Rerun the unassign with override. Should fail for all Regions since they already
254      // unassigned; failed
255      // unassign will manifest as all pids being -1 (ever since HBASE-24885).
256      pids = hbck.unassigns(
257        regions.stream().map(RegionInfo::getEncodedName).collect(Collectors.toList()), true, false);
258      waitOnPids(pids);
259      for (long pid : pids) {
260        assertEquals(Procedure.NO_PROC_ID, pid);
261      }
262      // If we pass force, then we should be able to unassign EVEN THOUGH Regions already
263      // unassigned.... makes for a mess but operator might want to do this at an extreme when
264      // doing fixup of broke cluster.
265      pids = hbck.unassigns(
266        regions.stream().map(RegionInfo::getEncodedName).collect(Collectors.toList()), true, true);
267      waitOnPids(pids);
268      for (long pid : pids) {
269        assertNotEquals(Procedure.NO_PROC_ID, pid);
270      }
271      // Clean-up by bypassing all the unassigns we just made so tests can continue.
272      hbck.bypassProcedure(pids, 10000, true, true);
273      for (RegionInfo ri : regions) {
274        RegionState rs = TEST_UTIL.getHBaseCluster().getMaster().getAssignmentManager()
275          .getRegionStates().getRegionState(ri.getEncodedName());
276        LOG.info("RS: {}", rs.toString());
277        assertTrue(rs.toString(), rs.isClosed());
278      }
279      pids =
280        hbck.assigns(regions.stream().map(RegionInfo::getEncodedName).collect(Collectors.toList()));
281      waitOnPids(pids);
282      // Rerun the assign. Should fail for all Regions since they already assigned; failed
283      // assign will manifest as all pids being -1 (ever since HBASE-24885).
284      pids =
285        hbck.assigns(regions.stream().map(RegionInfo::getEncodedName).collect(Collectors.toList()));
286      for (long pid : pids) {
287        assertEquals(Procedure.NO_PROC_ID, pid);
288      }
289      for (RegionInfo ri : regions) {
290        RegionState rs = TEST_UTIL.getHBaseCluster().getMaster().getAssignmentManager()
291          .getRegionStates().getRegionState(ri.getEncodedName());
292        LOG.info("RS: {}", rs.toString());
293        assertTrue(rs.toString(), rs.isOpened());
294      }
295      // Rerun the assign with override. Should fail for all Regions since they already assigned
296      pids = hbck.assigns(
297        regions.stream().map(RegionInfo::getEncodedName).collect(Collectors.toList()), true, false);
298      for (long pid : pids) {
299        assertEquals(Procedure.NO_PROC_ID, pid);
300      }
301      // What happens if crappy region list passed?
302      pids = hbck.assigns(
303        Arrays.stream(new String[] { "a", "some rubbish name" }).collect(Collectors.toList()));
304      for (long pid : pids) {
305        assertEquals(Procedure.NO_PROC_ID, pid);
306      }
307    }
308  }
309
310  @Test
311  public void testScheduleSCP() throws Exception {
312    HRegionServer testRs = TEST_UTIL.getRSForFirstRegionInTable(TABLE_NAME);
313    try (final Table t = TEST_UTIL.getConnection().getTable(TABLE_NAME)) {
314      TEST_UTIL.loadTable(t, Bytes.toBytes("family1"), true);
315    }
316    ServerName serverName = testRs.getServerName();
317    Hbck hbck = getHbck();
318    List<Long> pids = hbck.scheduleServerCrashProcedures(Arrays.asList(serverName));
319    assertEquals(1, pids.size());
320    assertNotEquals((Long) Procedure.NO_PROC_ID, pids.get(0));
321    LOG.debug("SCP pid is {}", pids.get(0));
322
323    List<Long> newPids = hbck.scheduleServerCrashProcedures(Arrays.asList(serverName));
324    assertEquals(1, pids.size());
325    assertEquals((Long) Procedure.NO_PROC_ID, newPids.get(0));
326    waitOnPids(pids);
327  }
328
329  @Test
330  public void testRunHbckChore() throws Exception {
331    HMaster master = TEST_UTIL.getMiniHBaseCluster().getMaster();
332    HbckChore hbckChore = master.getHbckChore();
333    Instant endTimestamp = Optional.ofNullable(hbckChore.getLastReport())
334      .map(HbckReport::getCheckingEndTimestamp).orElse(Instant.EPOCH);
335    Hbck hbck = getHbck();
336    TEST_UTIL.waitFor(TimeUnit.MINUTES.toMillis(5), hbck::runHbckChore);
337    HbckReport report = hbckChore.getLastReport();
338    assertNotNull(report);
339    assertTrue(report.getCheckingEndTimestamp().isAfter(endTimestamp));
340  }
341
342  public static class FailingSplitAfterMetaUpdatedMasterObserver
343    implements MasterCoprocessor, MasterObserver {
344    @SuppressWarnings("checkstyle:VisibilityModifier")
345    public volatile CountDownLatch latch;
346
347    @Override
348    public void start(CoprocessorEnvironment e) throws IOException {
349      resetLatch();
350    }
351
352    @Override
353    public Optional<MasterObserver> getMasterObserver() {
354      return Optional.of(this);
355    }
356
357    @Override
358    public void preSplitRegionAfterMETAAction(ObserverContext<MasterCoprocessorEnvironment> ctx)
359      throws IOException {
360      LOG.info("I'm here");
361      latch.countDown();
362      throw new IOException("this procedure will fail at here forever");
363    }
364
365    public void resetLatch() {
366      this.latch = new CountDownLatch(1);
367    }
368  }
369
370  public static class FailingMergeAfterMetaUpdatedMasterObserver
371    implements MasterCoprocessor, MasterObserver {
372    @SuppressWarnings("checkstyle:VisibilityModifier")
373    public volatile CountDownLatch latch;
374
375    @Override
376    public void start(CoprocessorEnvironment e) throws IOException {
377      resetLatch();
378    }
379
380    @Override
381    public Optional<MasterObserver> getMasterObserver() {
382      return Optional.of(this);
383    }
384
385    public void resetLatch() {
386      this.latch = new CountDownLatch(1);
387    }
388
389    @Override
390    public void postMergeRegionsCommitAction(
391      final ObserverContext<MasterCoprocessorEnvironment> ctx, final RegionInfo[] regionsToMerge,
392      final RegionInfo mergedRegion) throws IOException {
393      latch.countDown();
394      throw new IOException("this procedure will fail at here forever");
395    }
396  }
397
398  private void waitOnPids(List<Long> pids) {
399    TEST_UTIL.waitFor(60000, () -> pids.stream().allMatch(procExec::isFinished));
400  }
401}