001/*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018package org.apache.hadoop.hbase.client;
019
020import static org.junit.Assert.assertEquals;
021import static org.junit.Assert.assertNotEquals;
022import static org.junit.Assert.assertNotNull;
023import static org.junit.Assert.assertTrue;
024
025import java.io.IOException;
026import java.time.Instant;
027import java.util.Arrays;
028import java.util.HashMap;
029import java.util.List;
030import java.util.Map;
031import java.util.Optional;
032import java.util.concurrent.CountDownLatch;
033import java.util.concurrent.TimeUnit;
034import java.util.stream.Collectors;
035import org.apache.hadoop.hbase.Coprocessor;
036import org.apache.hadoop.hbase.CoprocessorEnvironment;
037import org.apache.hadoop.hbase.HBaseClassTestRule;
038import org.apache.hadoop.hbase.HBaseTestingUtility;
039import org.apache.hadoop.hbase.ServerName;
040import org.apache.hadoop.hbase.TableName;
041import org.apache.hadoop.hbase.coprocessor.MasterCoprocessor;
042import org.apache.hadoop.hbase.coprocessor.MasterCoprocessorEnvironment;
043import org.apache.hadoop.hbase.coprocessor.MasterObserver;
044import org.apache.hadoop.hbase.coprocessor.ObserverContext;
045import org.apache.hadoop.hbase.master.HMaster;
046import org.apache.hadoop.hbase.master.RegionState;
047import org.apache.hadoop.hbase.master.assignment.AssignmentManager;
048import org.apache.hadoop.hbase.master.hbck.HbckChore;
049import org.apache.hadoop.hbase.master.hbck.HbckReport;
050import org.apache.hadoop.hbase.master.procedure.MasterProcedureEnv;
051import org.apache.hadoop.hbase.master.procedure.TableProcedureInterface;
052import org.apache.hadoop.hbase.procedure2.Procedure;
053import org.apache.hadoop.hbase.procedure2.ProcedureExecutor;
054import org.apache.hadoop.hbase.procedure2.ProcedureSuspendedException;
055import org.apache.hadoop.hbase.procedure2.ProcedureTestingUtility;
056import org.apache.hadoop.hbase.regionserver.HRegionServer;
057import org.apache.hadoop.hbase.testclassification.ClientTests;
058import org.apache.hadoop.hbase.testclassification.LargeTests;
059import org.apache.hadoop.hbase.util.Bytes;
060import org.apache.hadoop.hbase.util.Pair;
061import org.junit.AfterClass;
062import org.junit.Before;
063import org.junit.BeforeClass;
064import org.junit.ClassRule;
065import org.junit.Rule;
066import org.junit.Test;
067import org.junit.experimental.categories.Category;
068import org.junit.rules.TestName;
069import org.junit.runner.RunWith;
070import org.junit.runners.Parameterized;
071import org.junit.runners.Parameterized.Parameter;
072import org.junit.runners.Parameterized.Parameters;
073import org.slf4j.Logger;
074import org.slf4j.LoggerFactory;
075
076import org.apache.hbase.thirdparty.com.google.common.io.Closeables;
077
078import org.apache.hadoop.hbase.shaded.protobuf.ProtobufUtil;
079
080/**
081 * Class to test HBaseHbck. Spins up the minicluster once at test start and then takes it down
082 * afterward. Add any testing of HBaseHbck functionality here.
083 */
084@RunWith(Parameterized.class)
085@Category({ LargeTests.class, ClientTests.class })
086public class TestHbck {
087  @ClassRule
088  public static final HBaseClassTestRule CLASS_RULE = HBaseClassTestRule.forClass(TestHbck.class);
089
090  private static final Logger LOG = LoggerFactory.getLogger(TestHbck.class);
091  private final static HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility();
092
093  @Rule
094  public TestName name = new TestName();
095
096  @SuppressWarnings("checkstyle:VisibilityModifier")
097  @Parameter
098  public boolean async;
099
100  private static final TableName TABLE_NAME = TableName.valueOf(TestHbck.class.getSimpleName());
101
102  private static ProcedureExecutor<MasterProcedureEnv> procExec;
103
104  private static AsyncConnection ASYNC_CONN;
105
106  @Parameters(name = "{index}: async={0}")
107  public static List<Object[]> params() {
108    return Arrays.asList(new Object[] { false }, new Object[] { true });
109  }
110
111  private Hbck getHbck() throws Exception {
112    if (async) {
113      return ASYNC_CONN.getHbck().get();
114    } else {
115      return TEST_UTIL.getHbck();
116    }
117  }
118
119  @BeforeClass
120  public static void setUpBeforeClass() throws Exception {
121    TEST_UTIL.startMiniCluster(3);
122    TEST_UTIL.createMultiRegionTable(TABLE_NAME, Bytes.toBytes("family1"), 5);
123    procExec = TEST_UTIL.getMiniHBaseCluster().getMaster().getMasterProcedureExecutor();
124    ASYNC_CONN = ConnectionFactory.createAsyncConnection(TEST_UTIL.getConfiguration()).get();
125    TEST_UTIL.getHBaseCluster().getMaster().getMasterCoprocessorHost().load(
126      FailingMergeAfterMetaUpdatedMasterObserver.class, Coprocessor.PRIORITY_USER,
127      TEST_UTIL.getHBaseCluster().getMaster().getConfiguration());
128    TEST_UTIL.getHBaseCluster().getMaster().getMasterCoprocessorHost().load(
129      FailingSplitAfterMetaUpdatedMasterObserver.class, Coprocessor.PRIORITY_USER,
130      TEST_UTIL.getHBaseCluster().getMaster().getConfiguration());
131  }
132
133  @AfterClass
134  public static void tearDownAfterClass() throws Exception {
135    Closeables.close(ASYNC_CONN, true);
136    TEST_UTIL.shutdownMiniCluster();
137  }
138
139  @Before
140  public void setUp() throws IOException {
141    TEST_UTIL.ensureSomeRegionServersAvailable(3);
142  }
143
144  public static class SuspendProcedure extends
145    ProcedureTestingUtility.NoopProcedure<MasterProcedureEnv> implements TableProcedureInterface {
146    public SuspendProcedure() {
147      super();
148    }
149
150    @SuppressWarnings({ "rawtypes", "unchecked" })
151    @Override
152    protected Procedure[] execute(final MasterProcedureEnv env) throws ProcedureSuspendedException {
153      // Always suspend the procedure
154      throw new ProcedureSuspendedException();
155    }
156
157    @Override
158    public TableName getTableName() {
159      return TABLE_NAME;
160    }
161
162    @Override
163    public TableOperationType getTableOperationType() {
164      return TableOperationType.READ;
165    }
166  }
167
168  @Test
169  public void testBypassProcedure() throws Exception {
170    // SuspendProcedure
171    final SuspendProcedure proc = new SuspendProcedure();
172    long procId = procExec.submitProcedure(proc);
173    Thread.sleep(500);
174
175    // bypass the procedure
176    List<Long> pids = Arrays.<Long> asList(procId);
177    List<Boolean> results = getHbck().bypassProcedure(pids, 30000, false, false);
178    assertTrue("Failed to by pass procedure!", results.get(0));
179    TEST_UTIL.waitFor(5000, () -> proc.isSuccess() && proc.isBypass());
180    LOG.info("{} finished", proc);
181  }
182
183  @Test
184  public void testSetTableStateInMeta() throws Exception {
185    Hbck hbck = getHbck();
186    // set table state to DISABLED
187    hbck.setTableStateInMeta(new TableState(TABLE_NAME, TableState.State.DISABLED));
188    // Method {@link Hbck#setTableStateInMeta()} returns previous state, which in this case
189    // will be DISABLED
190    TableState prevState =
191      hbck.setTableStateInMeta(new TableState(TABLE_NAME, TableState.State.ENABLED));
192    assertTrue("Incorrect previous state! expeced=DISABLED, found=" + prevState.getState(),
193      prevState.isDisabled());
194  }
195
196  @Test
197  public void testSetRegionStateInMeta() throws Exception {
198    Hbck hbck = getHbck();
199    Admin admin = TEST_UTIL.getAdmin();
200    final List<RegionInfo> regions = admin.getRegions(TABLE_NAME);
201    final AssignmentManager am = TEST_UTIL.getHBaseCluster().getMaster().getAssignmentManager();
202    Map<String, RegionState.State> prevStates = new HashMap<>();
203    Map<String, RegionState.State> newStates = new HashMap<>();
204    final Map<String, Pair<RegionState.State, RegionState.State>> regionsMap = new HashMap<>();
205    regions.forEach(r -> {
206      RegionState prevState = am.getRegionStates().getRegionState(r);
207      prevStates.put(r.getEncodedName(), prevState.getState());
208      newStates.put(r.getEncodedName(), RegionState.State.CLOSED);
209      regionsMap.put(r.getEncodedName(),
210        new Pair<>(prevState.getState(), RegionState.State.CLOSED));
211    });
212    final Map<String, RegionState.State> result = hbck.setRegionStateInMeta(newStates);
213    result.forEach((k, v) -> {
214      RegionState.State prevState = regionsMap.get(k).getFirst();
215      assertEquals(prevState, v);
216    });
217    regions.forEach(r -> {
218      RegionState cachedState = am.getRegionStates().getRegionState(r.getEncodedName());
219      RegionState.State newState = regionsMap.get(r.getEncodedName()).getSecond();
220      assertEquals(newState, cachedState.getState());
221    });
222    hbck.setRegionStateInMeta(prevStates);
223  }
224
225  @Test
226  public void testAssigns() throws Exception {
227    Hbck hbck = getHbck();
228    try (Admin admin = TEST_UTIL.getConnection().getAdmin()) {
229      List<RegionInfo> regions = admin.getRegions(TABLE_NAME);
230      for (RegionInfo ri : regions) {
231        RegionState rs = TEST_UTIL.getHBaseCluster().getMaster().getAssignmentManager()
232          .getRegionStates().getRegionState(ri.getEncodedName());
233        LOG.info("RS: {}", rs.toString());
234      }
235      List<Long> pids =
236        hbck.unassigns(regions.stream().map(r -> r.getEncodedName()).collect(Collectors.toList()));
237      waitOnPids(pids);
238      // Rerun the unassign. Should fail for all Regions since they already unassigned; failed
239      // unassign will manifest as all pids being -1 (ever since HBASE-24885).
240      pids =
241        hbck.unassigns(regions.stream().map(r -> r.getEncodedName()).collect(Collectors.toList()));
242      waitOnPids(pids);
243      for (long pid : pids) {
244        assertEquals(Procedure.NO_PROC_ID, pid);
245      }
246      // If we pass override, then we should be able to unassign EVEN THOUGH Regions already
247      // unassigned.... makes for a mess but operator might want to do this at an extreme when
248      // doing fixup of broke cluster.
249      pids = hbck.unassigns(
250        regions.stream().map(r -> r.getEncodedName()).collect(Collectors.toList()), true);
251      waitOnPids(pids);
252      for (long pid : pids) {
253        assertNotEquals(Procedure.NO_PROC_ID, pid);
254      }
255      // Clean-up by bypassing all the unassigns we just made so tests can continue.
256      hbck.bypassProcedure(pids, 10000, true, true);
257      for (RegionInfo ri : regions) {
258        RegionState rs = TEST_UTIL.getHBaseCluster().getMaster().getAssignmentManager()
259          .getRegionStates().getRegionState(ri.getEncodedName());
260        LOG.info("RS: {}", rs.toString());
261        assertTrue(rs.toString(), rs.isClosed());
262      }
263      pids =
264        hbck.assigns(regions.stream().map(r -> r.getEncodedName()).collect(Collectors.toList()));
265      waitOnPids(pids);
266      // Rerun the assign. Should fail for all Regions since they already assigned; failed
267      // assign will manifest as all pids being -1 (ever since HBASE-24885).
268      pids =
269        hbck.assigns(regions.stream().map(r -> r.getEncodedName()).collect(Collectors.toList()));
270      for (long pid : pids) {
271        assertEquals(Procedure.NO_PROC_ID, pid);
272      }
273      for (RegionInfo ri : regions) {
274        RegionState rs = TEST_UTIL.getHBaseCluster().getMaster().getAssignmentManager()
275          .getRegionStates().getRegionState(ri.getEncodedName());
276        LOG.info("RS: {}", rs.toString());
277        assertTrue(rs.toString(), rs.isOpened());
278      }
279      // What happens if crappy region list passed?
280      pids = hbck.assigns(
281        Arrays.stream(new String[] { "a", "some rubbish name" }).collect(Collectors.toList()));
282      for (long pid : pids) {
283        assertEquals(Procedure.NO_PROC_ID, pid);
284      }
285    }
286  }
287
288  @Test
289  public void testScheduleSCP() throws Exception {
290    HRegionServer testRs = TEST_UTIL.getRSForFirstRegionInTable(TABLE_NAME);
291    TEST_UTIL.loadTable(TEST_UTIL.getConnection().getTable(TABLE_NAME), Bytes.toBytes("family1"),
292      true);
293    ServerName serverName = testRs.getServerName();
294    Hbck hbck = getHbck();
295    List<Long> pids =
296      hbck.scheduleServerCrashProcedure(Arrays.asList(ProtobufUtil.toServerName(serverName)));
297    assertTrue(pids.get(0) > 0);
298    LOG.info("pid is {}", pids.get(0));
299
300    List<Long> newPids =
301      hbck.scheduleServerCrashProcedure(Arrays.asList(ProtobufUtil.toServerName(serverName)));
302    assertTrue(newPids.get(0) < 0);
303    LOG.info("pid is {}", newPids.get(0));
304    waitOnPids(pids);
305  }
306
307  @Test
308  public void testRunHbckChore() throws Exception {
309    HMaster master = TEST_UTIL.getMiniHBaseCluster().getMaster();
310    HbckChore hbckChore = master.getHbckChore();
311    Instant endTimestamp = Optional.ofNullable(hbckChore.getLastReport())
312      .map(HbckReport::getCheckingEndTimestamp).orElse(Instant.EPOCH);
313    Hbck hbck = getHbck();
314    TEST_UTIL.waitFor(TimeUnit.MINUTES.toMillis(5), hbck::runHbckChore);
315    HbckReport report = hbckChore.getLastReport();
316    assertNotNull(report);
317    assertTrue(report.getCheckingEndTimestamp().isAfter(endTimestamp));
318  }
319
320  public static class FailingSplitAfterMetaUpdatedMasterObserver
321    implements MasterCoprocessor, MasterObserver {
322    @SuppressWarnings("checkstyle:VisibilityModifier")
323    public volatile CountDownLatch latch;
324
325    @Override
326    public void start(CoprocessorEnvironment e) throws IOException {
327      resetLatch();
328    }
329
330    @Override
331    public Optional<MasterObserver> getMasterObserver() {
332      return Optional.of(this);
333    }
334
335    @Override
336    public void preSplitRegionAfterMETAAction(ObserverContext<MasterCoprocessorEnvironment> ctx)
337      throws IOException {
338      LOG.info("I'm here");
339      latch.countDown();
340      throw new IOException("this procedure will fail at here forever");
341    }
342
343    public void resetLatch() {
344      this.latch = new CountDownLatch(1);
345    }
346  }
347
348  public static class FailingMergeAfterMetaUpdatedMasterObserver
349    implements MasterCoprocessor, MasterObserver {
350    @SuppressWarnings("checkstyle:VisibilityModifier")
351    public volatile CountDownLatch latch;
352
353    @Override
354    public void start(CoprocessorEnvironment e) throws IOException {
355      resetLatch();
356    }
357
358    @Override
359    public Optional<MasterObserver> getMasterObserver() {
360      return Optional.of(this);
361    }
362
363    public void resetLatch() {
364      this.latch = new CountDownLatch(1);
365    }
366
367    @Override
368    public void postMergeRegionsCommitAction(
369      final ObserverContext<MasterCoprocessorEnvironment> ctx, final RegionInfo[] regionsToMerge,
370      final RegionInfo mergedRegion) throws IOException {
371      latch.countDown();
372      throw new IOException("this procedure will fail at here forever");
373    }
374  }
375
376  private void waitOnPids(List<Long> pids) {
377    TEST_UTIL.waitFor(60000, () -> pids.stream().allMatch(procExec::isFinished));
378  }
379}