001/* 002 * Licensed to the Apache Software Foundation (ASF) under one 003 * or more contributor license agreements. See the NOTICE file 004 * distributed with this work for additional information 005 * regarding copyright ownership. The ASF licenses this file 006 * to you under the Apache License, Version 2.0 (the 007 * "License"); you may not use this file except in compliance 008 * with the License. You may obtain a copy of the License at 009 * 010 * http://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, software 013 * distributed under the License is distributed on an "AS IS" BASIS, 014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 015 * See the License for the specific language governing permissions and 016 * limitations under the License. 017 */ 018package org.apache.hadoop.hbase.client; 019 020import static org.junit.Assert.assertEquals; 021import static org.junit.Assert.assertNotEquals; 022import static org.junit.Assert.assertNotNull; 023import static org.junit.Assert.assertTrue; 024 025import java.io.IOException; 026import java.time.Instant; 027import java.util.Arrays; 028import java.util.HashMap; 029import java.util.List; 030import java.util.Map; 031import java.util.Optional; 032import java.util.concurrent.Callable; 033import java.util.concurrent.CountDownLatch; 034import java.util.concurrent.TimeUnit; 035import java.util.stream.Collectors; 036import org.apache.hadoop.hbase.Coprocessor; 037import org.apache.hadoop.hbase.CoprocessorEnvironment; 038import org.apache.hadoop.hbase.HBaseClassTestRule; 039import org.apache.hadoop.hbase.HBaseTestingUtil; 040import org.apache.hadoop.hbase.ServerName; 041import org.apache.hadoop.hbase.TableName; 042import org.apache.hadoop.hbase.coprocessor.MasterCoprocessor; 043import org.apache.hadoop.hbase.coprocessor.MasterCoprocessorEnvironment; 044import org.apache.hadoop.hbase.coprocessor.MasterObserver; 045import org.apache.hadoop.hbase.coprocessor.ObserverContext; 046import org.apache.hadoop.hbase.master.HMaster; 047import org.apache.hadoop.hbase.master.RegionState; 048import org.apache.hadoop.hbase.master.assignment.AssignmentManager; 049import org.apache.hadoop.hbase.master.hbck.HbckChore; 050import org.apache.hadoop.hbase.master.hbck.HbckReport; 051import org.apache.hadoop.hbase.master.procedure.MasterProcedureEnv; 052import org.apache.hadoop.hbase.master.procedure.TableProcedureInterface; 053import org.apache.hadoop.hbase.procedure2.Procedure; 054import org.apache.hadoop.hbase.procedure2.ProcedureExecutor; 055import org.apache.hadoop.hbase.procedure2.ProcedureSuspendedException; 056import org.apache.hadoop.hbase.procedure2.ProcedureTestingUtility; 057import org.apache.hadoop.hbase.regionserver.HRegionServer; 058import org.apache.hadoop.hbase.testclassification.ClientTests; 059import org.apache.hadoop.hbase.testclassification.LargeTests; 060import org.apache.hadoop.hbase.util.Bytes; 061import org.junit.AfterClass; 062import org.junit.Before; 063import org.junit.BeforeClass; 064import org.junit.ClassRule; 065import org.junit.Rule; 066import org.junit.Test; 067import org.junit.experimental.categories.Category; 068import org.junit.rules.TestName; 069import org.junit.runner.RunWith; 070import org.junit.runners.Parameterized; 071import org.junit.runners.Parameterized.Parameter; 072import org.junit.runners.Parameterized.Parameters; 073import org.slf4j.Logger; 074import org.slf4j.LoggerFactory; 075 076import org.apache.hbase.thirdparty.com.google.common.io.Closeables; 077 078/** 079 * Class to test HBaseHbck. Spins up the minicluster once at test start and then takes it down 080 * afterward. Add any testing of HBaseHbck functionality here. 081 */ 082@RunWith(Parameterized.class) 083@Category({ LargeTests.class, ClientTests.class }) 084public class TestHbck { 085 @ClassRule 086 public static final HBaseClassTestRule CLASS_RULE = HBaseClassTestRule.forClass(TestHbck.class); 087 088 private static final Logger LOG = LoggerFactory.getLogger(TestHbck.class); 089 private final static HBaseTestingUtil TEST_UTIL = new HBaseTestingUtil(); 090 091 @Rule 092 public TestName name = new TestName(); 093 094 @SuppressWarnings("checkstyle:VisibilityModifier") 095 @Parameter 096 public boolean async; 097 098 private static final TableName TABLE_NAME = TableName.valueOf(TestHbck.class.getSimpleName()); 099 100 private static ProcedureExecutor<MasterProcedureEnv> procExec; 101 102 private static AsyncConnection ASYNC_CONN; 103 104 @Parameters(name = "{index}: async={0}") 105 public static List<Object[]> params() { 106 return Arrays.asList(new Object[] { false }, new Object[] { true }); 107 } 108 109 private Hbck getHbck() throws Exception { 110 if (async) { 111 return ASYNC_CONN.getHbck().get(); 112 } else { 113 return TEST_UTIL.getHbck(); 114 } 115 } 116 117 @BeforeClass 118 public static void setUpBeforeClass() throws Exception { 119 TEST_UTIL.startMiniCluster(3); 120 TEST_UTIL.createMultiRegionTable(TABLE_NAME, 3, new byte[][] { Bytes.toBytes("family1") }); 121 procExec = TEST_UTIL.getMiniHBaseCluster().getMaster().getMasterProcedureExecutor(); 122 ASYNC_CONN = ConnectionFactory.createAsyncConnection(TEST_UTIL.getConfiguration()).get(); 123 TEST_UTIL.getHBaseCluster().getMaster().getMasterCoprocessorHost().load( 124 FailingMergeAfterMetaUpdatedMasterObserver.class, Coprocessor.PRIORITY_USER, 125 TEST_UTIL.getHBaseCluster().getMaster().getConfiguration()); 126 TEST_UTIL.getHBaseCluster().getMaster().getMasterCoprocessorHost().load( 127 FailingSplitAfterMetaUpdatedMasterObserver.class, Coprocessor.PRIORITY_USER, 128 TEST_UTIL.getHBaseCluster().getMaster().getConfiguration()); 129 } 130 131 @AfterClass 132 public static void tearDownAfterClass() throws Exception { 133 Closeables.close(ASYNC_CONN, true); 134 TEST_UTIL.shutdownMiniCluster(); 135 } 136 137 @Before 138 public void setUp() throws IOException { 139 TEST_UTIL.ensureSomeRegionServersAvailable(3); 140 } 141 142 public static class SuspendProcedure extends 143 ProcedureTestingUtility.NoopProcedure<MasterProcedureEnv> implements TableProcedureInterface { 144 public SuspendProcedure() { 145 super(); 146 } 147 148 @SuppressWarnings({ "rawtypes", "unchecked" }) 149 @Override 150 protected Procedure[] execute(final MasterProcedureEnv env) throws ProcedureSuspendedException { 151 // Always suspend the procedure 152 throw new ProcedureSuspendedException(); 153 } 154 155 @Override 156 public TableName getTableName() { 157 return TABLE_NAME; 158 } 159 160 @Override 161 public TableOperationType getTableOperationType() { 162 return TableOperationType.READ; 163 } 164 } 165 166 @Test 167 public void testBypassProcedure() throws Exception { 168 // SuspendProcedure 169 final SuspendProcedure proc = new SuspendProcedure(); 170 long procId = procExec.submitProcedure(proc); 171 Thread.sleep(500); 172 173 // bypass the procedure 174 List<Long> pids = Arrays.<Long> asList(procId); 175 List<Boolean> results = getHbck().bypassProcedure(pids, 30000, false, false); 176 assertTrue("Failed to by pass procedure!", results.get(0)); 177 TEST_UTIL.waitFor(5000, () -> proc.isSuccess() && proc.isBypass()); 178 LOG.info("{} finished", proc); 179 } 180 181 @Test 182 public void testSetTableStateInMeta() throws Exception { 183 Hbck hbck = getHbck(); 184 // set table state to DISABLED 185 hbck.setTableStateInMeta(new TableState(TABLE_NAME, TableState.State.DISABLED)); 186 // Method {@link Hbck#setTableStateInMeta()} returns previous state, which in this case 187 // will be DISABLED 188 TableState prevState = 189 hbck.setTableStateInMeta(new TableState(TABLE_NAME, TableState.State.ENABLED)); 190 assertTrue("Incorrect previous state! expected=DISABLED, found=" + prevState.getState(), 191 prevState.isDisabled()); 192 } 193 194 @Test 195 public void testSetRegionStateInMeta() throws Exception { 196 Hbck hbck = getHbck(); 197 Admin admin = TEST_UTIL.getAdmin(); 198 TEST_UTIL.waitUntilAllRegionsAssigned(TABLE_NAME); 199 final List<RegionInfo> regions = admin.getRegions(TABLE_NAME); 200 final AssignmentManager am = TEST_UTIL.getHBaseCluster().getMaster().getAssignmentManager(); 201 final Map<String, RegionState.State> beforeStates = new HashMap<>(); 202 final Map<String, RegionState.State> requestStates = new HashMap<>(); 203 regions.forEach(r -> { 204 RegionState beforeState = am.getRegionStates().getRegionState(r); 205 beforeStates.put(r.getEncodedName(), beforeState.getState()); 206 LOG.debug("Before test: {} ; {}", r, beforeState.getState()); 207 requestStates.put(r.getEncodedName(), RegionState.State.CLOSED); 208 }); 209 final Callable<Void> doTest = () -> { 210 // run the entire test with the ProcedureExecution environment paused. This prevents 211 // background operations from modifying AM internal state between the assertions this test 212 // relies upon. 213 Map<String, RegionState.State> result = hbck.setRegionStateInMeta(requestStates); 214 result.forEach((k, v) -> { 215 RegionState.State beforeState = beforeStates.get(k); 216 assertEquals("response state should match before state; " + k, beforeState, v); 217 }); 218 regions.forEach(r -> { 219 RegionState afterState = am.getRegionStates().getRegionState(r.getEncodedName()); 220 RegionState.State expectedState = requestStates.get(r.getEncodedName()); 221 LOG.debug("After test: {}, {}", r, afterState); 222 assertEquals("state in AM should match requested state ; " + r, expectedState, 223 afterState.getState()); 224 }); 225 return null; 226 }; 227 ProcedureTestingUtility.restart(procExec, true, true, null, doTest, null, false, true); 228 // restore the table as we found it -- fragile? 229 hbck.setRegionStateInMeta(beforeStates); 230 } 231 232 @Test 233 public void testAssigns() throws Exception { 234 Hbck hbck = getHbck(); 235 final AssignmentManager am = TEST_UTIL.getHBaseCluster().getMaster().getAssignmentManager(); 236 try (Admin admin = TEST_UTIL.getConnection().getAdmin()) { 237 List<RegionInfo> regions = admin.getRegions(TABLE_NAME).stream() 238 .filter(ri -> ri.getReplicaId() == RegionInfo.DEFAULT_REPLICA_ID).peek(ri -> { 239 final RegionState rs = am.getRegionStates().getRegionState(ri.getEncodedName()); 240 LOG.info("RS: {}", rs); 241 }).collect(Collectors.toList()); 242 List<Long> pids = hbck 243 .unassigns(regions.stream().map(RegionInfo::getEncodedName).collect(Collectors.toList())); 244 waitOnPids(pids); 245 // Rerun the unassign. Should fail for all Regions since they already unassigned; failed 246 // unassign will manifest as all pids being -1 (ever since HBASE-24885). 247 pids = hbck 248 .unassigns(regions.stream().map(RegionInfo::getEncodedName).collect(Collectors.toList())); 249 waitOnPids(pids); 250 for (long pid : pids) { 251 assertEquals(Procedure.NO_PROC_ID, pid); 252 } 253 // Rerun the unassign with override. Should fail for all Regions since they already 254 // unassigned; failed 255 // unassign will manifest as all pids being -1 (ever since HBASE-24885). 256 pids = hbck.unassigns( 257 regions.stream().map(RegionInfo::getEncodedName).collect(Collectors.toList()), true, false); 258 waitOnPids(pids); 259 for (long pid : pids) { 260 assertEquals(Procedure.NO_PROC_ID, pid); 261 } 262 // If we pass force, then we should be able to unassign EVEN THOUGH Regions already 263 // unassigned.... makes for a mess but operator might want to do this at an extreme when 264 // doing fixup of broke cluster. 265 pids = hbck.unassigns( 266 regions.stream().map(RegionInfo::getEncodedName).collect(Collectors.toList()), true, true); 267 waitOnPids(pids); 268 for (long pid : pids) { 269 assertNotEquals(Procedure.NO_PROC_ID, pid); 270 } 271 // Clean-up by bypassing all the unassigns we just made so tests can continue. 272 hbck.bypassProcedure(pids, 10000, true, true); 273 for (RegionInfo ri : regions) { 274 RegionState rs = TEST_UTIL.getHBaseCluster().getMaster().getAssignmentManager() 275 .getRegionStates().getRegionState(ri.getEncodedName()); 276 LOG.info("RS: {}", rs.toString()); 277 assertTrue(rs.toString(), rs.isClosed()); 278 } 279 pids = 280 hbck.assigns(regions.stream().map(RegionInfo::getEncodedName).collect(Collectors.toList())); 281 waitOnPids(pids); 282 // Rerun the assign. Should fail for all Regions since they already assigned; failed 283 // assign will manifest as all pids being -1 (ever since HBASE-24885). 284 pids = 285 hbck.assigns(regions.stream().map(RegionInfo::getEncodedName).collect(Collectors.toList())); 286 for (long pid : pids) { 287 assertEquals(Procedure.NO_PROC_ID, pid); 288 } 289 for (RegionInfo ri : regions) { 290 RegionState rs = TEST_UTIL.getHBaseCluster().getMaster().getAssignmentManager() 291 .getRegionStates().getRegionState(ri.getEncodedName()); 292 LOG.info("RS: {}", rs.toString()); 293 assertTrue(rs.toString(), rs.isOpened()); 294 } 295 // Rerun the assign with override. Should fail for all Regions since they already assigned 296 pids = hbck.assigns( 297 regions.stream().map(RegionInfo::getEncodedName).collect(Collectors.toList()), true, false); 298 for (long pid : pids) { 299 assertEquals(Procedure.NO_PROC_ID, pid); 300 } 301 // What happens if crappy region list passed? 302 pids = hbck.assigns( 303 Arrays.stream(new String[] { "a", "some rubbish name" }).collect(Collectors.toList())); 304 for (long pid : pids) { 305 assertEquals(Procedure.NO_PROC_ID, pid); 306 } 307 } 308 } 309 310 @Test 311 public void testScheduleSCP() throws Exception { 312 HRegionServer testRs = TEST_UTIL.getRSForFirstRegionInTable(TABLE_NAME); 313 try (final Table t = TEST_UTIL.getConnection().getTable(TABLE_NAME)) { 314 TEST_UTIL.loadTable(t, Bytes.toBytes("family1"), true); 315 } 316 ServerName serverName = testRs.getServerName(); 317 Hbck hbck = getHbck(); 318 List<Long> pids = hbck.scheduleServerCrashProcedures(Arrays.asList(serverName)); 319 assertEquals(1, pids.size()); 320 assertNotEquals((Long) Procedure.NO_PROC_ID, pids.get(0)); 321 LOG.debug("SCP pid is {}", pids.get(0)); 322 323 List<Long> newPids = hbck.scheduleServerCrashProcedures(Arrays.asList(serverName)); 324 assertEquals(1, pids.size()); 325 assertEquals((Long) Procedure.NO_PROC_ID, newPids.get(0)); 326 waitOnPids(pids); 327 } 328 329 @Test 330 public void testRunHbckChore() throws Exception { 331 HMaster master = TEST_UTIL.getMiniHBaseCluster().getMaster(); 332 HbckChore hbckChore = master.getHbckChore(); 333 Instant endTimestamp = Optional.ofNullable(hbckChore.getLastReport()) 334 .map(HbckReport::getCheckingEndTimestamp).orElse(Instant.EPOCH); 335 Hbck hbck = getHbck(); 336 TEST_UTIL.waitFor(TimeUnit.MINUTES.toMillis(5), hbck::runHbckChore); 337 HbckReport report = hbckChore.getLastReport(); 338 assertNotNull(report); 339 assertTrue(report.getCheckingEndTimestamp().isAfter(endTimestamp)); 340 } 341 342 public static class FailingSplitAfterMetaUpdatedMasterObserver 343 implements MasterCoprocessor, MasterObserver { 344 @SuppressWarnings("checkstyle:VisibilityModifier") 345 public volatile CountDownLatch latch; 346 347 @Override 348 public void start(CoprocessorEnvironment e) throws IOException { 349 resetLatch(); 350 } 351 352 @Override 353 public Optional<MasterObserver> getMasterObserver() { 354 return Optional.of(this); 355 } 356 357 @Override 358 public void preSplitRegionAfterMETAAction(ObserverContext<MasterCoprocessorEnvironment> ctx) 359 throws IOException { 360 LOG.info("I'm here"); 361 latch.countDown(); 362 throw new IOException("this procedure will fail at here forever"); 363 } 364 365 public void resetLatch() { 366 this.latch = new CountDownLatch(1); 367 } 368 } 369 370 public static class FailingMergeAfterMetaUpdatedMasterObserver 371 implements MasterCoprocessor, MasterObserver { 372 @SuppressWarnings("checkstyle:VisibilityModifier") 373 public volatile CountDownLatch latch; 374 375 @Override 376 public void start(CoprocessorEnvironment e) throws IOException { 377 resetLatch(); 378 } 379 380 @Override 381 public Optional<MasterObserver> getMasterObserver() { 382 return Optional.of(this); 383 } 384 385 public void resetLatch() { 386 this.latch = new CountDownLatch(1); 387 } 388 389 @Override 390 public void postMergeRegionsCommitAction( 391 final ObserverContext<MasterCoprocessorEnvironment> ctx, final RegionInfo[] regionsToMerge, 392 final RegionInfo mergedRegion) throws IOException { 393 latch.countDown(); 394 throw new IOException("this procedure will fail at here forever"); 395 } 396 } 397 398 private void waitOnPids(List<Long> pids) { 399 TEST_UTIL.waitFor(60000, () -> pids.stream().allMatch(procExec::isFinished)); 400 } 401}