001/* 002 * Licensed to the Apache Software Foundation (ASF) under one 003 * or more contributor license agreements. See the NOTICE file 004 * distributed with this work for additional information 005 * regarding copyright ownership. The ASF licenses this file 006 * to you under the Apache License, Version 2.0 (the 007 * "License"); you may not use this file except in compliance 008 * with the License. You may obtain a copy of the License at 009 * 010 * http://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, software 013 * distributed under the License is distributed on an "AS IS" BASIS, 014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 015 * See the License for the specific language governing permissions and 016 * limitations under the License. 017 */ 018package org.apache.hadoop.hbase.client; 019 020import static org.junit.jupiter.api.Assertions.assertEquals; 021import static org.junit.jupiter.api.Assertions.assertNotEquals; 022import static org.junit.jupiter.api.Assertions.assertNotNull; 023import static org.junit.jupiter.api.Assertions.assertTrue; 024 025import java.io.IOException; 026import java.time.Instant; 027import java.util.Arrays; 028import java.util.HashMap; 029import java.util.List; 030import java.util.Map; 031import java.util.Optional; 032import java.util.concurrent.Callable; 033import java.util.concurrent.CountDownLatch; 034import java.util.concurrent.TimeUnit; 035import java.util.stream.Collectors; 036import java.util.stream.Stream; 037import org.apache.hadoop.hbase.Coprocessor; 038import org.apache.hadoop.hbase.CoprocessorEnvironment; 039import org.apache.hadoop.hbase.HBaseParameterizedTestTemplate; 040import org.apache.hadoop.hbase.HBaseTestingUtil; 041import org.apache.hadoop.hbase.ServerName; 042import org.apache.hadoop.hbase.TableName; 043import org.apache.hadoop.hbase.coprocessor.MasterCoprocessor; 044import org.apache.hadoop.hbase.coprocessor.MasterCoprocessorEnvironment; 045import org.apache.hadoop.hbase.coprocessor.MasterObserver; 046import org.apache.hadoop.hbase.coprocessor.ObserverContext; 047import org.apache.hadoop.hbase.master.HMaster; 048import org.apache.hadoop.hbase.master.RegionState; 049import org.apache.hadoop.hbase.master.assignment.AssignmentManager; 050import org.apache.hadoop.hbase.master.hbck.HbckChore; 051import org.apache.hadoop.hbase.master.hbck.HbckReport; 052import org.apache.hadoop.hbase.master.procedure.MasterProcedureEnv; 053import org.apache.hadoop.hbase.master.procedure.TableProcedureInterface; 054import org.apache.hadoop.hbase.procedure2.Procedure; 055import org.apache.hadoop.hbase.procedure2.ProcedureExecutor; 056import org.apache.hadoop.hbase.procedure2.ProcedureSuspendedException; 057import org.apache.hadoop.hbase.procedure2.ProcedureTestingUtility; 058import org.apache.hadoop.hbase.regionserver.HRegionServer; 059import org.apache.hadoop.hbase.testclassification.ClientTests; 060import org.apache.hadoop.hbase.testclassification.LargeTests; 061import org.apache.hadoop.hbase.util.Bytes; 062import org.junit.jupiter.api.AfterAll; 063import org.junit.jupiter.api.BeforeAll; 064import org.junit.jupiter.api.BeforeEach; 065import org.junit.jupiter.api.Tag; 066import org.junit.jupiter.api.TestTemplate; 067import org.junit.jupiter.params.provider.Arguments; 068import org.slf4j.Logger; 069import org.slf4j.LoggerFactory; 070 071import org.apache.hbase.thirdparty.com.google.common.io.Closeables; 072 073/** 074 * Class to test HBaseHbck. Spins up the minicluster once at test start and then takes it down 075 * afterward. Add any testing of HBaseHbck functionality here. 076 */ 077@Tag(LargeTests.TAG) 078@Tag(ClientTests.TAG) 079@HBaseParameterizedTestTemplate(name = "{index}: async={0}") 080public class TestHbck { 081 082 private static final Logger LOG = LoggerFactory.getLogger(TestHbck.class); 083 private final static HBaseTestingUtil TEST_UTIL = new HBaseTestingUtil(); 084 085 private final boolean async; 086 087 private static final TableName TABLE_NAME = TableName.valueOf(TestHbck.class.getSimpleName()); 088 089 private static ProcedureExecutor<MasterProcedureEnv> procExec; 090 091 private static AsyncConnection ASYNC_CONN; 092 093 public static Stream<Arguments> parameters() { 094 return Stream.of(Arguments.of(false), Arguments.of(true)); 095 } 096 097 public TestHbck(boolean async) { 098 this.async = async; 099 } 100 101 private Hbck getHbck() throws Exception { 102 if (async) { 103 return ASYNC_CONN.getHbck().get(); 104 } else { 105 return TEST_UTIL.getHbck(); 106 } 107 } 108 109 @BeforeAll 110 public static void setUpBeforeClass() throws Exception { 111 TEST_UTIL.startMiniCluster(3); 112 TEST_UTIL.createMultiRegionTable(TABLE_NAME, 3, new byte[][] { Bytes.toBytes("family1") }); 113 procExec = TEST_UTIL.getMiniHBaseCluster().getMaster().getMasterProcedureExecutor(); 114 ASYNC_CONN = ConnectionFactory.createAsyncConnection(TEST_UTIL.getConfiguration()).get(); 115 TEST_UTIL.getHBaseCluster().getMaster().getMasterCoprocessorHost().load( 116 FailingMergeAfterMetaUpdatedMasterObserver.class, Coprocessor.PRIORITY_USER, 117 TEST_UTIL.getHBaseCluster().getMaster().getConfiguration()); 118 TEST_UTIL.getHBaseCluster().getMaster().getMasterCoprocessorHost().load( 119 FailingSplitAfterMetaUpdatedMasterObserver.class, Coprocessor.PRIORITY_USER, 120 TEST_UTIL.getHBaseCluster().getMaster().getConfiguration()); 121 } 122 123 @AfterAll 124 public static void tearDownAfterClass() throws Exception { 125 Closeables.close(ASYNC_CONN, true); 126 TEST_UTIL.shutdownMiniCluster(); 127 } 128 129 @BeforeEach 130 public void setUp() throws IOException { 131 TEST_UTIL.ensureSomeRegionServersAvailable(3); 132 } 133 134 public static class SuspendProcedure extends 135 ProcedureTestingUtility.NoopProcedure<MasterProcedureEnv> implements TableProcedureInterface { 136 public SuspendProcedure() { 137 super(); 138 } 139 140 @SuppressWarnings({ "rawtypes", "unchecked" }) 141 @Override 142 protected Procedure[] execute(final MasterProcedureEnv env) throws ProcedureSuspendedException { 143 // Always suspend the procedure 144 throw new ProcedureSuspendedException(); 145 } 146 147 @Override 148 public TableName getTableName() { 149 return TABLE_NAME; 150 } 151 152 @Override 153 public TableOperationType getTableOperationType() { 154 return TableOperationType.READ; 155 } 156 } 157 158 @TestTemplate 159 public void testBypassProcedure() throws Exception { 160 // SuspendProcedure 161 final SuspendProcedure proc = new SuspendProcedure(); 162 long procId = procExec.submitProcedure(proc); 163 Thread.sleep(500); 164 165 // bypass the procedure 166 List<Long> pids = Arrays.<Long> asList(procId); 167 List<Boolean> results = getHbck().bypassProcedure(pids, 30000, false, false); 168 assertTrue(results.get(0), "Failed to by pass procedure!"); 169 TEST_UTIL.waitFor(5000, () -> proc.isSuccess() && proc.isBypass()); 170 LOG.info("{} finished", proc); 171 } 172 173 @TestTemplate 174 public void testSetTableStateInMeta() throws Exception { 175 Hbck hbck = getHbck(); 176 // set table state to DISABLED 177 hbck.setTableStateInMeta(new TableState(TABLE_NAME, TableState.State.DISABLED)); 178 // Method {@link Hbck#setTableStateInMeta()} returns previous state, which in this case 179 // will be DISABLED 180 TableState prevState = 181 hbck.setTableStateInMeta(new TableState(TABLE_NAME, TableState.State.ENABLED)); 182 assertTrue(prevState.isDisabled(), 183 "Incorrect previous state! expected=DISABLED, found=" + prevState.getState()); 184 } 185 186 @TestTemplate 187 public void testSetRegionStateInMeta() throws Exception { 188 Hbck hbck = getHbck(); 189 Admin admin = TEST_UTIL.getAdmin(); 190 TEST_UTIL.waitUntilAllRegionsAssigned(TABLE_NAME); 191 final List<RegionInfo> regions = admin.getRegions(TABLE_NAME); 192 final AssignmentManager am = TEST_UTIL.getHBaseCluster().getMaster().getAssignmentManager(); 193 final Map<String, RegionState.State> beforeStates = new HashMap<>(); 194 final Map<String, RegionState.State> requestStates = new HashMap<>(); 195 regions.forEach(r -> { 196 RegionState beforeState = am.getRegionStates().getRegionState(r); 197 beforeStates.put(r.getEncodedName(), beforeState.getState()); 198 LOG.debug("Before test: {} ; {}", r, beforeState.getState()); 199 requestStates.put(r.getEncodedName(), RegionState.State.CLOSED); 200 }); 201 final Callable<Void> doTest = () -> { 202 // run the entire test with the ProcedureExecution environment paused. This prevents 203 // background operations from modifying AM internal state between the assertions this test 204 // relies upon. 205 Map<String, RegionState.State> result = hbck.setRegionStateInMeta(requestStates); 206 result.forEach((k, v) -> { 207 RegionState.State beforeState = beforeStates.get(k); 208 assertEquals(beforeState, v, "response state should match before state; " + k); 209 }); 210 regions.forEach(r -> { 211 RegionState afterState = am.getRegionStates().getRegionState(r.getEncodedName()); 212 RegionState.State expectedState = requestStates.get(r.getEncodedName()); 213 LOG.debug("After test: {}, {}", r, afterState); 214 assertEquals(expectedState, afterState.getState(), 215 "state in AM should match requested state ; " + r); 216 }); 217 return null; 218 }; 219 ProcedureTestingUtility.restart(procExec, true, true, null, doTest, null, false, true); 220 // restore the table as we found it -- fragile? 221 hbck.setRegionStateInMeta(beforeStates); 222 } 223 224 @TestTemplate 225 public void testAssigns() throws Exception { 226 Hbck hbck = getHbck(); 227 final AssignmentManager am = TEST_UTIL.getHBaseCluster().getMaster().getAssignmentManager(); 228 try (Admin admin = TEST_UTIL.getConnection().getAdmin()) { 229 List<RegionInfo> regions = admin.getRegions(TABLE_NAME).stream() 230 .filter(ri -> ri.getReplicaId() == RegionInfo.DEFAULT_REPLICA_ID).peek(ri -> { 231 final RegionState rs = am.getRegionStates().getRegionState(ri.getEncodedName()); 232 LOG.info("RS: {}", rs); 233 }).collect(Collectors.toList()); 234 List<Long> pids = hbck 235 .unassigns(regions.stream().map(RegionInfo::getEncodedName).collect(Collectors.toList())); 236 waitOnPids(pids); 237 // Rerun the unassign. Should fail for all Regions since they already unassigned; failed 238 // unassign will manifest as all pids being -1 (ever since HBASE-24885). 239 pids = hbck 240 .unassigns(regions.stream().map(RegionInfo::getEncodedName).collect(Collectors.toList())); 241 waitOnPids(pids); 242 for (long pid : pids) { 243 assertEquals(Procedure.NO_PROC_ID, pid); 244 } 245 // Rerun the unassign with override. Should fail for all Regions since they already 246 // unassigned; failed 247 // unassign will manifest as all pids being -1 (ever since HBASE-24885). 248 pids = hbck.unassigns( 249 regions.stream().map(RegionInfo::getEncodedName).collect(Collectors.toList()), true, false); 250 waitOnPids(pids); 251 for (long pid : pids) { 252 assertEquals(Procedure.NO_PROC_ID, pid); 253 } 254 // If we pass force, then we should be able to unassign EVEN THOUGH Regions already 255 // unassigned.... makes for a mess but operator might want to do this at an extreme when 256 // doing fixup of broke cluster. 257 pids = hbck.unassigns( 258 regions.stream().map(RegionInfo::getEncodedName).collect(Collectors.toList()), true, true); 259 waitOnPids(pids); 260 for (long pid : pids) { 261 assertNotEquals(Procedure.NO_PROC_ID, pid); 262 } 263 // Clean-up by bypassing all the unassigns we just made so tests can continue. 264 hbck.bypassProcedure(pids, 10000, true, true); 265 for (RegionInfo ri : regions) { 266 RegionState rs = TEST_UTIL.getHBaseCluster().getMaster().getAssignmentManager() 267 .getRegionStates().getRegionState(ri.getEncodedName()); 268 LOG.info("RS: {}", rs.toString()); 269 assertTrue(rs.isClosed(), rs.toString()); 270 } 271 pids = 272 hbck.assigns(regions.stream().map(RegionInfo::getEncodedName).collect(Collectors.toList())); 273 waitOnPids(pids); 274 // Rerun the assign. Should fail for all Regions since they already assigned; failed 275 // assign will manifest as all pids being -1 (ever since HBASE-24885). 276 pids = 277 hbck.assigns(regions.stream().map(RegionInfo::getEncodedName).collect(Collectors.toList())); 278 for (long pid : pids) { 279 assertEquals(Procedure.NO_PROC_ID, pid); 280 } 281 for (RegionInfo ri : regions) { 282 RegionState rs = TEST_UTIL.getHBaseCluster().getMaster().getAssignmentManager() 283 .getRegionStates().getRegionState(ri.getEncodedName()); 284 LOG.info("RS: {}", rs.toString()); 285 assertTrue(rs.isOpened(), rs.toString()); 286 } 287 // Rerun the assign with override. Should fail for all Regions since they already assigned 288 pids = hbck.assigns( 289 regions.stream().map(RegionInfo::getEncodedName).collect(Collectors.toList()), true, false); 290 for (long pid : pids) { 291 assertEquals(Procedure.NO_PROC_ID, pid); 292 } 293 // What happens if crappy region list passed? 294 pids = hbck.assigns( 295 Arrays.stream(new String[] { "a", "some rubbish name" }).collect(Collectors.toList())); 296 for (long pid : pids) { 297 assertEquals(Procedure.NO_PROC_ID, pid); 298 } 299 } 300 } 301 302 @TestTemplate 303 public void testScheduleSCP() throws Exception { 304 HRegionServer testRs = TEST_UTIL.getRSForFirstRegionInTable(TABLE_NAME); 305 try (final Table t = TEST_UTIL.getConnection().getTable(TABLE_NAME)) { 306 TEST_UTIL.loadTable(t, Bytes.toBytes("family1"), true); 307 } 308 ServerName serverName = testRs.getServerName(); 309 Hbck hbck = getHbck(); 310 List<Long> pids = hbck.scheduleServerCrashProcedures(Arrays.asList(serverName)); 311 assertEquals(1, pids.size()); 312 assertNotEquals((Long) Procedure.NO_PROC_ID, pids.get(0)); 313 LOG.debug("SCP pid is {}", pids.get(0)); 314 315 List<Long> newPids = hbck.scheduleServerCrashProcedures(Arrays.asList(serverName)); 316 assertEquals(1, pids.size()); 317 assertEquals((Long) Procedure.NO_PROC_ID, newPids.get(0)); 318 waitOnPids(pids); 319 } 320 321 @TestTemplate 322 public void testRunHbckChore() throws Exception { 323 HMaster master = TEST_UTIL.getMiniHBaseCluster().getMaster(); 324 HbckChore hbckChore = master.getHbckChore(); 325 Instant endTimestamp = Optional.ofNullable(hbckChore.getLastReport()) 326 .map(HbckReport::getCheckingEndTimestamp).orElse(Instant.EPOCH); 327 Hbck hbck = getHbck(); 328 TEST_UTIL.waitFor(TimeUnit.MINUTES.toMillis(5), hbck::runHbckChore); 329 HbckReport report = hbckChore.getLastReport(); 330 assertNotNull(report); 331 assertTrue(report.getCheckingEndTimestamp().isAfter(endTimestamp)); 332 } 333 334 public static class FailingSplitAfterMetaUpdatedMasterObserver 335 implements MasterCoprocessor, MasterObserver { 336 @SuppressWarnings("checkstyle:VisibilityModifier") 337 public volatile CountDownLatch latch; 338 339 @Override 340 public void start(CoprocessorEnvironment e) throws IOException { 341 resetLatch(); 342 } 343 344 @Override 345 public Optional<MasterObserver> getMasterObserver() { 346 return Optional.of(this); 347 } 348 349 @Override 350 public void preSplitRegionAfterMETAAction(ObserverContext<MasterCoprocessorEnvironment> ctx) 351 throws IOException { 352 LOG.info("I'm here"); 353 latch.countDown(); 354 throw new IOException("this procedure will fail at here forever"); 355 } 356 357 public void resetLatch() { 358 this.latch = new CountDownLatch(1); 359 } 360 } 361 362 public static class FailingMergeAfterMetaUpdatedMasterObserver 363 implements MasterCoprocessor, MasterObserver { 364 @SuppressWarnings("checkstyle:VisibilityModifier") 365 public volatile CountDownLatch latch; 366 367 @Override 368 public void start(CoprocessorEnvironment e) throws IOException { 369 resetLatch(); 370 } 371 372 @Override 373 public Optional<MasterObserver> getMasterObserver() { 374 return Optional.of(this); 375 } 376 377 public void resetLatch() { 378 this.latch = new CountDownLatch(1); 379 } 380 381 @Override 382 public void postMergeRegionsCommitAction( 383 final ObserverContext<MasterCoprocessorEnvironment> ctx, final RegionInfo[] regionsToMerge, 384 final RegionInfo mergedRegion) throws IOException { 385 latch.countDown(); 386 throw new IOException("this procedure will fail at here forever"); 387 } 388 } 389 390 private void waitOnPids(List<Long> pids) { 391 TEST_UTIL.waitFor(60000, () -> pids.stream().allMatch(procExec::isFinished)); 392 } 393}