/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.client;

import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertNotEquals;
import static org.junit.Assert.assertNotNull;
import static org.junit.Assert.assertTrue;

import java.io.IOException;
import java.time.Instant;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Optional;
import java.util.concurrent.CountDownLatch;
import java.util.concurrent.TimeUnit;
import java.util.stream.Collectors;
import org.apache.hadoop.hbase.Coprocessor;
import org.apache.hadoop.hbase.CoprocessorEnvironment;
import org.apache.hadoop.hbase.HBaseClassTestRule;
import org.apache.hadoop.hbase.HBaseTestingUtility;
import org.apache.hadoop.hbase.ServerName;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.coprocessor.MasterCoprocessor;
import org.apache.hadoop.hbase.coprocessor.MasterCoprocessorEnvironment;
import org.apache.hadoop.hbase.coprocessor.MasterObserver;
import org.apache.hadoop.hbase.coprocessor.ObserverContext;
import org.apache.hadoop.hbase.master.HMaster;
import org.apache.hadoop.hbase.master.RegionState;
import org.apache.hadoop.hbase.master.assignment.AssignmentManager;
import org.apache.hadoop.hbase.master.hbck.HbckChore;
import org.apache.hadoop.hbase.master.hbck.HbckReport;
import org.apache.hadoop.hbase.master.procedure.MasterProcedureEnv;
import org.apache.hadoop.hbase.master.procedure.TableProcedureInterface;
import org.apache.hadoop.hbase.procedure2.Procedure;
import org.apache.hadoop.hbase.procedure2.ProcedureExecutor;
import org.apache.hadoop.hbase.procedure2.ProcedureSuspendedException;
import org.apache.hadoop.hbase.procedure2.ProcedureTestingUtility;
import org.apache.hadoop.hbase.regionserver.HRegionServer;
import org.apache.hadoop.hbase.testclassification.ClientTests;
import org.apache.hadoop.hbase.testclassification.LargeTests;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.Pair;
import org.junit.AfterClass;
import org.junit.Before;
import org.junit.BeforeClass;
import org.junit.ClassRule;
import org.junit.Rule;
import org.junit.Test;
import org.junit.experimental.categories.Category;
import org.junit.rules.TestName;
import org.junit.runner.RunWith;
import org.junit.runners.Parameterized;
import org.junit.runners.Parameterized.Parameter;
import org.junit.runners.Parameterized.Parameters;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.apache.hbase.thirdparty.com.google.common.io.Closeables;

import org.apache.hadoop.hbase.shaded.protobuf.ProtobufUtil;

/**
 * Class to test HBaseHbck. Spins up the minicluster once at test start and then takes it down
 * afterward. Add any testing of HBaseHbck functionality here.
 */
@RunWith(Parameterized.class)
@Category({ LargeTests.class, ClientTests.class })
public class TestHbck {
  @ClassRule
  public static final HBaseClassTestRule CLASS_RULE = HBaseClassTestRule.forClass(TestHbck.class);

  private static final Logger LOG = LoggerFactory.getLogger(TestHbck.class);
  private final static HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility();

  @Rule
  public TestName name = new TestName();

  @SuppressWarnings("checkstyle:VisibilityModifier")
  @Parameter
  public boolean async;

  private static final TableName TABLE_NAME = TableName.valueOf(TestHbck.class.getSimpleName());

  private static ProcedureExecutor<MasterProcedureEnv> procExec;

  private static AsyncConnection ASYNC_CONN;

  @Parameters(name = "{index}: async={0}")
  public static List<Object[]> params() {
    return Arrays.asList(new Object[] { false }, new Object[] { true });
  }

  private Hbck getHbck() throws Exception {
    if (async) {
      return ASYNC_CONN.getHbck().get();
    } else {
      return TEST_UTIL.getHbck();
    }
  }

  @BeforeClass
  public static void setUpBeforeClass() throws Exception {
    TEST_UTIL.startMiniCluster(3);
    TEST_UTIL.createMultiRegionTable(TABLE_NAME, Bytes.toBytes("family1"), 5);
    procExec = TEST_UTIL.getMiniHBaseCluster().getMaster().getMasterProcedureExecutor();
    ASYNC_CONN = ConnectionFactory.createAsyncConnection(TEST_UTIL.getConfiguration()).get();
    TEST_UTIL.getHBaseCluster().getMaster().getMasterCoprocessorHost().load(
      FailingMergeAfterMetaUpdatedMasterObserver.class, Coprocessor.PRIORITY_USER,
      TEST_UTIL.getHBaseCluster().getMaster().getConfiguration());
    TEST_UTIL.getHBaseCluster().getMaster().getMasterCoprocessorHost().load(
      FailingSplitAfterMetaUpdatedMasterObserver.class, Coprocessor.PRIORITY_USER,
      TEST_UTIL.getHBaseCluster().getMaster().getConfiguration());
  }

  @AfterClass
  public static void tearDownAfterClass() throws Exception {
    Closeables.close(ASYNC_CONN, true);
    TEST_UTIL.shutdownMiniCluster();
  }

  @Before
  public void setUp() throws IOException {
    TEST_UTIL.ensureSomeRegionServersAvailable(3);
  }

  public static class SuspendProcedure extends
    ProcedureTestingUtility.NoopProcedure<MasterProcedureEnv> implements TableProcedureInterface {
    public SuspendProcedure() {
      super();
    }

    @SuppressWarnings({ "rawtypes", "unchecked" })
    @Override
    protected Procedure[] execute(final MasterProcedureEnv env) throws ProcedureSuspendedException {
      // Always suspend the procedure
      throw new ProcedureSuspendedException();
    }

    @Override
    public TableName getTableName() {
      return TABLE_NAME;
    }

    @Override
    public TableOperationType getTableOperationType() {
      return TableOperationType.READ;
    }
  }

  @Test
  public void testBypassProcedure() throws Exception {
    // SuspendProcedure
    final SuspendProcedure proc = new SuspendProcedure();
    long procId = procExec.submitProcedure(proc);
    Thread.sleep(500);

    // bypass the procedure
    List<Long> pids = Arrays.<Long> asList(procId);
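    // A successful bypass reports true for the corresponding pid; the suspended procedure is
    // then expected to finish with its bypass flag set.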
    List<Boolean> results = getHbck().bypassProcedure(pids, 30000, false, false);
    assertTrue("Failed to bypass procedure!", results.get(0));
    TEST_UTIL.waitFor(5000, () -> proc.isSuccess() && proc.isBypass());
    LOG.info("{} finished", proc);
  }

  @Test
  public void testSetTableStateInMeta() throws Exception {
    Hbck hbck = getHbck();
    // set table state to DISABLED
    hbck.setTableStateInMeta(new TableState(TABLE_NAME, TableState.State.DISABLED));
    // Method {@link Hbck#setTableStateInMeta()} returns the previous state, which in this case
    // will be DISABLED
    TableState prevState =
      hbck.setTableStateInMeta(new TableState(TABLE_NAME, TableState.State.ENABLED));
    assertTrue("Incorrect previous state! expected=DISABLED, found=" + prevState.getState(),
      prevState.isDisabled());
  }

  @Test
  public void testSetRegionStateInMeta() throws Exception {
    Hbck hbck = getHbck();
    Admin admin = TEST_UTIL.getAdmin();
    final List<RegionInfo> regions = admin.getRegions(TABLE_NAME);
    final AssignmentManager am = TEST_UTIL.getHBaseCluster().getMaster().getAssignmentManager();
    Map<String, RegionState.State> prevStates = new HashMap<>();
    Map<String, RegionState.State> newStates = new HashMap<>();
    final Map<String, Pair<RegionState.State, RegionState.State>> regionsMap = new HashMap<>();
    regions.forEach(r -> {
      RegionState prevState = am.getRegionStates().getRegionState(r);
      prevStates.put(r.getEncodedName(), prevState.getState());
      newStates.put(r.getEncodedName(), RegionState.State.CLOSED);
      regionsMap.put(r.getEncodedName(),
        new Pair<>(prevState.getState(), RegionState.State.CLOSED));
    });
    final Map<String, RegionState.State> result = hbck.setRegionStateInMeta(newStates);
    result.forEach((k, v) -> {
      RegionState.State prevState = regionsMap.get(k).getFirst();
      assertEquals(prevState, v);
    });
    regions.forEach(r -> {
      RegionState cachedState = am.getRegionStates().getRegionState(r.getEncodedName());
      RegionState.State newState = regionsMap.get(r.getEncodedName()).getSecond();
      assertEquals(newState, cachedState.getState());
    });
    hbck.setRegionStateInMeta(prevStates);
  }

  @Test
  public void testAssigns() throws Exception {
    Hbck hbck = getHbck();
    try (Admin admin = TEST_UTIL.getConnection().getAdmin()) {
      List<RegionInfo> regions = admin.getRegions(TABLE_NAME);
      for (RegionInfo ri : regions) {
        RegionState rs = TEST_UTIL.getHBaseCluster().getMaster().getAssignmentManager()
          .getRegionStates().getRegionState(ri.getEncodedName());
        LOG.info("RS: {}", rs.toString());
      }
      List<Long> pids =
        hbck.unassigns(regions.stream().map(r -> r.getEncodedName()).collect(Collectors.toList()));
      waitOnPids(pids);
      // Rerun the unassign. Should fail for all Regions since they are already unassigned; a
      // failed unassign will manifest as all pids being -1 (ever since HBASE-24885).
      pids =
        hbck.unassigns(regions.stream().map(r -> r.getEncodedName()).collect(Collectors.toList()));
      waitOnPids(pids);
      for (long pid : pids) {
        assertEquals(Procedure.NO_PROC_ID, pid);
      }
      // If we pass override, then we should be able to unassign EVEN THOUGH the Regions are
      // already unassigned.... makes for a mess, but an operator might want to do this in an
      // extreme case when doing fixup of a broken cluster.
      pids = hbck.unassigns(
        regions.stream().map(r -> r.getEncodedName()).collect(Collectors.toList()), true);
      waitOnPids(pids);
      for (long pid : pids) {
        assertNotEquals(Procedure.NO_PROC_ID, pid);
      }
      // Clean up by bypassing all the unassigns we just made so tests can continue.
      hbck.bypassProcedure(pids, 10000, true, true);
      for (RegionInfo ri : regions) {
        RegionState rs = TEST_UTIL.getHBaseCluster().getMaster().getAssignmentManager()
          .getRegionStates().getRegionState(ri.getEncodedName());
        LOG.info("RS: {}", rs.toString());
        assertTrue(rs.toString(), rs.isClosed());
      }
      pids =
        hbck.assigns(regions.stream().map(r -> r.getEncodedName()).collect(Collectors.toList()));
      waitOnPids(pids);
      // Rerun the assign. Should fail for all Regions since they are already assigned; a failed
      // assign will manifest as all pids being -1 (ever since HBASE-24885).
      pids =
        hbck.assigns(regions.stream().map(r -> r.getEncodedName()).collect(Collectors.toList()));
      for (long pid : pids) {
        assertEquals(Procedure.NO_PROC_ID, pid);
      }
      for (RegionInfo ri : regions) {
        RegionState rs = TEST_UTIL.getHBaseCluster().getMaster().getAssignmentManager()
          .getRegionStates().getRegionState(ri.getEncodedName());
        LOG.info("RS: {}", rs.toString());
        assertTrue(rs.toString(), rs.isOpened());
      }
      // What happens if a bogus region list is passed?
      pids = hbck.assigns(
        Arrays.stream(new String[] { "a", "some rubbish name" }).collect(Collectors.toList()));
      for (long pid : pids) {
        assertEquals(Procedure.NO_PROC_ID, pid);
      }
    }
  }

  @Test
  public void testScheduleSCP() throws Exception {
    HRegionServer testRs = TEST_UTIL.getRSForFirstRegionInTable(TABLE_NAME);
    TEST_UTIL.loadTable(TEST_UTIL.getConnection().getTable(TABLE_NAME), Bytes.toBytes("family1"),
      true);
    ServerName serverName = testRs.getServerName();
    Hbck hbck = getHbck();
    List<Long> pids =
      hbck.scheduleServerCrashProcedure(Arrays.asList(ProtobufUtil.toServerName(serverName)));
    assertTrue(pids.get(0) > 0);
    LOG.info("pid is {}", pids.get(0));

    List<Long> newPids =
      hbck.scheduleServerCrashProcedure(Arrays.asList(ProtobufUtil.toServerName(serverName)));
    assertTrue(newPids.get(0) < 0);
    LOG.info("pid is {}", newPids.get(0));
    waitOnPids(pids);
  }

  @Test
  public void testRunHbckChore() throws Exception {
    HMaster master = TEST_UTIL.getMiniHBaseCluster().getMaster();
    HbckChore hbckChore = master.getHbckChore();
    Instant endTimestamp = Optional.ofNullable(hbckChore.getLastReport())
      .map(HbckReport::getCheckingEndTimestamp).orElse(Instant.EPOCH);
    Hbck hbck = getHbck();
    TEST_UTIL.waitFor(TimeUnit.MINUTES.toMillis(5), hbck::runHbckChore);
    HbckReport report = hbckChore.getLastReport();
    assertNotNull(report);
    assertTrue(report.getCheckingEndTimestamp().isAfter(endTimestamp));
  }

  public static class FailingSplitAfterMetaUpdatedMasterObserver
    implements MasterCoprocessor, MasterObserver {
    @SuppressWarnings("checkstyle:VisibilityModifier")
    public volatile CountDownLatch latch;

    @Override
    public void start(CoprocessorEnvironment e) throws IOException {
      resetLatch();
    }

    @Override
    public Optional<MasterObserver> getMasterObserver() {
      return Optional.of(this);
    }

    @Override
    public void preSplitRegionAfterMETAAction(ObserverContext<MasterCoprocessorEnvironment> ctx)
      throws IOException {
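      // Signal any waiting test that meta has been updated, then fail so the split procedure
      // cannot complete.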
LOG.info("I'm here"); 339 latch.countDown(); 340 throw new IOException("this procedure will fail at here forever"); 341 } 342 343 public void resetLatch() { 344 this.latch = new CountDownLatch(1); 345 } 346 } 347 348 public static class FailingMergeAfterMetaUpdatedMasterObserver 349 implements MasterCoprocessor, MasterObserver { 350 @SuppressWarnings("checkstyle:VisibilityModifier") 351 public volatile CountDownLatch latch; 352 353 @Override 354 public void start(CoprocessorEnvironment e) throws IOException { 355 resetLatch(); 356 } 357 358 @Override 359 public Optional<MasterObserver> getMasterObserver() { 360 return Optional.of(this); 361 } 362 363 public void resetLatch() { 364 this.latch = new CountDownLatch(1); 365 } 366 367 @Override 368 public void postMergeRegionsCommitAction( 369 final ObserverContext<MasterCoprocessorEnvironment> ctx, final RegionInfo[] regionsToMerge, 370 final RegionInfo mergedRegion) throws IOException { 371 latch.countDown(); 372 throw new IOException("this procedure will fail at here forever"); 373 } 374 } 375 376 private void waitOnPids(List<Long> pids) { 377 TEST_UTIL.waitFor(60000, () -> pids.stream().allMatch(procExec::isFinished)); 378 } 379}