/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.master.procedure;

import java.io.IOException;
import java.util.concurrent.Semaphore;
import org.apache.hadoop.hbase.HBaseTestingUtil;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.procedure2.Procedure;
import org.apache.hadoop.hbase.procedure2.ProcedureExecutor;
import org.apache.hadoop.hbase.procedure2.ProcedureSuspendedException;
import org.apache.hadoop.hbase.procedure2.ProcedureTestingUtility;
import org.apache.hadoop.hbase.procedure2.ProcedureTestingUtility.NoopProcedure;
import org.apache.hadoop.hbase.procedure2.ProcedureYieldException;
import org.apache.hadoop.hbase.procedure2.store.wal.WALProcedureStore;
import org.apache.hadoop.hbase.testclassification.MasterTests;
import org.apache.hadoop.hbase.testclassification.SmallTests;
import org.junit.jupiter.api.AfterAll;
import org.junit.jupiter.api.AfterEach;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Tag;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.api.TestInfo;

/**
 * Dead lock tests for table shared/exclusive locks handed out by {@link MasterProcedureScheduler}.
 * <p/>
 * Every test submits one procedure that takes the table shared lock and one that takes the table
 * exclusive lock on the same table, lets them block inside {@code execute} on a latch, then
 * releases the latches and waits for both procedures to be reported as finished.
 */
@Tag(MasterTests.TAG)
@Tag(SmallTests.TAG)
public class TestSchedulerQueueDeadLock {

  private static final HBaseTestingUtil UTIL = new HBaseTestingUtil();

  private static final TableName TABLE_NAME = TableName.valueOf("deadlock");

  /** Upper bound, in milliseconds, passed to every {@code UTIL.waitFor} call in this class. */
  private static final long WAIT_TIMEOUT_MS = 10000;

  /** Minimal procedure environment: it only hands out the scheduler used for table locking. */
  private static final class TestEnv {
    private final MasterProcedureScheduler scheduler;

    public TestEnv(MasterProcedureScheduler scheduler) {
      this.scheduler = scheduler;
    }

    public MasterProcedureScheduler getScheduler() {
      return scheduler;
    }
  }

  /**
   * A procedure which takes the shared lock of {@link #TABLE_NAME} and whose {@code execute} blocks
   * until the test releases a permit on its latch.
   */
  public static class TableSharedProcedure extends NoopProcedure<TestEnv>
    implements TableProcedureInterface {

    // starts with zero permits, so execute() parks until the test calls release()
    private final Semaphore latch = new Semaphore(0);

    @Override
    protected Procedure<TestEnv>[] execute(TestEnv env)
      throws ProcedureYieldException, ProcedureSuspendedException, InterruptedException {
      latch.acquire();
      return null;
    }

    /**
     * @return {@code LOCK_EVENT_WAIT} when {@code waitTableSharedLock} returns true, otherwise
     *         {@code LOCK_ACQUIRED}
     */
    @Override
    protected LockState acquireLock(TestEnv env) {
      if (env.getScheduler().waitTableSharedLock(this, getTableName())) {
        return LockState.LOCK_EVENT_WAIT;
      }
      return LockState.LOCK_ACQUIRED;
    }

    @Override
    protected void releaseLock(TestEnv env) {
      env.getScheduler().wakeTableSharedLock(this, getTableName());
    }

    @Override
    protected boolean holdLock(TestEnv env) {
      return true;
    }

    @Override
    public TableName getTableName() {
      return TABLE_NAME;
    }

    @Override
    public TableOperationType getTableOperationType() {
      return TableOperationType.READ;
    }
  }

  /**
   * A procedure which takes the exclusive lock of {@link #TABLE_NAME} and whose {@code execute}
   * blocks until the test releases a permit on its latch.
   */
  public static class TableExclusiveProcedure extends NoopProcedure<TestEnv>
    implements TableProcedureInterface {

    // starts with zero permits, so execute() parks until the test calls release()
    private final Semaphore latch = new Semaphore(0);

    @Override
    protected Procedure<TestEnv>[] execute(TestEnv env)
      throws ProcedureYieldException, ProcedureSuspendedException, InterruptedException {
      latch.acquire();
      return null;
    }

    /**
     * @return {@code LOCK_EVENT_WAIT} when {@code waitTableExclusiveLock} returns true, otherwise
     *         {@code LOCK_ACQUIRED}
     */
    @Override
    protected LockState acquireLock(TestEnv env) {
      if (env.getScheduler().waitTableExclusiveLock(this, getTableName())) {
        return LockState.LOCK_EVENT_WAIT;
      }
      return LockState.LOCK_ACQUIRED;
    }

    @Override
    protected void releaseLock(TestEnv env) {
      env.getScheduler().wakeTableExclusiveLock(this, getTableName());
    }

    @Override
    protected boolean holdLock(TestEnv env) {
      return true;
    }

    @Override
    public TableName getTableName() {
      return TABLE_NAME;
    }

    @Override
    public TableOperationType getTableOperationType() {
      return TableOperationType.EDIT;
    }
  }

  @AfterAll
  public static void tearDownAfterClass() throws IOException {
    UTIL.cleanupTestDir();
  }

  private WALProcedureStore procStore;

  private ProcedureExecutor<TestEnv> procExec;

  /**
   * Creates a WAL procedure store under a directory named after the running test method and an
   * executor on top of it. Workers are not started here; each test calls {@code startWorkers()}
   * after submitting its procedures.
   */
  @BeforeEach
  public void setUp(TestInfo testInfo) throws IOException {
    UTIL.getConfiguration().setInt("hbase.procedure.worker.stuck.threshold.msec", 6000000);
    // Fail with a readable message instead of a bare NoSuchElementException from Optional.get()
    String testMethodName = testInfo.getTestMethod().map(m -> m.getName())
      .orElseThrow(() -> new IllegalStateException(
        "No test method available for " + testInfo.getDisplayName()));
    procStore = ProcedureTestingUtility.createWalStore(UTIL.getConfiguration(),
      UTIL.getDataTestDir(testMethodName));
    procStore.start(1);
    MasterProcedureScheduler scheduler = new MasterProcedureScheduler(pid -> null);
    procExec = new ProcedureExecutor<>(UTIL.getConfiguration(), new TestEnv(scheduler), procStore,
      scheduler);
    procExec.init(1, false);
  }

  /**
   * Stops the executor and then the store. The store is stopped in a {@code finally} block so it
   * is not left running if stopping the executor throws.
   */
  @AfterEach
  public void tearDown() {
    try {
      if (procExec != null) {
        procExec.stop();
      }
    } finally {
      if (procStore != null) {
        procStore.stop(false);
      }
    }
  }

  public static final class TableSharedProcedureWithId extends TableSharedProcedure {

    @Override
    protected void setProcId(long procId) {
      // this is a hack to make this procedure be loaded after the procedure below as we will sort
      // the procedures by id when loading.
      super.setProcId(2L);
    }
  }

  public static final class TableExclusiveProcedureWithId extends TableExclusiveProcedure {

    @Override
    protected void setProcId(long procId) {
      // this is a hack to make this procedure be loaded before the procedure above as we will
      // sort the procedures by id when loading.
      super.setProcId(1L);
    }
  }

  @Test
  public void testTableProcedureDeadLockAfterRestarting() throws Exception {
    // let the shared procedure run first, but let it have a greater procId so when loading it will
    // be loaded at last.
    long procId1 = procExec.submitProcedure(new TableSharedProcedureWithId());
    long procId2 = procExec.submitProcedure(new TableExclusiveProcedureWithId());
    procExec.startWorkers();
    // wait until a worker thread is parked inside the shared procedure's execute()
    UTIL.waitFor(WAIT_TIMEOUT_MS,
      () -> ((TableSharedProcedure) procExec.getProcedure(procId1)).latch.hasQueuedThreads());

    ProcedureTestingUtility.restart(procExec);

    ((TableSharedProcedure) procExec.getProcedure(procId1)).latch.release();
    ((TableExclusiveProcedure) procExec.getProcedure(procId2)).latch.release();

    UTIL.waitFor(WAIT_TIMEOUT_MS, () -> procExec.isFinished(procId1));
    UTIL.waitFor(WAIT_TIMEOUT_MS, () -> procExec.isFinished(procId2));
  }

  /**
   * A procedure holding the shared lock of {@link #TABLE_NAME} which, on its first execution,
   * returns a single {@link TableSharedProcedure} as its child and finishes on the next execution.
   */
  public static final class TableShardParentProcedure extends NoopProcedure<TestEnv>
    implements TableProcedureInterface {

    private boolean scheduled;

    @Override
    protected Procedure<TestEnv>[] execute(TestEnv env)
      throws ProcedureYieldException, ProcedureSuspendedException, InterruptedException {
      if (!scheduled) {
        scheduled = true;
        // Java can not create a generic array directly; the array only ever holds a
        // TableSharedProcedure, which is a Procedure<TestEnv>, so the conversion is safe.
        @SuppressWarnings({ "unchecked", "rawtypes" })
        Procedure<TestEnv>[] children = new Procedure[] { new TableSharedProcedure() };
        return children;
      }
      return null;
    }

    /**
     * @return {@code LOCK_EVENT_WAIT} when {@code waitTableSharedLock} returns true, otherwise
     *         {@code LOCK_ACQUIRED}
     */
    @Override
    protected LockState acquireLock(TestEnv env) {
      if (env.getScheduler().waitTableSharedLock(this, getTableName())) {
        return LockState.LOCK_EVENT_WAIT;
      }
      return LockState.LOCK_ACQUIRED;
    }

    @Override
    protected void releaseLock(TestEnv env) {
      env.getScheduler().wakeTableSharedLock(this, getTableName());
    }

    @Override
    protected boolean holdLock(TestEnv env) {
      return true;
    }

    @Override
    public TableName getTableName() {
      return TABLE_NAME;
    }

    @Override
    public TableOperationType getTableOperationType() {
      return TableOperationType.READ;
    }
  }

  @Test
  public void testTableProcedureSubProcedureDeadLock() throws Exception {
    // the shared procedure will also schedule a shared procedure, but after the exclusive procedure
    long procId1 = procExec.submitProcedure(new TableShardParentProcedure());
    long procId2 = procExec.submitProcedure(new TableExclusiveProcedure());
    procExec.startWorkers();
    // wait until the child shared procedure shows up in the executor
    UTIL.waitFor(WAIT_TIMEOUT_MS,
      () -> procExec.getProcedures().stream().anyMatch(p -> p instanceof TableSharedProcedure));
    procExec.getProcedures().stream().filter(p -> p instanceof TableSharedProcedure)
      .map(p -> (TableSharedProcedure) p).forEach(p -> p.latch.release());
    ((TableExclusiveProcedure) procExec.getProcedure(procId2)).latch.release();

    UTIL.waitFor(WAIT_TIMEOUT_MS, () -> procExec.isFinished(procId1));
    UTIL.waitFor(WAIT_TIMEOUT_MS, () -> procExec.isFinished(procId2));
  }
}