001/*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018package org.apache.hadoop.hbase.replication;
019
020import static org.hamcrest.MatcherAssert.*;
021import static org.hamcrest.Matchers.*;
022import static org.junit.Assert.assertEquals;
023
024import java.io.IOException;
025import java.util.Collections;
026import java.util.List;
027import org.apache.hadoop.conf.Configuration;
028import org.apache.hadoop.hbase.HBaseClassTestRule;
029import org.apache.hadoop.hbase.HConstants;
030import org.apache.hadoop.hbase.ServerName;
031import org.apache.hadoop.hbase.TableName;
032import org.apache.hadoop.hbase.client.Table;
033import org.apache.hadoop.hbase.master.HMaster;
034import org.apache.hadoop.hbase.master.MasterServices;
035import org.apache.hadoop.hbase.master.RegionServerList;
036import org.apache.hadoop.hbase.master.ServerManager;
037import org.apache.hadoop.hbase.master.procedure.ServerCrashProcedure;
038import org.apache.hadoop.hbase.master.replication.AssignReplicationQueuesProcedure;
039import org.apache.hadoop.hbase.master.replication.RemovePeerProcedure;
040import org.apache.hadoop.hbase.procedure2.Procedure;
041import org.apache.hadoop.hbase.testclassification.LargeTests;
042import org.apache.hadoop.hbase.testclassification.ReplicationTests;
043import org.junit.AfterClass;
044import org.junit.BeforeClass;
045import org.junit.ClassRule;
046import org.junit.Test;
047import org.junit.experimental.categories.Category;
048
049import org.apache.hbase.thirdparty.com.google.common.io.Closeables;
050
051import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProcedureProtos.PeerModificationState;
052import org.apache.hadoop.hbase.shaded.protobuf.generated.ProcedureProtos.ProcedureState;
053
054/**
055 * Make sure we will wait until all the SCPs finished in RemovePeerProcedure.
056 * <p/>
057 * See HBASE-27109 for more details.
058 */
059@Category({ ReplicationTests.class, LargeTests.class })
060public class TestRemovePeerProcedureWaitForSCP extends TestReplicationBase {
061
062  @ClassRule
063  public static final HBaseClassTestRule CLASS_RULE =
064    HBaseClassTestRule.forClass(TestRemovePeerProcedureWaitForSCP.class);
065
066  private static final TableName tableName3 = TableName.valueOf("test3");
067
068  private static final String PEER_ID3 = "3";
069
070  private static Table table3;
071
072  private static volatile boolean EMPTY = false;
073
074  public static final class ServerManagerForTest extends ServerManager {
075
076    public ServerManagerForTest(MasterServices master, RegionServerList storage) {
077      super(master, storage);
078    }
079
080    @Override
081    public List<ServerName> getOnlineServersList() {
082      // return no region server to make the procedure hang
083      if (EMPTY) {
084        for (StackTraceElement e : Thread.currentThread().getStackTrace()) {
085          if (e.getClassName().equals(AssignReplicationQueuesProcedure.class.getName())) {
086            return Collections.emptyList();
087          }
088        }
089      }
090      return super.getOnlineServersList();
091    }
092  }
093
094  public static final class HMasterForTest extends HMaster {
095
096    public HMasterForTest(Configuration conf) throws IOException {
097      super(conf);
098    }
099
100    @Override
101    protected ServerManager createServerManager(MasterServices master, RegionServerList storage)
102      throws IOException {
103      setupClusterConnection();
104      return new ServerManagerForTest(master, storage);
105    }
106  }
107
108  @BeforeClass
109  public static void setUpBeforeClass() throws Exception {
110    CONF1.setClass(HConstants.MASTER_IMPL, HMasterForTest.class, HMaster.class);
111    TestReplicationBase.setUpBeforeClass();
112    createTable(tableName3);
113    table3 = connection1.getTable(tableName3);
114  }
115
116  @Override
117  public void setUpBase() throws Exception {
118    super.setUpBase();
119    // set up two replication peers and only 1 rs to test claim replication queue with multiple
120    // round
121    addPeer(PEER_ID3, tableName3);
122  }
123
124  @Override
125  public void tearDownBase() throws Exception {
126    super.tearDownBase();
127    removePeer(PEER_ID3);
128  }
129
130  @AfterClass
131  public static void tearDownAfterClass() throws Exception {
132    Closeables.close(table3, true);
133    TestReplicationBase.tearDownAfterClass();
134  }
135
136  @Test
137  public void testWait() throws Exception {
138    // disable the peers
139    hbaseAdmin.disableReplicationPeer(PEER_ID2);
140    hbaseAdmin.disableReplicationPeer(PEER_ID3);
141
142    // put some data
143    UTIL1.loadTable(htable1, famName);
144    UTIL1.loadTable(table3, famName);
145
146    EMPTY = true;
147    UTIL1.getMiniHBaseCluster().stopRegionServer(0).join();
148    UTIL1.getMiniHBaseCluster().startRegionServer();
149
150    // since there is no active region server to get the replication queue, the procedure should be
151    // in WAITING_TIMEOUT state for most time to retry
152    HMaster master = UTIL1.getMiniHBaseCluster().getMaster();
153    UTIL1.waitFor(30000,
154      () -> master.getProcedures().stream()
155        .filter(p -> p instanceof AssignReplicationQueuesProcedure)
156        .anyMatch(p -> p.getState() == ProcedureState.WAITING_TIMEOUT));
157
158    // call remove replication peer, and make sure it will be stuck in the POST_PEER_MODIFICATION
159    // state.
160    hbaseAdmin.removeReplicationPeerAsync(PEER_ID3);
161    UTIL1.waitFor(30000,
162      () -> master.getProcedures().stream().filter(p -> p instanceof RemovePeerProcedure)
163        .anyMatch(p -> ((RemovePeerProcedure) p).getCurrentStateId()
164            == PeerModificationState.POST_PEER_MODIFICATION_VALUE));
165    Thread.sleep(5000);
166    assertEquals(PeerModificationState.POST_PEER_MODIFICATION_VALUE,
167      ((RemovePeerProcedure) master.getProcedures().stream()
168        .filter(p -> p instanceof RemovePeerProcedure).findFirst().get()).getCurrentStateId());
169    EMPTY = false;
170    // wait until the SCP finished, AssignReplicationQueuesProcedure is a sub procedure of SCP
171    UTIL1.waitFor(30000, () -> master.getProcedures().stream()
172      .filter(p -> p instanceof ServerCrashProcedure).allMatch(Procedure::isSuccess));
173    // the RemovePeerProcedure should have also finished
174    UTIL1.waitFor(30000, () -> master.getProcedures().stream()
175      .filter(p -> p instanceof RemovePeerProcedure).allMatch(Procedure::isSuccess));
176    // make sure there is no remaining replication queues for PEER_ID3
177    assertThat(master.getReplicationPeerManager().getQueueStorage().listAllQueueIds(PEER_ID3),
178      empty());
179  }
180}