001/*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018package org.apache.hadoop.hbase.client;
019
020import static org.junit.jupiter.api.Assertions.assertFalse;
021import static org.junit.jupiter.api.Assertions.assertNull;
022import static org.junit.jupiter.api.Assertions.assertTrue;
023
024import java.io.IOException;
025import java.util.ArrayList;
026import java.util.Collections;
027import java.util.List;
028import org.apache.hadoop.conf.Configuration;
029import org.apache.hadoop.hbase.HBaseTestingUtil;
030import org.apache.hadoop.hbase.HConstants;
031import org.apache.hadoop.hbase.StartTestingClusterOption;
032import org.apache.hadoop.hbase.master.HMaster;
033import org.apache.hadoop.hbase.master.MasterServices;
034import org.apache.hadoop.hbase.master.assignment.AssignmentManager;
035import org.apache.hadoop.hbase.master.assignment.RegionStateNode;
036import org.apache.hadoop.hbase.master.assignment.TransitRegionStateProcedure;
037import org.apache.hadoop.hbase.master.procedure.MasterProcedureEnv;
038import org.apache.hadoop.hbase.master.region.MasterRegion;
039import org.apache.hadoop.hbase.procedure2.Procedure;
040import org.apache.hadoop.hbase.procedure2.ProcedureSuspendedException;
041import org.apache.hadoop.hbase.procedure2.ProcedureYieldException;
042import org.apache.hadoop.hbase.testclassification.MediumTests;
043import org.apache.hadoop.hbase.testclassification.MiscTests;
044import org.junit.jupiter.api.AfterAll;
045import org.junit.jupiter.api.BeforeAll;
046import org.junit.jupiter.api.Tag;
047import org.junit.jupiter.api.Test;
048
049@Tag(MiscTests.TAG)
050@Tag(MediumTests.TAG)
051public class TestFailedMetaReplicaAssigment {
052
053  private static final HBaseTestingUtil TEST_UTIL = new HBaseTestingUtil();
054
055  @BeforeAll
056  public static void setUp() throws Exception {
057    // using our rigged master, to force a failed meta replica assignment when start up master
058    // this test can be removed once we remove the HConstants.META_REPLICAS_NUM config.
059    Configuration conf = TEST_UTIL.getConfiguration();
060    conf.setInt(HConstants.META_REPLICAS_NUM, 3);
061    StartTestingClusterOption option =
062      StartTestingClusterOption.builder().numAlwaysStandByMasters(1).numMasters(1)
063        .numRegionServers(1).masterClass(BrokenMetaReplicaMaster.class).build();
064    TEST_UTIL.startMiniCluster(option);
065  }
066
067  @AfterAll
068  public static void tearDown() throws IOException {
069    TEST_UTIL.shutdownMiniCluster();
070  }
071
072  @Test
073  public void testFailedReplicaAssignment() throws InterruptedException {
074    HMaster master = TEST_UTIL.getMiniHBaseCluster().getMaster();
075    // waiting for master to come up
076    TEST_UTIL.waitFor(30000, () -> master.isInitialized());
077
078    AssignmentManager am = master.getAssignmentManager();
079    // showing one of the replicas got assigned
080    RegionInfo metaReplicaHri =
081      RegionReplicaUtil.getRegionInfoForReplica(RegionInfoBuilder.FIRST_META_REGIONINFO, 1);
082    // we use assignAsync so we need to wait a bit
083    TEST_UTIL.waitFor(30000, () -> {
084      RegionStateNode metaReplicaRegionNode =
085        am.getRegionStates().getOrCreateRegionStateNode(metaReplicaHri);
086      return metaReplicaRegionNode.getRegionLocation() != null;
087    });
088    // showing one of the replicas failed to be assigned
089    RegionInfo metaReplicaHri2 =
090      RegionReplicaUtil.getRegionInfoForReplica(RegionInfoBuilder.FIRST_META_REGIONINFO, 2);
091    RegionStateNode metaReplicaRegionNode2 =
092      am.getRegionStates().getOrCreateRegionStateNode(metaReplicaHri2);
093    // wait for several seconds to make sure that it is not assigned
094    for (int i = 0; i < 3; i++) {
095      Thread.sleep(2000);
096      assertNull(metaReplicaRegionNode2.getRegionLocation());
097    }
098
099    // showing master is active and running
100    assertFalse(master.isStopping());
101    assertFalse(master.isStopped());
102    assertTrue(master.isActiveMaster());
103  }
104
105  public static class BrokenTransitRegionStateProcedure extends TransitRegionStateProcedure {
106
107    public BrokenTransitRegionStateProcedure() {
108      super(null, null, null, false, TransitionType.ASSIGN);
109    }
110
111    public BrokenTransitRegionStateProcedure(MasterProcedureEnv env, RegionInfo hri) {
112      super(env, hri, null, false, TransitionType.ASSIGN);
113    }
114
115    @Override
116    protected Procedure[] execute(MasterProcedureEnv env)
117      throws ProcedureSuspendedException, ProcedureYieldException, InterruptedException {
118      throw new ProcedureSuspendedException("Never end procedure!");
119    }
120  }
121
122  public static class BrokenMetaReplicaMaster extends HMaster {
123    public BrokenMetaReplicaMaster(final Configuration conf) throws IOException {
124      super(conf);
125    }
126
127    @Override
128    public AssignmentManager createAssignmentManager(MasterServices master,
129      MasterRegion masterRegion) {
130      return new BrokenMasterMetaAssignmentManager(master, masterRegion);
131    }
132  }
133
134  public static class BrokenMasterMetaAssignmentManager extends AssignmentManager {
135    MasterServices master;
136
137    public BrokenMasterMetaAssignmentManager(final MasterServices master,
138      MasterRegion masterRegion) {
139      super(master, masterRegion);
140      this.master = master;
141    }
142
143    @Override
144    public TransitRegionStateProcedure[] createAssignProcedures(List<RegionInfo> hris) {
145      List<TransitRegionStateProcedure> procs = new ArrayList<>();
146      for (RegionInfo hri : hris) {
147        if (hri.isMetaRegion() && hri.getReplicaId() == 2) {
148          RegionStateNode regionNode = getRegionStates().getOrCreateRegionStateNode(hri);
149          regionNode.lock();
150          try {
151            procs.add(regionNode.setProcedure(new BrokenTransitRegionStateProcedure(
152              master.getMasterProcedureExecutor().getEnvironment(), hri)));
153          } finally {
154            regionNode.unlock();
155          }
156        } else {
157          procs.add(super.createAssignProcedures(Collections.singletonList(hri))[0]);
158        }
159      }
160      return procs.toArray(new TransitRegionStateProcedure[0]);
161    }
162  }
163}