001/*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018package org.apache.hadoop.hbase.master.procedure;
019
020import static junit.framework.TestCase.assertFalse;
021import static junit.framework.TestCase.assertNotNull;
022import static org.junit.Assert.assertEquals;
023import static org.junit.Assert.assertNotEquals;
024import static org.junit.Assert.assertTrue;
025
026import java.io.IOException;
027import java.util.List;
028import org.apache.hadoop.hbase.HBaseClassTestRule;
029import org.apache.hadoop.hbase.HBaseTestingUtility;
030import org.apache.hadoop.hbase.HConstants;
031import org.apache.hadoop.hbase.MetaTableAccessor;
032import org.apache.hadoop.hbase.MiniHBaseCluster;
033import org.apache.hadoop.hbase.ServerName;
034import org.apache.hadoop.hbase.TableName;
035import org.apache.hadoop.hbase.client.RegionInfo;
036import org.apache.hadoop.hbase.client.Result;
037import org.apache.hadoop.hbase.client.Table;
038import org.apache.hadoop.hbase.master.HMaster;
039import org.apache.hadoop.hbase.master.RegionState;
040import org.apache.hadoop.hbase.procedure2.Procedure;
041import org.apache.hadoop.hbase.regionserver.HRegionServer;
042import org.apache.hadoop.hbase.testclassification.LargeTests;
043import org.apache.hadoop.hbase.testclassification.MasterTests;
044import org.apache.hadoop.hbase.util.Bytes;
045import org.apache.hadoop.hbase.util.Pair;
046import org.apache.hadoop.hbase.util.Threads;
047import org.junit.ClassRule;
048import org.junit.Rule;
049import org.junit.Test;
050import org.junit.experimental.categories.Category;
051import org.junit.rules.TestName;
052import org.slf4j.Logger;
053import org.slf4j.LoggerFactory;
054
055import org.apache.hbase.thirdparty.com.google.protobuf.ServiceException;
056
057import org.apache.hadoop.hbase.shaded.protobuf.ProtobufUtil;
058import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos;
059
060/**
061 * Test of the HBCK-version of SCP. The HBCKSCP is an SCP only it reads hbase:meta for list of
062 * Regions that were on the server-to-process rather than consult Master in-memory-state.
063 */
064@Category({ MasterTests.class, LargeTests.class })
065public class TestHBCKSCP extends TestSCPBase {
066  private static final Logger LOG = LoggerFactory.getLogger(TestHBCKSCP.class);
067
068  @ClassRule
069  public static final HBaseClassTestRule CLASS_RULE =
070    HBaseClassTestRule.forClass(TestHBCKSCP.class);
071  @Rule
072  public TestName name = new TestName();
073
074  @Test
075  public void test() throws Exception {
076    // we are about to do one for it?
077    MiniHBaseCluster cluster = this.util.getHBaseCluster();
078
079    // Assert that we have three RegionServers. Test depends on there being multiple.
080    assertEquals(RS_COUNT, cluster.getLiveRegionServerThreads().size());
081
082    int count;
083    try (Table table = createTable(TableName.valueOf(this.name.getMethodName()))) {
084      // Load the table with a bit of data so some logs to split and some edits in each region.
085      this.util.loadTable(table, HBaseTestingUtility.COLUMNS[0]);
086      count = util.countRows(table);
087    }
088    assertTrue("expected some rows", count > 0);
089
090    // Make the test easier by not working on server hosting meta...
091    // Find another RS. Purge it from Master memory w/o running SCP (if
092    // SCP runs, it will clear entries from hbase:meta which frustrates
093    // our attempt at manufacturing 'Unknown Servers' condition).
094    int metaIndex = this.util.getMiniHBaseCluster().getServerWithMeta();
095    int rsIndex = (metaIndex + 1) % RS_COUNT;
096    ServerName rsServerName = cluster.getRegionServer(rsIndex).getServerName();
097    HMaster master = cluster.getMaster();
098    // Get a Region that is on the server.
099    RegionInfo rsRI = master.getAssignmentManager().getRegionsOnServer(rsServerName).get(0);
100    Result r = MetaTableAccessor.getRegionResult(master.getConnection(), rsRI.getRegionName());
101    // Assert region is OPEN.
102    assertEquals(RegionState.State.OPEN.toString(),
103      Bytes.toString(r.getValue(HConstants.CATALOG_FAMILY, HConstants.STATE_QUALIFIER)));
104    ServerName serverName = MetaTableAccessor.getServerName(r, 0);
105    assertEquals(rsServerName, serverName);
106    // moveFrom adds to dead servers and adds it to processing list only we will
107    // not be processing this server 'normally'. Remove it from processing by
108    // calling 'finish' and then remove it from dead servers so rsServerName
109    // becomes an 'Unknown Server' even though it is still around.
110    LOG.info("Killing {}", rsServerName);
111    cluster.killRegionServer(rsServerName);
112
113    master.getServerManager().moveFromOnlineToDeadServers(rsServerName);
114    master.getServerManager().getDeadServers().finish(rsServerName);
115    master.getServerManager().getDeadServers().removeDeadServer(rsServerName);
116    master.getAssignmentManager().getRegionStates().removeServer(rsServerName);
117    // Kill the server. Nothing should happen since an 'Unknown Server' as far
118    // as the Master is concerned; i.e. no SCP.
119    HRegionServer hrs = cluster.getRegionServer(rsServerName);
120    while (!hrs.isStopped()) {
121      Threads.sleep(10);
122    }
123    LOG.info("Dead {}", rsServerName);
124    // Now assert still references in hbase:meta to the 'dead' server -- they haven't been
125    // cleaned up by an SCP or by anything else.
126    assertTrue(searchMeta(master, rsServerName));
127    // Assert region is OPEN on dead server still.
128    r = MetaTableAccessor.getRegionResult(master.getConnection(), rsRI.getRegionName());
129    assertEquals(RegionState.State.OPEN.toString(),
130      Bytes.toString(r.getValue(HConstants.CATALOG_FAMILY, HConstants.STATE_QUALIFIER)));
131    serverName = MetaTableAccessor.getServerName(r, 0);
132    assertNotNull(cluster.getRegionServer(serverName));
133    assertEquals(rsServerName, serverName);
134
135    // I now have 'Unknown Server' references in hbase:meta; i.e. Server references
136    // with no corresponding SCP. Queue one.
137    long pid = scheduleHBCKSCP(rsServerName, master);
138    assertNotEquals(Procedure.NO_PROC_ID, pid);
139    while (master.getMasterProcedureExecutor().getActiveProcIds().contains(pid)) {
140      Threads.sleep(10);
141    }
142    // After SCP, assert region is OPEN on new server.
143    r = MetaTableAccessor.getRegionResult(master.getConnection(), rsRI.getRegionName());
144    assertEquals(RegionState.State.OPEN.toString(),
145      Bytes.toString(r.getValue(HConstants.CATALOG_FAMILY, HConstants.STATE_QUALIFIER)));
146    serverName = MetaTableAccessor.getServerName(r, 0);
147    assertNotNull(cluster.getRegionServer(serverName));
148    assertNotEquals(rsServerName, serverName);
149    // Make sure no mention of old server post SCP.
150    assertFalse(searchMeta(master, rsServerName));
151  }
152
153  protected long scheduleHBCKSCP(ServerName rsServerName, HMaster master) throws ServiceException {
154    MasterProtos.ScheduleServerCrashProcedureResponse response = master.getMasterRpcServices()
155      .scheduleServerCrashProcedure(null, MasterProtos.ScheduleServerCrashProcedureRequest
156        .newBuilder().addServerName(ProtobufUtil.toServerName(rsServerName)).build());
157    assertEquals(1, response.getPidCount());
158    long pid = response.getPid(0);
159    return pid;
160  }
161
162  /** Returns True if we find reference to <code>sn</code> in meta table. */
163  private boolean searchMeta(HMaster master, ServerName sn) throws IOException {
164    List<Pair<RegionInfo, ServerName>> ps =
165      MetaTableAccessor.getTableRegionsAndLocations(master.getConnection(), null);
166    for (Pair<RegionInfo, ServerName> p : ps) {
167      if (p.getSecond().equals(sn)) {
168        return true;
169      }
170    }
171    return false;
172  }
173}