001/*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018package org.apache.hadoop.hbase.master.procedure;
019
020import static junit.framework.TestCase.assertFalse;
021import static junit.framework.TestCase.assertNotNull;
022import static org.junit.Assert.assertEquals;
023import static org.junit.Assert.assertNotEquals;
024import static org.junit.Assert.assertTrue;
025
026import java.io.IOException;
027import java.util.List;
028
029import org.apache.hadoop.hbase.HBaseClassTestRule;
030import org.apache.hadoop.hbase.HBaseTestingUtility;
031import org.apache.hadoop.hbase.HConstants;
032import org.apache.hadoop.hbase.MetaTableAccessor;
033import org.apache.hadoop.hbase.MiniHBaseCluster;
034import org.apache.hadoop.hbase.ServerName;
035import org.apache.hadoop.hbase.TableName;
036import org.apache.hadoop.hbase.client.RegionInfo;
037import org.apache.hadoop.hbase.client.Result;
038import org.apache.hadoop.hbase.client.Table;
039import org.apache.hadoop.hbase.master.HMaster;
040import org.apache.hadoop.hbase.master.RegionState;
041import org.apache.hadoop.hbase.procedure2.Procedure;
042import org.apache.hadoop.hbase.regionserver.HRegionServer;
043import org.apache.hadoop.hbase.shaded.protobuf.ProtobufUtil;
044import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos;
045
046import org.apache.hadoop.hbase.testclassification.LargeTests;
047import org.apache.hadoop.hbase.testclassification.MasterTests;
048
049import org.apache.hadoop.hbase.util.Bytes;
050import org.apache.hadoop.hbase.util.Pair;
051import org.apache.hadoop.hbase.util.Threads;
052import org.junit.ClassRule;
053import org.junit.Rule;
054import org.junit.Test;
055import org.junit.experimental.categories.Category;
056import org.junit.rules.TestName;
057import org.slf4j.Logger;
058import org.slf4j.LoggerFactory;
059
060
061/**
062 * Test of the HBCK-version of SCP.
063 * The HBCKSCP is an SCP only it reads hbase:meta for list of Regions that were
064 * on the server-to-process rather than consult Master in-memory-state.
065 */
066@Category({ MasterTests.class, LargeTests.class })
067public class TestHBCKSCP extends TestSCPBase {
068  private static final Logger LOG = LoggerFactory.getLogger(TestHBCKSCP.class);
069
070  @ClassRule
071  public static final HBaseClassTestRule CLASS_RULE =
072      HBaseClassTestRule.forClass(TestHBCKSCP.class);
073  @Rule
074  public TestName name = new TestName();
075
076  @Test
077  public void test() throws Exception {
078    // we are about to do one for it?
079    MiniHBaseCluster cluster = this.util.getHBaseCluster();
080
081    // Assert that we have three RegionServers. Test depends on there being multiple.
082    assertEquals(RS_COUNT, cluster.getLiveRegionServerThreads().size());
083
084    int count;
085    try (Table table = createTable(TableName.valueOf(this.name.getMethodName()))) {
086      // Load the table with a bit of data so some logs to split and some edits in each region.
087      this.util.loadTable(table, HBaseTestingUtility.COLUMNS[0]);
088      count = util.countRows(table);
089    }
090    assertTrue("expected some rows", count > 0);
091
092    // Make the test easier by not working on server hosting meta...
093    // Find another RS. Purge it from Master memory w/o running SCP (if
094    // SCP runs, it will clear entries from hbase:meta which frustrates
095    // our attempt at manufacturing 'Unknown Servers' condition).
096    int metaIndex = this.util.getMiniHBaseCluster().getServerWithMeta();
097    int rsIndex = (metaIndex + 1) % RS_COUNT;
098    ServerName rsServerName = cluster.getRegionServer(rsIndex).getServerName();
099    HMaster master = cluster.getMaster();
100    // Get a Region that is on the server.
101    RegionInfo rsRI = master.getAssignmentManager().getRegionsOnServer(rsServerName).get(0);
102    Result r = MetaTableAccessor.getRegionResult(master.getConnection(), rsRI.getRegionName());
103    // Assert region is OPEN.
104    assertEquals(RegionState.State.OPEN.toString(),
105        Bytes.toString(r.getValue(HConstants.CATALOG_FAMILY, HConstants.STATE_QUALIFIER)));
106    ServerName serverName = MetaTableAccessor.getServerName(r, 0);
107    assertTrue(rsServerName.equals(serverName));
108    // moveFrom adds to dead servers and adds it to processing list only we will
109    // not be processing this server 'normally'. Remove it from processing by
110    // calling 'finish' and then remove it from dead servers so rsServerName
111    // becomes an 'Unknown Server' even though it is still around.
112    master.getServerManager().moveFromOnlineToDeadServers(rsServerName);
113    master.getServerManager().getDeadServers().finish(rsServerName);
114    master.getServerManager().getDeadServers().removeDeadServer(rsServerName);
115    master.getAssignmentManager().getRegionStates().removeServer(rsServerName);
116    // Kill the server. Nothing should happen since an 'Unknown Server' as far
117    // as the Master is concerned; i.e. no SCP.
118    LOG.info("Killing {}", rsServerName);
119    HRegionServer hrs = cluster.getRegionServer(rsServerName);
120    hrs.abort("KILLED");
121    while (!hrs.isStopped()) {
122      Threads.sleep(10);
123    }
124    LOG.info("Dead {}", rsServerName);
125    // Now assert still references in hbase:meta to the 'dead' server -- they haven't been
126    // cleaned up by an SCP or by anything else.
127    assertTrue(searchMeta(master, rsServerName));
128    // Assert region is OPEN on dead server still.
129    r = MetaTableAccessor.getRegionResult(master.getConnection(), rsRI.getRegionName());
130    assertEquals(RegionState.State.OPEN.toString(),
131        Bytes.toString(r.getValue(HConstants.CATALOG_FAMILY, HConstants.STATE_QUALIFIER)));
132    serverName = MetaTableAccessor.getServerName(r, 0);
133    assertNotNull(cluster.getRegionServer(serverName));
134    assertEquals(rsServerName, serverName);
135
136    // I now have 'Unknown Server' references in hbase:meta; i.e. Server references
137    // with no corresponding SCP. Queue one.
138    MasterProtos.ScheduleServerCrashProcedureResponse response =
139        master.getMasterRpcServices().scheduleServerCrashProcedure(null,
140            MasterProtos.ScheduleServerCrashProcedureRequest.newBuilder().
141                addServerName(ProtobufUtil.toServerName(rsServerName)).build());
142    assertEquals(1, response.getPidCount());
143    long pid = response.getPid(0);
144    assertNotEquals(Procedure.NO_PROC_ID, pid);
145    while (master.getMasterProcedureExecutor().getActiveProcIds().contains(pid)) {
146      Threads.sleep(10);
147    }
148    // After SCP, assert region is OPEN on new server.
149    r = MetaTableAccessor.getRegionResult(master.getConnection(), rsRI.getRegionName());
150    assertEquals(RegionState.State.OPEN.toString(),
151        Bytes.toString(r.getValue(HConstants.CATALOG_FAMILY, HConstants.STATE_QUALIFIER)));
152    serverName = MetaTableAccessor.getServerName(r, 0);
153    assertNotNull(cluster.getRegionServer(serverName));
154    assertNotEquals(rsServerName, serverName);
155    // Make sure no mention of old server post SCP.
156    assertFalse(searchMeta(master, rsServerName));
157    assertFalse(master.getServerManager().getDeadServers().isProcessingServer(rsServerName));
158    assertFalse(master.getServerManager().getDeadServers().isDeadServer(rsServerName));
159  }
160
161  /**
162   * @return True if we find reference to <code>sn</code> in meta table.
163   */
164  boolean searchMeta(HMaster master, ServerName sn) throws IOException {
165    List<Pair<RegionInfo, ServerName>> ps =
166      MetaTableAccessor.getTableRegionsAndLocations(master.getConnection(), null);
167    for (Pair<RegionInfo, ServerName> p: ps) {
168      if (p.getSecond().equals(sn)) {
169        return true;
170      }
171    }
172    return false;
173  }
174}