001/*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018package org.apache.hadoop.hbase.master.procedure;
019
020import static junit.framework.TestCase.assertFalse;
021import static junit.framework.TestCase.assertNotNull;
022import static org.junit.Assert.assertEquals;
023import static org.junit.Assert.assertNotEquals;
024import static org.junit.Assert.assertTrue;
025
026import java.io.IOException;
027import java.util.List;
028import org.apache.hadoop.hbase.CatalogFamilyFormat;
029import org.apache.hadoop.hbase.HBaseClassTestRule;
030import org.apache.hadoop.hbase.HBaseTestingUtil;
031import org.apache.hadoop.hbase.HConstants;
032import org.apache.hadoop.hbase.MetaTableAccessor;
033import org.apache.hadoop.hbase.ServerName;
034import org.apache.hadoop.hbase.SingleProcessHBaseCluster;
035import org.apache.hadoop.hbase.TableName;
036import org.apache.hadoop.hbase.client.RegionInfo;
037import org.apache.hadoop.hbase.client.Result;
038import org.apache.hadoop.hbase.client.Table;
039import org.apache.hadoop.hbase.master.HMaster;
040import org.apache.hadoop.hbase.master.RegionState;
041import org.apache.hadoop.hbase.procedure2.Procedure;
042import org.apache.hadoop.hbase.regionserver.HRegionServer;
043import org.apache.hadoop.hbase.testclassification.LargeTests;
044import org.apache.hadoop.hbase.testclassification.MasterTests;
045import org.apache.hadoop.hbase.util.Bytes;
046import org.apache.hadoop.hbase.util.Pair;
047import org.apache.hadoop.hbase.util.Threads;
048import org.junit.ClassRule;
049import org.junit.Rule;
050import org.junit.Test;
051import org.junit.experimental.categories.Category;
052import org.junit.rules.TestName;
053import org.slf4j.Logger;
054import org.slf4j.LoggerFactory;
055
056import org.apache.hbase.thirdparty.com.google.protobuf.ServiceException;
057import org.apache.hadoop.hbase.shaded.protobuf.ProtobufUtil;
058import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos;
059
060
061/**
062 * Test of the HBCK-version of SCP.
063 * The HBCKSCP is an SCP only it reads hbase:meta for list of Regions that were
064 * on the server-to-process rather than consult Master in-memory-state.
065 */
066@Category({ MasterTests.class, LargeTests.class })
067public class TestHBCKSCP extends TestSCPBase {
068  private static final Logger LOG = LoggerFactory.getLogger(TestHBCKSCP.class);
069
070  @ClassRule
071  public static final HBaseClassTestRule CLASS_RULE =
072      HBaseClassTestRule.forClass(TestHBCKSCP.class);
073  @Rule
074  public TestName name = new TestName();
075
076  @Test
077  public void test() throws Exception {
078    // we are about to do one for it?
079    SingleProcessHBaseCluster cluster = this.util.getHBaseCluster();
080
081    // Assert that we have three RegionServers. Test depends on there being multiple.
082    assertEquals(RS_COUNT, cluster.getLiveRegionServerThreads().size());
083
084    int count;
085    try (Table table = createTable(TableName.valueOf(this.name.getMethodName()))) {
086      // Load the table with a bit of data so some logs to split and some edits in each region.
087      this.util.loadTable(table, HBaseTestingUtil.COLUMNS[0]);
088      count = util.countRows(table);
089    }
090    assertTrue("expected some rows", count > 0);
091
092    // Make the test easier by not working on server hosting meta...
093    // Find another RS. Purge it from Master memory w/o running SCP (if
094    // SCP runs, it will clear entries from hbase:meta which frustrates
095    // our attempt at manufacturing 'Unknown Servers' condition).
096    int metaIndex = this.util.getMiniHBaseCluster().getServerWithMeta();
097    int rsIndex = (metaIndex + 1) % RS_COUNT;
098    ServerName rsServerName = cluster.getRegionServer(rsIndex).getServerName();
099    HMaster master = cluster.getMaster();
100    // Get a Region that is on the server.
101    RegionInfo rsRI = master.getAssignmentManager().getRegionsOnServer(rsServerName).get(0);
102    Result r = MetaTableAccessor.getRegionResult(master.getConnection(), rsRI.getRegionName());
103    // Assert region is OPEN.
104    assertEquals(RegionState.State.OPEN.toString(),
105        Bytes.toString(r.getValue(HConstants.CATALOG_FAMILY, HConstants.STATE_QUALIFIER)));
106    ServerName serverName = CatalogFamilyFormat.getServerName(r, 0);
107    assertEquals(rsServerName, serverName);
108    // moveFrom adds to dead servers and adds it to processing list only we will
109    // not be processing this server 'normally'. Remove it from processing by
110    // calling 'finish' and then remove it from dead servers so rsServerName
111    // becomes an 'Unknown Server' even though it is still around.
112    LOG.info("Killing {}", rsServerName);
113    cluster.killRegionServer(rsServerName);
114
115    master.getServerManager().moveFromOnlineToDeadServers(rsServerName);
116    master.getServerManager().getDeadServers().removeDeadServer(rsServerName);
117    master.getAssignmentManager().getRegionStates().removeServer(rsServerName);
118    // Kill the server. Nothing should happen since an 'Unknown Server' as far
119    // as the Master is concerned; i.e. no SCP.
120    HRegionServer hrs = cluster.getRegionServer(rsServerName);
121    while (!hrs.isStopped()) {
122      Threads.sleep(10);
123    }
124    LOG.info("Dead {}", rsServerName);
125    // Now assert still references in hbase:meta to the 'dead' server -- they haven't been
126    // cleaned up by an SCP or by anything else.
127    assertTrue(searchMeta(master, rsServerName));
128    // Assert region is OPEN on dead server still.
129    r = MetaTableAccessor.getRegionResult(master.getConnection(), rsRI.getRegionName());
130    assertEquals(RegionState.State.OPEN.toString(),
131        Bytes.toString(r.getValue(HConstants.CATALOG_FAMILY, HConstants.STATE_QUALIFIER)));
132    serverName = CatalogFamilyFormat.getServerName(r, 0);
133    assertNotNull(cluster.getRegionServer(serverName));
134    assertEquals(rsServerName, serverName);
135
136    // I now have 'Unknown Server' references in hbase:meta; i.e. Server references
137    // with no corresponding SCP. Queue one.
138    long pid = scheduleHBCKSCP(rsServerName, master);
139    assertNotEquals(Procedure.NO_PROC_ID, pid);
140    while (master.getMasterProcedureExecutor().getActiveProcIds().contains(pid)) {
141      Threads.sleep(10);
142    }
143    // After SCP, assert region is OPEN on new server.
144    r = MetaTableAccessor.getRegionResult(master.getConnection(), rsRI.getRegionName());
145    assertEquals(RegionState.State.OPEN.toString(),
146        Bytes.toString(r.getValue(HConstants.CATALOG_FAMILY, HConstants.STATE_QUALIFIER)));
147    serverName = CatalogFamilyFormat.getServerName(r, 0);
148    assertNotNull(cluster.getRegionServer(serverName));
149    assertNotEquals(rsServerName, serverName);
150    // Make sure no mention of old server post SCP.
151    assertFalse(searchMeta(master, rsServerName));
152  }
153
154  protected long scheduleHBCKSCP(ServerName rsServerName, HMaster master) throws ServiceException {
155    MasterProtos.ScheduleServerCrashProcedureResponse response =
156        master.getMasterRpcServices().scheduleServerCrashProcedure(null,
157            MasterProtos.ScheduleServerCrashProcedureRequest.newBuilder().
158                addServerName(ProtobufUtil.toServerName(rsServerName)).build());
159    assertEquals(1, response.getPidCount());
160    long pid = response.getPid(0);
161    return pid;
162  }
163
164  /**
165   * @return True if we find reference to <code>sn</code> in meta table.
166   */
167  private boolean searchMeta(HMaster master, ServerName sn) throws IOException {
168    List<Pair<RegionInfo, ServerName>> ps =
169      MetaTableAccessor.getTableRegionsAndLocations(master.getConnection(), null);
170    for (Pair<RegionInfo, ServerName> p: ps) {
171      if (p.getSecond().equals(sn)) {
172        return true;
173      }
174    }
175    return false;
176  }
177}