/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.master.procedure;

import static junit.framework.TestCase.assertFalse;
import static junit.framework.TestCase.assertNotNull;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertNotEquals;
import static org.junit.Assert.assertTrue;

import java.io.IOException;
import java.util.List;

import org.apache.hadoop.hbase.HBaseClassTestRule;
import org.apache.hadoop.hbase.HBaseTestingUtility;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.MetaTableAccessor;
import org.apache.hadoop.hbase.MiniHBaseCluster;
import org.apache.hadoop.hbase.ServerName;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.RegionInfo;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.Table;
import org.apache.hadoop.hbase.master.HMaster;
import org.apache.hadoop.hbase.master.RegionState;
import org.apache.hadoop.hbase.procedure2.Procedure;
import org.apache.hadoop.hbase.regionserver.HRegionServer;
import org.apache.hadoop.hbase.shaded.protobuf.ProtobufUtil;
import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos;

import org.apache.hadoop.hbase.testclassification.LargeTests;
import org.apache.hadoop.hbase.testclassification.MasterTests;

import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.Pair;
import org.apache.hadoop.hbase.util.Threads;
import org.junit.ClassRule;
import org.junit.Rule;
import org.junit.Test;
import org.junit.experimental.categories.Category;
import org.junit.rules.TestName;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;


/**
 * Test of the HBCK-version of SCP.
 * The HBCKSCP is an SCP, except that it reads hbase:meta for the list of Regions that were
 * on the server-to-process rather than consulting the Master's in-memory state.
 */
@Category({ MasterTests.class, LargeTests.class })
public class TestHBCKSCP extends TestSCPBase {
  private static final Logger LOG = LoggerFactory.getLogger(TestHBCKSCP.class);

  @ClassRule
  public static final HBaseClassTestRule CLASS_RULE =
    HBaseClassTestRule.forClass(TestHBCKSCP.class);
  @Rule
  public TestName name = new TestName();

  /**
   * Manufactures an 'Unknown Server' condition and checks that a scheduled SCP cleans it up.
   * <ol>
   *   <li>Create and load a table, then pick a RegionServer other than the one hosting meta.</li>
   *   <li>Purge that server from the Master's ServerManager, dead-server list and RegionStates
   *   so no SCP is triggered for it, then abort it.</li>
   *   <li>Assert hbase:meta still shows a Region OPEN on the aborted server.</li>
   *   <li>Schedule an SCP through the Master RPC services and wait for the procedure to leave
   *   the active set.</li>
   *   <li>Assert the Region is OPEN on a different server and that hbase:meta no longer
   *   mentions the aborted server.</li>
   * </ol>
   */
  @Test
  public void test() throws Exception {
    // The mini cluster whose RegionServers and Master this test manipulates.
    MiniHBaseCluster cluster = this.util.getHBaseCluster();

    // Assert that we have the expected number (RS_COUNT) of RegionServers.
    // Test depends on there being multiple.
    assertEquals(RS_COUNT, cluster.getLiveRegionServerThreads().size());

    int count;
    try (Table table = createTable(TableName.valueOf(this.name.getMethodName()))) {
      // Load the table with a bit of data so some logs to split and some edits in each region.
      this.util.loadTable(table, HBaseTestingUtility.COLUMNS[0]);
      count = util.countRows(table);
    }
    assertTrue("expected some rows", count > 0);

    // Make the test easier by not working on server hosting meta...
    // Find another RS. Purge it from Master memory w/o running SCP (if
    // SCP runs, it will clear entries from hbase:meta which frustrates
    // our attempt at manufacturing 'Unknown Servers' condition).
    int metaIndex = this.util.getMiniHBaseCluster().getServerWithMeta();
    int rsIndex = (metaIndex + 1) % RS_COUNT;
    ServerName rsServerName = cluster.getRegionServer(rsIndex).getServerName();
    HMaster master = cluster.getMaster();
    // Get a Region that is on the server.
    RegionInfo rsRI = master.getAssignmentManager().getRegionsOnServer(rsServerName).get(0);
    Result r = MetaTableAccessor.getRegionResult(master.getConnection(), rsRI.getRegionName());
    // Assert region is OPEN.
    assertEquals(RegionState.State.OPEN.toString(),
      Bytes.toString(r.getValue(HConstants.CATALOG_FAMILY, HConstants.STATE_QUALIFIER)));
    ServerName serverName = MetaTableAccessor.getServerName(r, 0);
    assertEquals(rsServerName, serverName);
    // moveFrom adds to dead servers and adds it to processing list only we will
    // not be processing this server 'normally'. Remove it from processing by
    // calling 'finish' and then remove it from dead servers so rsServerName
    // becomes an 'Unknown Server' even though it is still around.
    master.getServerManager().moveFromOnlineToDeadServers(rsServerName);
    master.getServerManager().getDeadServers().finish(rsServerName);
    master.getServerManager().getDeadServers().removeDeadServer(rsServerName);
    master.getAssignmentManager().getRegionStates().removeServer(rsServerName);
    // Kill the server. Nothing should happen since an 'Unknown Server' as far
    // as the Master is concerned; i.e. no SCP.
    LOG.info("Killing {}", rsServerName);
    HRegionServer hrs = cluster.getRegionServer(rsServerName);
    hrs.abort("KILLED");
    // Poll until the aborted RegionServer reports itself stopped.
    while (!hrs.isStopped()) {
      Threads.sleep(10);
    }
    LOG.info("Dead {}", rsServerName);
    // Now assert still references in hbase:meta to the 'dead' server -- they haven't been
    // cleaned up by an SCP or by anything else.
    assertTrue(searchMeta(master, rsServerName));
    // Assert region is OPEN on dead server still.
    r = MetaTableAccessor.getRegionResult(master.getConnection(), rsRI.getRegionName());
    assertEquals(RegionState.State.OPEN.toString(),
      Bytes.toString(r.getValue(HConstants.CATALOG_FAMILY, HConstants.STATE_QUALIFIER)));
    serverName = MetaTableAccessor.getServerName(r, 0);
    assertNotNull(cluster.getRegionServer(serverName));
    assertEquals(rsServerName, serverName);

    // I now have 'Unknown Server' references in hbase:meta; i.e. Server references
    // with no corresponding SCP. Queue one.
    MasterProtos.ScheduleServerCrashProcedureResponse response =
      master.getMasterRpcServices().scheduleServerCrashProcedure(null,
        MasterProtos.ScheduleServerCrashProcedureRequest.newBuilder().
          addServerName(ProtobufUtil.toServerName(rsServerName)).build());
    assertEquals(1, response.getPidCount());
    long pid = response.getPid(0);
    assertNotEquals(Procedure.NO_PROC_ID, pid);
    // Poll until the scheduled procedure is no longer among the active procedure ids.
    while (master.getMasterProcedureExecutor().getActiveProcIds().contains(pid)) {
      Threads.sleep(10);
    }
    // After SCP, assert region is OPEN on new server.
    r = MetaTableAccessor.getRegionResult(master.getConnection(), rsRI.getRegionName());
    assertEquals(RegionState.State.OPEN.toString(),
      Bytes.toString(r.getValue(HConstants.CATALOG_FAMILY, HConstants.STATE_QUALIFIER)));
    serverName = MetaTableAccessor.getServerName(r, 0);
    assertNotNull(cluster.getRegionServer(serverName));
    assertNotEquals(rsServerName, serverName);
    // Make sure no mention of old server post SCP.
    assertFalse(searchMeta(master, rsServerName));
  }

  /**
   * Scans the region/location pairs read from the meta table for a reference to a server.
   *
   * @param master Master whose connection is used to read the meta table.
   * @param sn Server to look for; must not be null.
   * @return True if we find reference to <code>sn</code> in meta table.
   * @throws IOException if reading the region locations fails.
   */
  private boolean searchMeta(HMaster master, ServerName sn) throws IOException {
    List<Pair<RegionInfo, ServerName>> ps =
      MetaTableAccessor.getTableRegionsAndLocations(master.getConnection(), null);
    for (Pair<RegionInfo, ServerName> p: ps) {
      // Compare from the known non-null side: the location half of a pair may be null when
      // a region has no server recorded, and that must count as "no match", not an NPE.
      if (sn.equals(p.getSecond())) {
        return true;
      }
    }
    return false;
  }
}