001/* 002 * Licensed to the Apache Software Foundation (ASF) under one 003 * or more contributor license agreements. See the NOTICE file 004 * distributed with this work for additional information 005 * regarding copyright ownership. The ASF licenses this file 006 * to you under the Apache License, Version 2.0 (the 007 * "License"); you may not use this file except in compliance 008 * with the License. You may obtain a copy of the License at 009 * 010 * http://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, software 013 * distributed under the License is distributed on an "AS IS" BASIS, 014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 015 * See the License for the specific language governing permissions and 016 * limitations under the License. 017 */ 018package org.apache.hadoop.hbase.master.procedure; 019 020import static junit.framework.TestCase.assertFalse; 021import static junit.framework.TestCase.assertNotNull; 022import static org.junit.Assert.assertEquals; 023import static org.junit.Assert.assertNotEquals; 024import static org.junit.Assert.assertTrue; 025 026import java.io.IOException; 027import java.util.List; 028import org.apache.hadoop.hbase.HBaseClassTestRule; 029import org.apache.hadoop.hbase.HBaseTestingUtility; 030import org.apache.hadoop.hbase.HConstants; 031import org.apache.hadoop.hbase.MetaTableAccessor; 032import org.apache.hadoop.hbase.MiniHBaseCluster; 033import org.apache.hadoop.hbase.ServerName; 034import org.apache.hadoop.hbase.TableName; 035import org.apache.hadoop.hbase.client.RegionInfo; 036import org.apache.hadoop.hbase.client.Result; 037import org.apache.hadoop.hbase.client.Table; 038import org.apache.hadoop.hbase.master.HMaster; 039import org.apache.hadoop.hbase.master.RegionState; 040import org.apache.hadoop.hbase.procedure2.Procedure; 041import org.apache.hadoop.hbase.regionserver.HRegionServer; 042import org.apache.hadoop.hbase.testclassification.LargeTests; 043import org.apache.hadoop.hbase.testclassification.MasterTests; 044import org.apache.hadoop.hbase.util.Bytes; 045import org.apache.hadoop.hbase.util.Pair; 046import org.apache.hadoop.hbase.util.Threads; 047import org.junit.ClassRule; 048import org.junit.Rule; 049import org.junit.Test; 050import org.junit.experimental.categories.Category; 051import org.junit.rules.TestName; 052import org.slf4j.Logger; 053import org.slf4j.LoggerFactory; 054 055import org.apache.hbase.thirdparty.com.google.protobuf.ServiceException; 056 057import org.apache.hadoop.hbase.shaded.protobuf.ProtobufUtil; 058import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos; 059 060/** 061 * Test of the HBCK-version of SCP. The HBCKSCP is an SCP only it reads hbase:meta for list of 062 * Regions that were on the server-to-process rather than consult Master in-memory-state. 063 */ 064@Category({ MasterTests.class, LargeTests.class }) 065public class TestHBCKSCP extends TestSCPBase { 066 private static final Logger LOG = LoggerFactory.getLogger(TestHBCKSCP.class); 067 068 @ClassRule 069 public static final HBaseClassTestRule CLASS_RULE = 070 HBaseClassTestRule.forClass(TestHBCKSCP.class); 071 @Rule 072 public TestName name = new TestName(); 073 074 @Test 075 public void test() throws Exception { 076 // we are about to do one for it? 077 MiniHBaseCluster cluster = this.util.getHBaseCluster(); 078 079 // Assert that we have three RegionServers. Test depends on there being multiple. 080 assertEquals(RS_COUNT, cluster.getLiveRegionServerThreads().size()); 081 082 int count; 083 try (Table table = createTable(TableName.valueOf(this.name.getMethodName()))) { 084 // Load the table with a bit of data so some logs to split and some edits in each region. 085 this.util.loadTable(table, HBaseTestingUtility.COLUMNS[0]); 086 count = util.countRows(table); 087 } 088 assertTrue("expected some rows", count > 0); 089 090 // Make the test easier by not working on server hosting meta... 091 // Find another RS. Purge it from Master memory w/o running SCP (if 092 // SCP runs, it will clear entries from hbase:meta which frustrates 093 // our attempt at manufacturing 'Unknown Servers' condition). 094 int metaIndex = this.util.getMiniHBaseCluster().getServerWithMeta(); 095 int rsIndex = (metaIndex + 1) % RS_COUNT; 096 ServerName rsServerName = cluster.getRegionServer(rsIndex).getServerName(); 097 HMaster master = cluster.getMaster(); 098 // Get a Region that is on the server. 099 RegionInfo rsRI = master.getAssignmentManager().getRegionsOnServer(rsServerName).get(0); 100 Result r = MetaTableAccessor.getRegionResult(master.getConnection(), rsRI.getRegionName()); 101 // Assert region is OPEN. 102 assertEquals(RegionState.State.OPEN.toString(), 103 Bytes.toString(r.getValue(HConstants.CATALOG_FAMILY, HConstants.STATE_QUALIFIER))); 104 ServerName serverName = MetaTableAccessor.getServerName(r, 0); 105 assertEquals(rsServerName, serverName); 106 // moveFrom adds to dead servers and adds it to processing list only we will 107 // not be processing this server 'normally'. Remove it from processing by 108 // calling 'finish' and then remove it from dead servers so rsServerName 109 // becomes an 'Unknown Server' even though it is still around. 110 LOG.info("Killing {}", rsServerName); 111 cluster.killRegionServer(rsServerName); 112 113 master.getServerManager().moveFromOnlineToDeadServers(rsServerName); 114 master.getServerManager().getDeadServers().finish(rsServerName); 115 master.getServerManager().getDeadServers().removeDeadServer(rsServerName); 116 master.getAssignmentManager().getRegionStates().removeServer(rsServerName); 117 // Kill the server. Nothing should happen since an 'Unknown Server' as far 118 // as the Master is concerned; i.e. no SCP. 119 HRegionServer hrs = cluster.getRegionServer(rsServerName); 120 while (!hrs.isStopped()) { 121 Threads.sleep(10); 122 } 123 LOG.info("Dead {}", rsServerName); 124 // Now assert still references in hbase:meta to the 'dead' server -- they haven't been 125 // cleaned up by an SCP or by anything else. 126 assertTrue(searchMeta(master, rsServerName)); 127 // Assert region is OPEN on dead server still. 128 r = MetaTableAccessor.getRegionResult(master.getConnection(), rsRI.getRegionName()); 129 assertEquals(RegionState.State.OPEN.toString(), 130 Bytes.toString(r.getValue(HConstants.CATALOG_FAMILY, HConstants.STATE_QUALIFIER))); 131 serverName = MetaTableAccessor.getServerName(r, 0); 132 assertNotNull(cluster.getRegionServer(serverName)); 133 assertEquals(rsServerName, serverName); 134 135 // I now have 'Unknown Server' references in hbase:meta; i.e. Server references 136 // with no corresponding SCP. Queue one. 137 long pid = scheduleHBCKSCP(rsServerName, master); 138 assertNotEquals(Procedure.NO_PROC_ID, pid); 139 while (master.getMasterProcedureExecutor().getActiveProcIds().contains(pid)) { 140 Threads.sleep(10); 141 } 142 // After SCP, assert region is OPEN on new server. 143 r = MetaTableAccessor.getRegionResult(master.getConnection(), rsRI.getRegionName()); 144 assertEquals(RegionState.State.OPEN.toString(), 145 Bytes.toString(r.getValue(HConstants.CATALOG_FAMILY, HConstants.STATE_QUALIFIER))); 146 serverName = MetaTableAccessor.getServerName(r, 0); 147 assertNotNull(cluster.getRegionServer(serverName)); 148 assertNotEquals(rsServerName, serverName); 149 // Make sure no mention of old server post SCP. 150 assertFalse(searchMeta(master, rsServerName)); 151 } 152 153 protected long scheduleHBCKSCP(ServerName rsServerName, HMaster master) throws ServiceException { 154 MasterProtos.ScheduleServerCrashProcedureResponse response = master.getMasterRpcServices() 155 .scheduleServerCrashProcedure(null, MasterProtos.ScheduleServerCrashProcedureRequest 156 .newBuilder().addServerName(ProtobufUtil.toServerName(rsServerName)).build()); 157 assertEquals(1, response.getPidCount()); 158 long pid = response.getPid(0); 159 return pid; 160 } 161 162 /** Returns True if we find reference to <code>sn</code> in meta table. */ 163 private boolean searchMeta(HMaster master, ServerName sn) throws IOException { 164 List<Pair<RegionInfo, ServerName>> ps = 165 MetaTableAccessor.getTableRegionsAndLocations(master.getConnection(), null); 166 for (Pair<RegionInfo, ServerName> p : ps) { 167 if (p.getSecond().equals(sn)) { 168 return true; 169 } 170 } 171 return false; 172 } 173}