001/* 002 * Licensed to the Apache Software Foundation (ASF) under one 003 * or more contributor license agreements. See the NOTICE file 004 * distributed with this work for additional information 005 * regarding copyright ownership. The ASF licenses this file 006 * to you under the Apache License, Version 2.0 (the 007 * "License"); you may not use this file except in compliance 008 * with the License. You may obtain a copy of the License at 009 * 010 * http://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, software 013 * distributed under the License is distributed on an "AS IS" BASIS, 014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 015 * See the License for the specific language governing permissions and 016 * limitations under the License. 017 */ 018package org.apache.hadoop.hbase.regionserver; 019 020import java.io.IOException; 021import java.util.List; 022import java.util.concurrent.ConcurrentHashMap; 023import java.util.concurrent.atomic.AtomicInteger; 024import org.apache.hadoop.conf.Configuration; 025import org.apache.hadoop.hbase.HBaseTestingUtil; 026import org.apache.hadoop.hbase.NotServingRegionException; 027import org.apache.hadoop.hbase.SingleProcessHBaseCluster; 028import org.apache.hadoop.hbase.StartTestingClusterOption; 029import org.apache.hadoop.hbase.TableName; 030import org.apache.hadoop.hbase.client.ColumnFamilyDescriptorBuilder; 031import org.apache.hadoop.hbase.client.Durability; 032import org.apache.hadoop.hbase.client.Get; 033import org.apache.hadoop.hbase.client.Put; 034import org.apache.hadoop.hbase.client.Result; 035import org.apache.hadoop.hbase.client.Table; 036import org.apache.hadoop.hbase.client.TableDescriptor; 037import org.apache.hadoop.hbase.client.TableDescriptorBuilder; 038import org.apache.hadoop.hbase.testclassification.FlakeyTests; 039import org.apache.hadoop.hbase.testclassification.LargeTests; 040import org.apache.hadoop.hbase.util.Bytes; 041import org.apache.hadoop.hbase.util.ConcurrentMapUtils; 042import org.apache.hadoop.hbase.util.ServerRegionReplicaUtil; 043import org.junit.jupiter.api.AfterAll; 044import org.junit.jupiter.api.BeforeAll; 045import org.junit.jupiter.api.Tag; 046import org.junit.jupiter.api.Test; 047 048import org.apache.hbase.thirdparty.com.google.protobuf.ByteString; 049import org.apache.hbase.thirdparty.com.google.protobuf.RpcController; 050import org.apache.hbase.thirdparty.com.google.protobuf.ServiceException; 051import org.apache.hbase.thirdparty.org.apache.commons.collections4.CollectionUtils; 052 053import org.apache.hadoop.hbase.shaded.protobuf.generated.AdminProtos.ReplicateWALEntryRequest; 054import org.apache.hadoop.hbase.shaded.protobuf.generated.AdminProtos.ReplicateWALEntryResponse; 055import org.apache.hadoop.hbase.shaded.protobuf.generated.AdminProtos.WALEntry; 056 057/** 058 * Test region replication when error occur. 059 * <p/> 060 * We can not simply move the secondary replicas as we will trigger a flush for the primary replica 061 * when secondary replica is online, which will always make the data of the two regions in sync. So 062 * here we need to simulate request errors. 063 */ 064@Tag(FlakeyTests.TAG) 065@Tag(LargeTests.TAG) 066public class TestRegionReplicaReplicationError { 067 public static final class ErrorReplayRSRpcServices extends RSRpcServices { 068 069 private final ConcurrentHashMap<HRegion, AtomicInteger> regionToCounter = 070 new ConcurrentHashMap<HRegion, AtomicInteger>(); 071 072 public ErrorReplayRSRpcServices(HRegionServer rs) throws IOException { 073 super(rs); 074 } 075 076 @Override 077 public ReplicateWALEntryResponse replicateToReplica(RpcController controller, 078 ReplicateWALEntryRequest request) throws ServiceException { 079 List<WALEntry> entries = request.getEntryList(); 080 if (CollectionUtils.isEmpty(entries)) { 081 return ReplicateWALEntryResponse.getDefaultInstance(); 082 } 083 ByteString regionName = entries.get(0).getKey().getEncodedRegionName(); 084 HRegion region; 085 try { 086 region = server.getRegionByEncodedName(regionName.toStringUtf8()); 087 } catch (NotServingRegionException e) { 088 throw new ServiceException(e); 089 } 090 091 AtomicInteger counter = 092 ConcurrentMapUtils.computeIfAbsent(regionToCounter, region, () -> new AtomicInteger(0)); 093 094 // fail the first several request 095 if (region.getRegionInfo().getReplicaId() == 1 && counter.addAndGet(entries.size()) < 100) { 096 throw new ServiceException("Inject error!"); 097 } 098 return super.replicateToReplica(controller, request); 099 } 100 } 101 102 public static final class RSForTest 103 extends SingleProcessHBaseCluster.MiniHBaseClusterRegionServer { 104 105 public RSForTest(Configuration conf) throws IOException, InterruptedException { 106 super(conf); 107 } 108 109 @Override 110 protected RSRpcServices createRpcServices() throws IOException { 111 return new ErrorReplayRSRpcServices(this); 112 } 113 } 114 115 private static final HBaseTestingUtil HTU = new HBaseTestingUtil(); 116 117 private static String TN = "test"; 118 119 private static byte[] CF = Bytes.toBytes("cf"); 120 121 private static byte[] CQ = Bytes.toBytes("cq"); 122 123 @BeforeAll 124 public static void setUp() throws Exception { 125 HTU.getConfiguration().setBoolean(ServerRegionReplicaUtil.REGION_REPLICA_REPLICATION_CONF_KEY, 126 true); 127 HTU.startMiniCluster( 128 StartTestingClusterOption.builder().rsClass(RSForTest.class).numRegionServers(3).build()); 129 } 130 131 @AfterAll 132 public static void tearDown() throws Exception { 133 HTU.shutdownMiniCluster(); 134 } 135 136 private boolean checkReplica(Table table, int replicaId) throws IOException { 137 boolean ret = true; 138 for (int i = 0; i < 500; i++) { 139 Result result = table.get(new Get(Bytes.toBytes(i)).setReplicaId(replicaId)); 140 byte[] value = result.getValue(CF, CQ); 141 ret &= value != null && value.length > 0 && Bytes.toInt(value) == i; 142 } 143 return ret; 144 } 145 146 @Test 147 public void testDefaultDurability() throws IOException { 148 doTest(false); 149 } 150 151 @Test 152 public void testSkipWAL() throws IOException { 153 doTest(true); 154 } 155 156 private void doTest(boolean skipWAL) throws IOException { 157 TableName tableName = TableName.valueOf(TN + (skipWAL ? "_skipWAL" : "")); 158 TableDescriptorBuilder builder = TableDescriptorBuilder.newBuilder(tableName) 159 .setRegionReplication(3).setColumnFamily(ColumnFamilyDescriptorBuilder.of(CF)); 160 if (skipWAL) { 161 builder.setDurability(Durability.SKIP_WAL); 162 } 163 TableDescriptor td = builder.build(); 164 HTU.getAdmin().createTable(td); 165 166 try (Table table = HTU.getConnection().getTable(tableName)) { 167 for (int i = 0; i < 500; i++) { 168 table.put(new Put(Bytes.toBytes(i)).addColumn(CF, CQ, Bytes.toBytes(i))); 169 } 170 HTU.waitFor(30000, () -> checkReplica(table, 2)); 171 HTU.waitFor(30000, () -> checkReplica(table, 1)); 172 } 173 } 174}