001/*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018package org.apache.hadoop.hbase.regionserver;
019
020import java.io.IOException;
021import java.util.List;
022import java.util.concurrent.ConcurrentHashMap;
023import java.util.concurrent.atomic.AtomicInteger;
024import org.apache.hadoop.conf.Configuration;
025import org.apache.hadoop.hbase.HBaseTestingUtil;
026import org.apache.hadoop.hbase.NotServingRegionException;
027import org.apache.hadoop.hbase.SingleProcessHBaseCluster;
028import org.apache.hadoop.hbase.StartTestingClusterOption;
029import org.apache.hadoop.hbase.TableName;
030import org.apache.hadoop.hbase.client.ColumnFamilyDescriptorBuilder;
031import org.apache.hadoop.hbase.client.Durability;
032import org.apache.hadoop.hbase.client.Get;
033import org.apache.hadoop.hbase.client.Put;
034import org.apache.hadoop.hbase.client.Result;
035import org.apache.hadoop.hbase.client.Table;
036import org.apache.hadoop.hbase.client.TableDescriptor;
037import org.apache.hadoop.hbase.client.TableDescriptorBuilder;
038import org.apache.hadoop.hbase.testclassification.FlakeyTests;
039import org.apache.hadoop.hbase.testclassification.LargeTests;
040import org.apache.hadoop.hbase.util.Bytes;
041import org.apache.hadoop.hbase.util.ConcurrentMapUtils;
042import org.apache.hadoop.hbase.util.ServerRegionReplicaUtil;
043import org.junit.jupiter.api.AfterAll;
044import org.junit.jupiter.api.BeforeAll;
045import org.junit.jupiter.api.Tag;
046import org.junit.jupiter.api.Test;
047
048import org.apache.hbase.thirdparty.com.google.protobuf.ByteString;
049import org.apache.hbase.thirdparty.com.google.protobuf.RpcController;
050import org.apache.hbase.thirdparty.com.google.protobuf.ServiceException;
051import org.apache.hbase.thirdparty.org.apache.commons.collections4.CollectionUtils;
052
053import org.apache.hadoop.hbase.shaded.protobuf.generated.AdminProtos.ReplicateWALEntryRequest;
054import org.apache.hadoop.hbase.shaded.protobuf.generated.AdminProtos.ReplicateWALEntryResponse;
055import org.apache.hadoop.hbase.shaded.protobuf.generated.AdminProtos.WALEntry;
056
/**
 * Test region replication when errors occur.
 * <p/>
 * We can not simply move the secondary replicas, as bringing a secondary replica online triggers
 * a flush on the primary replica, which always brings the data of the two regions back in sync.
 * So here we need to simulate request errors instead.
 */
064@Tag(FlakeyTests.TAG)
065@Tag(LargeTests.TAG)
066public class TestRegionReplicaReplicationError {
067  public static final class ErrorReplayRSRpcServices extends RSRpcServices {
068
069    private final ConcurrentHashMap<HRegion, AtomicInteger> regionToCounter =
070      new ConcurrentHashMap<HRegion, AtomicInteger>();
071
072    public ErrorReplayRSRpcServices(HRegionServer rs) throws IOException {
073      super(rs);
074    }
075
076    @Override
077    public ReplicateWALEntryResponse replicateToReplica(RpcController controller,
078      ReplicateWALEntryRequest request) throws ServiceException {
079      List<WALEntry> entries = request.getEntryList();
080      if (CollectionUtils.isEmpty(entries)) {
081        return ReplicateWALEntryResponse.getDefaultInstance();
082      }
083      ByteString regionName = entries.get(0).getKey().getEncodedRegionName();
084      HRegion region;
085      try {
086        region = server.getRegionByEncodedName(regionName.toStringUtf8());
087      } catch (NotServingRegionException e) {
088        throw new ServiceException(e);
089      }
090
091      AtomicInteger counter =
092        ConcurrentMapUtils.computeIfAbsent(regionToCounter, region, () -> new AtomicInteger(0));
093
094      // fail the first several request
095      if (region.getRegionInfo().getReplicaId() == 1 && counter.addAndGet(entries.size()) < 100) {
096        throw new ServiceException("Inject error!");
097      }
098      return super.replicateToReplica(controller, request);
099    }
100  }
101
102  public static final class RSForTest
103    extends SingleProcessHBaseCluster.MiniHBaseClusterRegionServer {
104
105    public RSForTest(Configuration conf) throws IOException, InterruptedException {
106      super(conf);
107    }
108
109    @Override
110    protected RSRpcServices createRpcServices() throws IOException {
111      return new ErrorReplayRSRpcServices(this);
112    }
113  }
114
115  private static final HBaseTestingUtil HTU = new HBaseTestingUtil();
116
117  private static String TN = "test";
118
119  private static byte[] CF = Bytes.toBytes("cf");
120
121  private static byte[] CQ = Bytes.toBytes("cq");
122
123  @BeforeAll
124  public static void setUp() throws Exception {
125    HTU.getConfiguration().setBoolean(ServerRegionReplicaUtil.REGION_REPLICA_REPLICATION_CONF_KEY,
126      true);
127    HTU.startMiniCluster(
128      StartTestingClusterOption.builder().rsClass(RSForTest.class).numRegionServers(3).build());
129  }
130
131  @AfterAll
132  public static void tearDown() throws Exception {
133    HTU.shutdownMiniCluster();
134  }
135
136  private boolean checkReplica(Table table, int replicaId) throws IOException {
137    boolean ret = true;
138    for (int i = 0; i < 500; i++) {
139      Result result = table.get(new Get(Bytes.toBytes(i)).setReplicaId(replicaId));
140      byte[] value = result.getValue(CF, CQ);
141      ret &= value != null && value.length > 0 && Bytes.toInt(value) == i;
142    }
143    return ret;
144  }
145
146  @Test
147  public void testDefaultDurability() throws IOException {
148    doTest(false);
149  }
150
151  @Test
152  public void testSkipWAL() throws IOException {
153    doTest(true);
154  }
155
156  private void doTest(boolean skipWAL) throws IOException {
157    TableName tableName = TableName.valueOf(TN + (skipWAL ? "_skipWAL" : ""));
158    TableDescriptorBuilder builder = TableDescriptorBuilder.newBuilder(tableName)
159      .setRegionReplication(3).setColumnFamily(ColumnFamilyDescriptorBuilder.of(CF));
160    if (skipWAL) {
161      builder.setDurability(Durability.SKIP_WAL);
162    }
163    TableDescriptor td = builder.build();
164    HTU.getAdmin().createTable(td);
165
166    try (Table table = HTU.getConnection().getTable(tableName)) {
167      for (int i = 0; i < 500; i++) {
168        table.put(new Put(Bytes.toBytes(i)).addColumn(CF, CQ, Bytes.toBytes(i)));
169      }
170      HTU.waitFor(30000, () -> checkReplica(table, 2));
171      HTU.waitFor(30000, () -> checkReplica(table, 1));
172    }
173  }
174}