/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.regionserver;

import java.io.IOException;
import java.util.List;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.atomic.AtomicInteger;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseClassTestRule;
import org.apache.hadoop.hbase.HBaseTestingUtil;
import org.apache.hadoop.hbase.NotServingRegionException;
import org.apache.hadoop.hbase.SingleProcessHBaseCluster;
import org.apache.hadoop.hbase.StartTestingClusterOption;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.ColumnFamilyDescriptorBuilder;
import org.apache.hadoop.hbase.client.Durability;
import org.apache.hadoop.hbase.client.Get;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.Table;
import org.apache.hadoop.hbase.client.TableDescriptor;
import org.apache.hadoop.hbase.client.TableDescriptorBuilder;
import org.apache.hadoop.hbase.testclassification.FlakeyTests;
import org.apache.hadoop.hbase.testclassification.LargeTests;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.ConcurrentMapUtils;
import org.apache.hadoop.hbase.util.ServerRegionReplicaUtil;
import org.junit.AfterClass;
import org.junit.BeforeClass;
import org.junit.ClassRule;
import org.junit.Test;
import org.junit.experimental.categories.Category;

import org.apache.hbase.thirdparty.com.google.protobuf.ByteString;
import org.apache.hbase.thirdparty.com.google.protobuf.RpcController;
import org.apache.hbase.thirdparty.com.google.protobuf.ServiceException;
import org.apache.hbase.thirdparty.org.apache.commons.collections4.CollectionUtils;

import org.apache.hadoop.hbase.shaded.protobuf.generated.AdminProtos.ReplicateWALEntryRequest;
import org.apache.hadoop.hbase.shaded.protobuf.generated.AdminProtos.ReplicateWALEntryResponse;
import org.apache.hadoop.hbase.shaded.protobuf.generated.AdminProtos.WALEntry;

/**
 * Test region replication when errors occur.
 * <p/>
 * We cannot simply move the secondary replica, as that triggers a flush for the primary replica
 * when the secondary replica comes online, which always brings the data of the two regions back
 * in sync. So here we need to simulate request errors instead.
 */
@Category({ FlakeyTests.class, LargeTests.class })
public class TestRegionReplicaReplicationError {

  @ClassRule
  public static final HBaseClassTestRule CLASS_RULE =
    HBaseClassTestRule.forClass(TestRegionReplicaReplicationError.class);

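  /**
   * An {@link RSRpcServices} subclass that rejects {@code replicateToReplica} requests for
   * replica 1 until roughly 100 WAL entries have been counted for that region, simulating
   * transient replication errors.
   */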
  public static final class ErrorReplayRSRpcServices extends RSRpcServices {

    private final ConcurrentHashMap<HRegion, AtomicInteger> regionToCounter =
      new ConcurrentHashMap<>();

    public ErrorReplayRSRpcServices(HRegionServer rs) throws IOException {
      super(rs);
    }

    @Override
    public ReplicateWALEntryResponse replicateToReplica(RpcController controller,
      ReplicateWALEntryRequest request) throws ServiceException {
      List<WALEntry> entries = request.getEntryList();
      if (CollectionUtils.isEmpty(entries)) {
        return ReplicateWALEntryResponse.getDefaultInstance();
      }
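      // Resolve the target region from the first entry's encoded region name.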
      ByteString regionName = entries.get(0).getKey().getEncodedRegionName();
      HRegion region;
      try {
        region = server.getRegionByEncodedName(regionName.toStringUtf8());
      } catch (NotServingRegionException e) {
        throw new ServiceException(e);
      }

      AtomicInteger counter =
        ConcurrentMapUtils.computeIfAbsent(regionToCounter, region, () -> new AtomicInteger(0));

      // fail the first several requests, but only for replica 1
      if (region.getRegionInfo().getReplicaId() == 1 && counter.addAndGet(entries.size()) < 100) {
        throw new ServiceException("Inject error!");
      }
      return super.replicateToReplica(controller, request);
    }
  }

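  /**
   * A mini-cluster region server that installs {@link ErrorReplayRSRpcServices} as its RPC
   * services implementation.
   */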
  public static final class RSForTest
    extends SingleProcessHBaseCluster.MiniHBaseClusterRegionServer {

    public RSForTest(Configuration conf) throws IOException, InterruptedException {
      super(conf);
    }

    @Override
    protected RSRpcServices createRpcServices() throws IOException {
      return new ErrorReplayRSRpcServices(this);
    }
  }

  private static final HBaseTestingUtil HTU = new HBaseTestingUtil();

  private static final String TN = "test";

  private static final byte[] CF = Bytes.toBytes("cf");

  private static final byte[] CQ = Bytes.toBytes("cq");

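  // Enable region replica replication and start a 3-node mini cluster whose region servers use
  // the error-injecting RSRpcServices above.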
  @BeforeClass
  public static void setUp() throws Exception {
    HTU.getConfiguration().setBoolean(ServerRegionReplicaUtil.REGION_REPLICA_REPLICATION_CONF_KEY,
      true);
    HTU.startMiniCluster(
      StartTestingClusterOption.builder().rsClass(RSForTest.class).numRegionServers(3).build());
  }

  @AfterClass
  public static void tearDown() throws Exception {
    HTU.shutdownMiniCluster();
  }

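  /**
   * Returns true only if every row 0..499 is readable from the given replica and carries the
   * expected value.
   */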
  private boolean checkReplica(Table table, int replicaId) throws IOException {
    boolean ret = true;
    for (int i = 0; i < 500; i++) {
      Result result = table.get(new Get(Bytes.toBytes(i)).setReplicaId(replicaId));
      byte[] value = result.getValue(CF, CQ);
      ret &= value != null && value.length > 0 && Bytes.toInt(value) == i;
    }
    return ret;
  }

  @Test
  public void testDefaultDurability() throws IOException {
    doTest(false);
  }

  @Test
  public void testSkipWAL() throws IOException {
    doTest(true);
  }

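  /**
   * Creates a table with three region replicas, writes 500 rows to the primary, and then waits
   * for both secondary replicas to catch up despite the injected replication errors. When
   * {@code skipWAL} is true the table uses {@link Durability#SKIP_WAL}.
   */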
  private void doTest(boolean skipWAL) throws IOException {
    TableName tableName = TableName.valueOf(TN + (skipWAL ? "_skipWAL" : ""));
    TableDescriptorBuilder builder = TableDescriptorBuilder.newBuilder(tableName)
      .setRegionReplication(3).setColumnFamily(ColumnFamilyDescriptorBuilder.of(CF));
    if (skipWAL) {
      builder.setDurability(Durability.SKIP_WAL);
    }
    TableDescriptor td = builder.build();
    HTU.getAdmin().createTable(td);

    try (Table table = HTU.getConnection().getTable(tableName)) {
      for (int i = 0; i < 500; i++) {
        table.put(new Put(Bytes.toBytes(i)).addColumn(CF, CQ, Bytes.toBytes(i)));
      }
      HTU.waitFor(30000, () -> checkReplica(table, 2));
      HTU.waitFor(30000, () -> checkReplica(table, 1));
    }
  }
}