001/*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018package org.apache.hadoop.hbase.client;
019
020import static org.junit.jupiter.api.Assertions.assertArrayEquals;
021import static org.junit.jupiter.api.Assertions.assertTrue;
022
023import java.util.Arrays;
024import java.util.List;
025import org.apache.hadoop.conf.Configuration;
026import org.apache.hadoop.hbase.HBaseTestingUtil;
027import org.apache.hadoop.hbase.HConstants;
028import org.apache.hadoop.hbase.HRegionLocation;
029import org.apache.hadoop.hbase.MetaTableAccessor;
030import org.apache.hadoop.hbase.ServerName;
031import org.apache.hadoop.hbase.TableName;
032import org.apache.hadoop.hbase.regionserver.StorefileRefresherChore;
033import org.apache.hadoop.hbase.testclassification.MediumTests;
034import org.apache.hadoop.hbase.testclassification.MiscTests;
035import org.apache.hadoop.hbase.util.Bytes;
036import org.apache.hadoop.hbase.zookeeper.ZKUtil;
037import org.apache.hadoop.hbase.zookeeper.ZKWatcher;
038import org.apache.hadoop.hbase.zookeeper.ZNodePaths;
039import org.junit.jupiter.api.BeforeAll;
040import org.junit.jupiter.api.Tag;
041import org.junit.jupiter.api.Test;
042import org.slf4j.Logger;
043import org.slf4j.LoggerFactory;
044
045import org.apache.hadoop.hbase.shaded.protobuf.ProtobufUtil;
046
047@Tag(MiscTests.TAG)
048@Tag(MediumTests.TAG)
049public class TestMetaWithReplicasShutdownHandling extends MetaWithReplicasTestBase {
050
051  private static final Logger LOG =
052    LoggerFactory.getLogger(TestMetaWithReplicasShutdownHandling.class);
053
054  @BeforeAll
055  public static void setUp() throws Exception {
056    startCluster();
057  }
058
059  @Test
060  public void testShutdownHandling() throws Exception {
061    // This test creates a table, flushes the meta (with 3 replicas), kills the
062    // server holding the primary meta replica. Then it does a put/get into/from
063    // the test table. The put/get operations would use the replicas to locate the
064    // location of the test table's region
065    shutdownMetaAndDoValidations(TEST_UTIL);
066  }
067
068  public static void shutdownMetaAndDoValidations(HBaseTestingUtil util) throws Exception {
069    // This test creates a table, flushes the meta (with 3 replicas), kills the
070    // server holding the primary meta replica. Then it does a put/get into/from
071    // the test table. The put/get operations would use the replicas to locate the
072    // location of the test table's region
073    ZKWatcher zkw = util.getZooKeeperWatcher();
074    Configuration conf = util.getConfiguration();
075    conf.setBoolean(HConstants.USE_META_REPLICAS, true);
076
077    String baseZNode =
078      conf.get(HConstants.ZOOKEEPER_ZNODE_PARENT, HConstants.DEFAULT_ZOOKEEPER_ZNODE_PARENT);
079    String primaryMetaZnode =
080      ZNodePaths.joinZNode(baseZNode, conf.get("zookeeper.znode.metaserver", "meta-region-server"));
081    byte[] data = ZKUtil.getData(zkw, primaryMetaZnode);
082    ServerName primary = ProtobufUtil.toServerName(data);
083    LOG.info("Primary=" + primary.toString());
084
085    TableName TABLE = TableName.valueOf("testShutdownHandling");
086    byte[][] FAMILIES = new byte[][] { Bytes.toBytes("foo") };
087    if (util.getAdmin().tableExists(TABLE)) {
088      util.getAdmin().disableTable(TABLE);
089      util.getAdmin().deleteTable(TABLE);
090    }
091    byte[] row = Bytes.toBytes("test");
092    ServerName master = null;
093    try (Connection c = ConnectionFactory.createConnection(util.getConfiguration())) {
094      try (Table htable = util.createTable(TABLE, FAMILIES)) {
095        util.getAdmin().flush(TableName.META_TABLE_NAME);
096        Thread.sleep(
097          conf.getInt(StorefileRefresherChore.REGIONSERVER_STOREFILE_REFRESH_PERIOD, 30000) * 6);
098        List<RegionInfo> regions = MetaTableAccessor.getTableRegions(c, TABLE);
099        HRegionLocation hrl = MetaTableAccessor.getRegionLocation(c, regions.get(0));
100        // Ensure that the primary server for test table is not the same one as the primary
101        // of the meta region since we will be killing the srv holding the meta's primary...
102        // We want to be able to write to the test table even when the meta is not present ..
103        // If the servers are the same, then move the test table's region out of the server
104        // to another random server
105        if (hrl.getServerName().equals(primary)) {
106          util.getAdmin().move(hrl.getRegion().getEncodedNameAsBytes());
107          // wait for the move to complete
108          do {
109            Thread.sleep(10);
110            hrl = MetaTableAccessor.getRegionLocation(c, regions.get(0));
111          } while (primary.equals(hrl.getServerName()));
112          util.getAdmin().flush(TableName.META_TABLE_NAME);
113          Thread.sleep(
114            conf.getInt(StorefileRefresherChore.REGIONSERVER_STOREFILE_REFRESH_PERIOD, 30000) * 3);
115        }
116        // Ensure all metas are not on same hbase:meta replica=0 server!
117
118        master = util.getHBaseClusterInterface().getClusterMetrics().getMasterName();
119        // kill the master so that regionserver recovery is not triggered at all
120        // for the meta server
121        LOG.info("Stopping master=" + master.toString());
122        util.getHBaseClusterInterface().stopMaster(master);
123        util.getHBaseClusterInterface().waitForMasterToStop(master, 60000);
124        LOG.info("Master " + master + " stopped!");
125        if (!master.equals(primary)) {
126          util.getHBaseClusterInterface().killRegionServer(primary);
127          util.getHBaseClusterInterface().waitForRegionServerToStop(primary, 60000);
128        }
129        c.clearRegionLocationCache();
130      }
131      LOG.info("Running GETs");
132      try (Table htable = c.getTable(TABLE)) {
133        Put put = new Put(row);
134        put.addColumn(Bytes.toBytes("foo"), row, row);
135        BufferedMutator m = c.getBufferedMutator(TABLE);
136        m.mutate(put);
137        m.flush();
138        // Try to do a get of the row that was just put
139        Result r = htable.get(new Get(row));
140        assertTrue(Arrays.equals(r.getRow(), row));
141        // now start back the killed servers and disable use of replicas. That would mean
142        // calls go to the primary
143        LOG.info("Starting Master");
144        util.getHBaseClusterInterface().startMaster(master.getHostname(), 0);
145        util.getHBaseClusterInterface().startRegionServer(primary.getHostname(), 0);
146        util.getHBaseClusterInterface().waitForActiveAndReadyMaster();
147        LOG.info("Master active!");
148        c.clearRegionLocationCache();
149      }
150    }
151    conf.setBoolean(HConstants.USE_META_REPLICAS, false);
152    LOG.info("Running GETs no replicas");
153    try (Connection c = ConnectionFactory.createConnection(conf);
154      Table htable = c.getTable(TABLE)) {
155      Result r = htable.get(new Get(row));
156      assertArrayEquals(row, r.getRow());
157    }
158  }
159}