/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.client;

import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertNotEquals;
import static org.junit.Assert.assertNotNull;
import static org.junit.Assert.assertTrue;
import static org.junit.Assert.fail;

import java.util.Arrays;
import java.util.Collection;
import java.util.EnumSet;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import java.util.concurrent.atomic.AtomicBoolean;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.Abortable;
import org.apache.hadoop.hbase.ClusterMetrics.Option;
import org.apache.hadoop.hbase.HBaseClassTestRule;
import org.apache.hadoop.hbase.HBaseTestingUtility;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.HRegionLocation;
import org.apache.hadoop.hbase.MetaTableAccessor;
import org.apache.hadoop.hbase.ServerName;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.TableNotFoundException;
import org.apache.hadoop.hbase.master.assignment.AssignmentManager;
import org.apache.hadoop.hbase.master.assignment.AssignmentTestingUtil;
import org.apache.hadoop.hbase.protobuf.ProtobufUtil;
import org.apache.hadoop.hbase.regionserver.StorefileRefresherChore;
import org.apache.hadoop.hbase.testclassification.LargeTests;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.zookeeper.LoadBalancerTracker;
import org.apache.hadoop.hbase.zookeeper.MetaTableLocator;
import org.apache.hadoop.hbase.zookeeper.ZKUtil;
import org.apache.hadoop.hbase.zookeeper.ZKWatcher;
import org.apache.hadoop.hbase.zookeeper.ZNodePaths;
import org.junit.After;
import org.junit.Before;
import org.junit.ClassRule;
import org.junit.Rule;
import org.junit.Test;
import org.junit.experimental.categories.Category;
import org.junit.rules.TestName;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Tests the scenarios where replicas are enabled for the meta table.
 * <p>
 * Every test runs against its own mini cluster: {@link #setup()} starts one with
 * {@link #REGIONSERVERS_COUNT} region servers and {@link #META_REPLICAS_COUNT} meta replicas,
 * and {@link #tearDown()} shuts it down again.
 */
@Category(LargeTests.class)
public class TestMetaWithReplicas {

  @ClassRule
  public static final HBaseClassTestRule CLASS_RULE =
    HBaseClassTestRule.forClass(TestMetaWithReplicas.class);

  private static final Logger LOG = LoggerFactory.getLogger(TestMetaWithReplicas.class);
  private final HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility();
  private static final int REGIONSERVERS_COUNT = 3;
  /** Number of hbase:meta replicas (primary included) the mini cluster is configured with. */
  private static final int META_REPLICAS_COUNT = 3;

  @Rule
  public TestName name = new TestName();

  /**
   * Builds the path of a meta location znode from the given configuration.
   * @param conf configuration supplying the znode parent and the meta-server znode name
   * @param suffix text appended to the meta-server znode name; empty for the primary replica,
   *          {@code "-" + replicaId} for a secondary replica
   * @return the joined znode path
   */
  private static String metaZNode(Configuration conf, String suffix) {
    String baseZNode = conf.get(HConstants.ZOOKEEPER_ZNODE_PARENT,
      HConstants.DEFAULT_ZOOKEEPER_ZNODE_PARENT);
    return ZNodePaths.joinZNode(baseZNode,
      conf.get("zookeeper.znode.metaserver", "meta-region-server") + suffix);
  }

  /**
   * Starts the mini cluster, waits until every secondary meta replica is assigned, moves the
   * primary meta region away if all replicas landed on a single server, and finally switches the
   * balancer off.
   */
  @Before
  public void setup() throws Exception {
    Configuration conf = TEST_UTIL.getConfiguration();
    conf.setInt("zookeeper.session.timeout", 30000);
    conf.setInt(HConstants.META_REPLICAS_NUM, META_REPLICAS_COUNT);
    conf.setInt(StorefileRefresherChore.REGIONSERVER_STOREFILE_REFRESH_PERIOD, 1000);
    TEST_UTIL.startMiniCluster(REGIONSERVERS_COUNT);
    AssignmentManager am = TEST_UTIL.getMiniHBaseCluster().getMaster().getAssignmentManager();
    // Collect the distinct servers hosting a meta replica.
    Set<ServerName> sns = new HashSet<>();
    ServerName hbaseMetaServerName =
      MetaTableLocator.getMetaRegionLocation(TEST_UTIL.getZooKeeperWatcher());
    LOG.info("HBASE:META DEPLOY: on {}", hbaseMetaServerName);
    sns.add(hbaseMetaServerName);
    for (int replicaId = 1; replicaId < META_REPLICAS_COUNT; replicaId++) {
      RegionInfo h = RegionReplicaUtil
        .getRegionInfoForReplica(RegionInfoBuilder.FIRST_META_REGIONINFO, replicaId);
      AssignmentTestingUtil.waitForAssignment(am, h);
      ServerName sn = am.getRegionStates().getRegionServerOfRegion(h);
      assertNotNull(sn);
      LOG.info("HBASE:META DEPLOY: {} on {}", h.getRegionNameAsString(), sn);
      sns.add(sn);
    }
    // Fun. All meta region replicas have ended up on the one server. This will cause this test
    // to fail ... sometimes.
    if (sns.size() == 1) {
      int count = TEST_UTIL.getMiniHBaseCluster().getLiveRegionServerThreads().size();
      assertEquals("count=" + count, REGIONSERVERS_COUNT, count);
      LOG.warn("All hbase:meta replicas are on the one server; moving hbase:meta: {}", sns);
      int metaServerIndex = TEST_UTIL.getHBaseCluster().getServerWithMeta();
      // Pick the next region server (wrapping around) as the destination.
      int newServerIndex = (metaServerIndex + 1) % REGIONSERVERS_COUNT;
      assertNotEquals(metaServerIndex, newServerIndex);
      ServerName destinationServerName =
        TEST_UTIL.getHBaseCluster().getRegionServer(newServerIndex).getServerName();
      ServerName metaServerName =
        TEST_UTIL.getHBaseCluster().getRegionServer(metaServerIndex).getServerName();
      assertNotEquals(destinationServerName, metaServerName);
      TEST_UTIL.getAdmin().move(RegionInfoBuilder.FIRST_META_REGIONINFO.getEncodedNameAsBytes(),
        destinationServerName);
    }
    // Disable the balancer
    LoadBalancerTracker balancerTracker = new LoadBalancerTracker(TEST_UTIL.getZooKeeperWatcher(),
      new Abortable() {
        private final AtomicBoolean aborted = new AtomicBoolean(false);

        @Override
        public boolean isAborted() {
          return aborted.get();
        }

        @Override
        public void abort(String why, Throwable e) {
          aborted.set(true);
        }
      });
    balancerTracker.setBalancerOn(false);
    LOG.debug("All meta replicas assigned");
  }

  /** Shuts the per-test mini cluster down. */
  @After
  public void tearDown() throws Exception {
    TEST_UTIL.shutdownMiniCluster();
  }

  /** Checks that the meta table descriptor reports the configured region replication. */
  @Test
  public void testMetaHTDReplicaCount() throws Exception {
    assertEquals(META_REPLICAS_COUNT,
      TEST_UTIL.getAdmin().getDescriptor(TableName.META_TABLE_NAME).getRegionReplication());
  }

  /** Checks all the znodes exist when meta's replicas are enabled. */
  @Test
  public void testZookeeperNodesForReplicas() throws Exception {
    ZKWatcher zkw = TEST_UTIL.getZooKeeperWatcher();
    Configuration conf = TEST_UTIL.getConfiguration();
    String primaryMetaZnode = metaZNode(conf, "");
    // check that the data in the znode is parseable (this would also mean the znode exists)
    byte[] data = ZKUtil.getData(zkw, primaryMetaZnode);
    ProtobufUtil.toServerName(data);
    for (int i = 1; i < META_REPLICAS_COUNT; i++) {
      String secZnode = metaZNode(conf, "-" + i);
      assertEquals(secZnode, zkw.getZNodePaths().getZNodeForReplica(i));
      // check that the data in the znode is parseable (this would also mean the znode exists)
      data = ZKUtil.getData(zkw, secZnode);
      ProtobufUtil.toServerName(data);
    }
  }

  @Test
  public void testShutdownHandling() throws Exception {
    // This test creates a table, flushes the meta (with 3 replicas), kills the
    // server holding the primary meta replica. Then it does a put/get into/from
    // the test table. The put/get operations would use the replicas to locate the
    // location of the test table's region
    shutdownMetaAndDoValidations(TEST_UTIL);
  }

  /**
   * Creates a table, flushes the meta, stops the master and kills the server holding the primary
   * meta replica, then does a put/get into/from the test table with meta replicas enabled.
   * Afterwards the stopped servers are started again and the get is repeated with meta replicas
   * disabled.
   * <p>
   * Note that this toggles {@link HConstants#USE_META_REPLICAS} on the configuration object
   * returned by {@code util.getConfiguration()}; it is left set to {@code false} on return.
   * @param util testing utility whose cluster is already running
   * @throws Exception if any cluster operation or validation fails
   */
  public static void shutdownMetaAndDoValidations(HBaseTestingUtility util) throws Exception {
    ZKWatcher zkw = util.getZooKeeperWatcher();
    Configuration conf = util.getConfiguration();
    conf.setBoolean(HConstants.USE_META_REPLICAS, true);

    byte[] data = ZKUtil.getData(zkw, metaZNode(conf, ""));
    ServerName primary = ProtobufUtil.toServerName(data);
    LOG.info("Primary={}", primary);

    TableName tableName = TableName.valueOf("testShutdownHandling");
    byte[][] families = new byte[][] { Bytes.toBytes("foo") };
    if (util.getAdmin().tableExists(tableName)) {
      util.getAdmin().disableTable(tableName);
      util.getAdmin().deleteTable(tableName);
    }
    byte[] row = Bytes.toBytes("test");
    ServerName master = null;
    try (Connection c = ConnectionFactory.createConnection(conf)) {
      // The handle returned by createTable is not used; it is only held so it gets closed.
      try (Table ignored = util.createTable(tableName, families)) {
        util.getAdmin().flush(TableName.META_TABLE_NAME);
        // Sleep for several store file refresh periods after the meta flush.
        Thread.sleep(
          conf.getInt(StorefileRefresherChore.REGIONSERVER_STOREFILE_REFRESH_PERIOD, 30000) * 6);
        List<RegionInfo> regions = MetaTableAccessor.getTableRegions(c, tableName);
        HRegionLocation hrl = MetaTableAccessor.getRegionLocation(c, regions.get(0));
        // Ensure that the primary server for test table is not the same one as the primary
        // of the meta region since we will be killing the srv holding the meta's primary...
        // We want to be able to write to the test table even when the meta is not present ..
        // If the servers are the same, then move the test table's region out of the server
        // to another random server
        if (hrl.getServerName().equals(primary)) {
          util.getAdmin().move(hrl.getRegion().getEncodedNameAsBytes());
          // wait for the move to complete
          do {
            Thread.sleep(10);
            hrl = MetaTableAccessor.getRegionLocation(c, regions.get(0));
          } while (primary.equals(hrl.getServerName()));
          util.getAdmin().flush(TableName.META_TABLE_NAME);
          Thread.sleep(
            conf.getInt(StorefileRefresherChore.REGIONSERVER_STOREFILE_REFRESH_PERIOD, 30000) * 3);
        }
        // Ensure all metas are not on same hbase:meta replica=0 server!

        master = util.getHBaseClusterInterface().getClusterMetrics().getMasterName();
        // kill the master so that regionserver recovery is not triggered at all
        // for the meta server
        LOG.info("Stopping master={}", master);
        util.getHBaseClusterInterface().stopMaster(master);
        util.getHBaseClusterInterface().waitForMasterToStop(master, 60000);
        LOG.info("Master {} stopped!", master);
        if (!master.equals(primary)) {
          util.getHBaseClusterInterface().killRegionServer(primary);
          util.getHBaseClusterInterface().waitForRegionServerToStop(primary, 60000);
        }
        c.clearRegionLocationCache();
      }
      LOG.info("Running GETs");
      try (Table htable = c.getTable(tableName)) {
        Put put = new Put(row);
        put.addColumn(families[0], row, row);
        // Close the mutator once the put has been flushed so it is not leaked.
        try (BufferedMutator mutator = c.getBufferedMutator(tableName)) {
          mutator.mutate(put);
          mutator.flush();
        }
        // Try to do a get of the row that was just put
        Result r = htable.get(new Get(row));
        assertTrue(Arrays.equals(r.getRow(), row));
        // now start back the killed servers and disable use of replicas. That would mean
        // calls go to the primary
        LOG.info("Starting Master");
        util.getHBaseClusterInterface().startMaster(master.getHostname(), 0);
        util.getHBaseClusterInterface().startRegionServer(primary.getHostname(), 0);
        util.getHBaseClusterInterface().waitForActiveAndReadyMaster();
        LOG.info("Master active!");
        c.clearRegionLocationCache();
      }
    }
    conf.setBoolean(HConstants.USE_META_REPLICAS, false);
    LOG.info("Running GETs no replicas");
    try (Connection c = ConnectionFactory.createConnection(conf);
        Table htable = c.getTable(tableName)) {
      Result r = htable.get(new Get(row));
      assertTrue(Arrays.equals(r.getRow(), row));
    }
  }

  /**
   * Checks that a get against a table that does not exist fails with
   * {@link TableNotFoundException} when the client is configured to use meta replicas.
   */
  @Test
  public void testAccessingUnknownTables() throws Exception {
    Configuration conf = new Configuration(TEST_UTIL.getConfiguration());
    conf.setBoolean(HConstants.USE_META_REPLICAS, true);
    // The connection must be created from the replica-enabled configuration; the shared
    // TEST_UTIL connection would not pick up the setting made on this copy.
    try (Connection c = ConnectionFactory.createConnection(conf);
        Table table = c.getTable(TableName.valueOf(name.getMethodName()))) {
      table.get(new Get(Bytes.toBytes("foo")));
      fail("Expected TableNotFoundException");
    } catch (TableNotFoundException expected) {
      // This is the outcome the test is looking for.
    }
  }

  @Test
  public void testMetaAddressChange() throws Exception {
    // checks that even when the meta's location changes, the various
    // caches update themselves. Uses the master operations to test
    // this
    Configuration conf = TEST_UTIL.getConfiguration();
    ZKWatcher zkw = TEST_UTIL.getZooKeeperWatcher();
    String primaryMetaZnode = metaZNode(conf, "");
    // check that the data in the znode is parseable (this would also mean the znode exists)
    byte[] data = ZKUtil.getData(zkw, primaryMetaZnode);
    ServerName currentServer = ProtobufUtil.toServerName(data);
    Collection<ServerName> liveServers = TEST_UTIL.getAdmin()
      .getClusterMetrics(EnumSet.of(Option.LIVE_SERVERS)).getLiveServerMetrics().keySet();
    // Any live server other than the current meta host will do as the destination.
    ServerName moveToServer = null;
    for (ServerName s : liveServers) {
      if (!currentServer.equals(s)) {
        moveToServer = s;
      }
    }
    assertNotNull(moveToServer);
    assertNotEquals(currentServer, moveToServer);
    final TableName tableName = TableName.valueOf(name.getMethodName());
    TEST_UTIL.createTable(tableName, "f");
    assertTrue(TEST_UTIL.getAdmin().tableExists(tableName));
    TEST_UTIL.getAdmin().move(RegionInfoBuilder.FIRST_META_REGIONINFO.getEncodedNameAsBytes(),
      moveToServer);
    LOG.info("CurrentServer={}, moveToServer={}", currentServer, moveToServer);
    final int max = 10000;
    int i = 0;
    // Poll the znode every 10 ms, up to max times (roughly 100 seconds overall).
    do {
      Thread.sleep(10);
      data = ZKUtil.getData(zkw, primaryMetaZnode);
      currentServer = ProtobufUtil.toServerName(data);
      i++;
    } while (!moveToServer.equals(currentServer) && i < max);
    // Assert on the observed location rather than on the poll counter, so that a move which
    // completes on the very last poll is not reported as a failure.
    assertEquals("hbase:meta location in ZooKeeper did not change to the target server",
      moveToServer, currentServer);
    TEST_UTIL.getAdmin().disableTable(tableName);
    assertTrue(TEST_UTIL.getAdmin().isTableDisabled(tableName));
  }

  @Test
  public void testShutdownOfReplicaHolder() throws Exception {
    // checks that the when the server holding meta replica is shut down, the meta replica
    // can be recovered
    try (Connection conn = ConnectionFactory.createConnection(TEST_UTIL.getConfiguration());
        RegionLocator locator = conn.getRegionLocator(TableName.META_TABLE_NAME)) {
      HRegionLocation hrl = locator.getRegionLocations(HConstants.EMPTY_START_ROW, true).get(1);
      // Remember the replica up front: the location looked up inside the loop may be null, so it
      // must not be dereferenced for logging.
      RegionInfo replica = hrl.getRegion();
      ServerName oldServer = hrl.getServerName();
      TEST_UTIL.getHBaseClusterInterface().killRegionServer(oldServer);
      final int maxAttempts = 3;
      int attempts = 0;
      do {
        LOG.debug("Waiting for the replica {} to come up", replica);
        Thread.sleep(10000); // wait for the detection/recovery
        hrl = locator.getRegionLocations(HConstants.EMPTY_START_ROW, true).get(1);
        attempts++;
      } while ((hrl == null || oldServer.equals(hrl.getServerName())) && attempts < maxAttempts);
      // Assert on the final location rather than on the attempt counter, so that a recovery
      // observed on the last attempt still counts as success.
      assertNotNull("Meta replica " + replica + " has no location after recovery wait", hrl);
      assertNotEquals("Meta replica " + replica + " is still located on the killed server",
        oldServer, hrl.getServerName());
    }
  }
}