001/** 002 * Licensed to the Apache Software Foundation (ASF) under one 003 * or more contributor license agreements. See the NOTICE file 004 * distributed with this work for additional information 005 * regarding copyright ownership. The ASF licenses this file 006 * to you under the Apache License, Version 2.0 (the 007 * "License"); you may not use this file except in compliance 008 * with the License. You may obtain a copy of the License at 009 * 010 * http://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, software 013 * distributed under the License is distributed on an "AS IS" BASIS, 014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 015 * See the License for the specific language governing permissions and 016 * limitations under the License. 017 */ 018 019package org.apache.hadoop.hbase.test; 020 021import org.apache.hbase.thirdparty.com.google.common.base.Joiner; 022import org.apache.hadoop.conf.Configuration; 023import org.apache.hadoop.conf.Configured; 024import org.apache.hadoop.fs.Path; 025import org.apache.hadoop.hbase.HBaseConfiguration; 026import org.apache.hadoop.hbase.HRegionLocation; 027import org.apache.hadoop.hbase.IntegrationTestingUtility; 028import org.apache.hadoop.hbase.ServerName; 029import org.apache.hadoop.hbase.TableName; 030import org.apache.hadoop.hbase.HConstants; 031import org.apache.hadoop.hbase.client.Connection; 032import org.apache.hadoop.hbase.client.ConnectionFactory; 033import org.apache.hadoop.hbase.client.Admin; 034import org.apache.hadoop.hbase.replication.ReplicationPeerConfig; 035import org.apache.hadoop.hbase.replication.ReplicationPeerDescription; 036import org.apache.hadoop.util.Tool; 037import org.apache.hadoop.util.ToolRunner; 038import org.slf4j.Logger; 039import org.slf4j.LoggerFactory; 040 041import org.apache.hbase.thirdparty.org.apache.commons.cli.CommandLine; 042 043import java.util.Collections; 044import java.util.HashMap; 045import java.util.List; 046import java.util.Set; 047import java.util.TreeSet; 048import java.util.UUID; 049 050 051/** 052 * This is an integration test for replication. It is derived off 053 * {@link org.apache.hadoop.hbase.test.IntegrationTestBigLinkedList} that creates a large circular 054 * linked list in one cluster and verifies that the data is correct in a sink cluster. The test 055 * handles creating the tables and schema and setting up the replication. 056 */ 057public class IntegrationTestReplication extends IntegrationTestBigLinkedList { 058 protected String sourceClusterIdString; 059 protected String sinkClusterIdString; 060 protected int numIterations; 061 protected int numMappers; 062 protected long numNodes; 063 protected String outputDir; 064 protected int numReducers; 065 protected int generateVerifyGap; 066 protected Integer width; 067 protected Integer wrapMultiplier; 068 protected boolean noReplicationSetup = false; 069 070 private final String SOURCE_CLUSTER_OPT = "sourceCluster"; 071 private final String DEST_CLUSTER_OPT = "destCluster"; 072 private final String ITERATIONS_OPT = "iterations"; 073 private final String NUM_MAPPERS_OPT = "numMappers"; 074 private final String OUTPUT_DIR_OPT = "outputDir"; 075 private final String NUM_REDUCERS_OPT = "numReducers"; 076 private final String NO_REPLICATION_SETUP_OPT = "noReplicationSetup"; 077 078 /** 079 * The gap (in seconds) from when data is finished being generated at the source 080 * to when it can be verified. This is the replication lag we are willing to tolerate 081 */ 082 private final String GENERATE_VERIFY_GAP_OPT = "generateVerifyGap"; 083 084 /** 085 * The width of the linked list. 086 * See {@link org.apache.hadoop.hbase.test.IntegrationTestBigLinkedList} for more details 087 */ 088 private final String WIDTH_OPT = "width"; 089 090 /** 091 * The number of rows after which the linked list points to the first row. 092 * See {@link org.apache.hadoop.hbase.test.IntegrationTestBigLinkedList} for more details 093 */ 094 private final String WRAP_MULTIPLIER_OPT = "wrapMultiplier"; 095 096 /** 097 * The number of nodes in the test setup. This has to be a multiple of WRAP_MULTIPLIER * WIDTH 098 * in order to ensure that the linked list can is complete. 099 * See {@link org.apache.hadoop.hbase.test.IntegrationTestBigLinkedList} for more details 100 */ 101 private final String NUM_NODES_OPT = "numNodes"; 102 103 private final int DEFAULT_NUM_MAPPERS = 1; 104 private final int DEFAULT_NUM_REDUCERS = 1; 105 private final int DEFAULT_NUM_ITERATIONS = 1; 106 private final int DEFAULT_GENERATE_VERIFY_GAP = 60; 107 private final int DEFAULT_WIDTH = 1000000; 108 private final int DEFAULT_WRAP_MULTIPLIER = 25; 109 private final int DEFAULT_NUM_NODES = DEFAULT_WIDTH * DEFAULT_WRAP_MULTIPLIER; 110 111 /** 112 * Wrapper around an HBase ClusterID allowing us 113 * to get admin connections and configurations for it 114 */ 115 protected class ClusterID { 116 private final Configuration configuration; 117 private Connection connection = null; 118 119 /** 120 * This creates a new ClusterID wrapper that will automatically build connections and 121 * configurations to be able to talk to the specified cluster 122 * 123 * @param base the base configuration that this class will add to 124 * @param key the cluster key in the form of zk_quorum:zk_port:zk_parent_node 125 */ 126 public ClusterID(Configuration base, 127 String key) { 128 configuration = new Configuration(base); 129 String[] parts = key.split(":"); 130 configuration.set(HConstants.ZOOKEEPER_QUORUM, parts[0]); 131 configuration.set(HConstants.ZOOKEEPER_CLIENT_PORT, parts[1]); 132 configuration.set(HConstants.ZOOKEEPER_ZNODE_PARENT, parts[2]); 133 } 134 135 @Override 136 public String toString() { 137 return Joiner.on(":").join(configuration.get(HConstants.ZOOKEEPER_QUORUM), 138 configuration.get(HConstants.ZOOKEEPER_CLIENT_PORT), 139 configuration.get(HConstants.ZOOKEEPER_ZNODE_PARENT)); 140 } 141 142 public Configuration getConfiguration() { 143 return this.configuration; 144 } 145 146 public Connection getConnection() throws Exception { 147 if (this.connection == null) { 148 this.connection = ConnectionFactory.createConnection(this.configuration); 149 } 150 return this.connection; 151 } 152 153 public void closeConnection() throws Exception { 154 this.connection.close(); 155 this.connection = null; 156 } 157 158 public boolean equals(ClusterID other) { 159 return this.toString().equalsIgnoreCase(other.toString()); 160 } 161 } 162 163 /** 164 * The main runner loop for the test. It uses 165 * {@link org.apache.hadoop.hbase.test.IntegrationTestBigLinkedList} 166 * for the generation and verification of the linked list. It is heavily based on 167 * {@link org.apache.hadoop.hbase.test.IntegrationTestBigLinkedList.Loop} 168 */ 169 protected class VerifyReplicationLoop extends Configured implements Tool { 170 private final Logger LOG = LoggerFactory.getLogger(VerifyReplicationLoop.class); 171 protected ClusterID source; 172 protected ClusterID sink; 173 174 IntegrationTestBigLinkedList integrationTestBigLinkedList; 175 176 /** 177 * This tears down any tables that existed from before and rebuilds the tables and schemas on 178 * the source cluster. It then sets up replication from the source to the sink cluster by using 179 * the {@link org.apache.hadoop.hbase.client.replication.ReplicationAdmin} 180 * connection. 181 * 182 * @throws Exception 183 */ 184 protected void setupTablesAndReplication() throws Exception { 185 TableName tableName = getTableName(source.getConfiguration()); 186 187 ClusterID[] clusters = {source, sink}; 188 189 // delete any old tables in the source and sink 190 for (ClusterID cluster : clusters) { 191 Admin admin = cluster.getConnection().getAdmin(); 192 193 if (admin.tableExists(tableName)) { 194 if (admin.isTableEnabled(tableName)) { 195 admin.disableTable(tableName); 196 } 197 198 /** 199 * TODO: This is a work around on a replication bug (HBASE-13416) 200 * When we recreate a table against that has recently been 201 * deleted, the contents of the logs are replayed even though 202 * they should not. This ensures that we flush the logs 203 * before the table gets deleted. Eventually the bug should be 204 * fixed and this should be removed. 205 */ 206 Set<ServerName> regionServers = new TreeSet<>(); 207 for (HRegionLocation rl : 208 cluster.getConnection().getRegionLocator(tableName).getAllRegionLocations()) { 209 regionServers.add(rl.getServerName()); 210 } 211 212 for (ServerName server : regionServers) { 213 source.getConnection().getAdmin().rollWALWriter(server); 214 } 215 216 admin.deleteTable(tableName); 217 } 218 } 219 220 // create the schema 221 Generator generator = new Generator(); 222 generator.setConf(source.getConfiguration()); 223 generator.createSchema(); 224 225 // setup the replication on the source 226 if (!source.equals(sink)) { 227 try (final Admin admin = source.getConnection().getAdmin()) { 228 // remove any old replication peers 229 for (ReplicationPeerDescription peer : admin.listReplicationPeers()) { 230 admin.removeReplicationPeer(peer.getPeerId()); 231 } 232 233 // set the test table to be the table to replicate 234 HashMap<TableName, List<String>> toReplicate = new HashMap<>(); 235 toReplicate.put(tableName, Collections.emptyList()); 236 237 // set the sink to be the target 238 final ReplicationPeerConfig peerConfig = ReplicationPeerConfig.newBuilder() 239 .setClusterKey(sink.toString()) 240 .setReplicateAllUserTables(false) 241 .setTableCFsMap(toReplicate).build(); 242 243 admin.addReplicationPeer("TestPeer", peerConfig); 244 admin.enableTableReplication(tableName); 245 } 246 } 247 248 for (ClusterID cluster : clusters) { 249 cluster.closeConnection(); 250 } 251 } 252 253 protected void waitForReplication() throws Exception { 254 // TODO: we shouldn't be sleeping here. It would be better to query the region servers 255 // and wait for them to report 0 replication lag. 256 Thread.sleep(generateVerifyGap * 1000); 257 } 258 259 /** 260 * Run the {@link org.apache.hadoop.hbase.test.IntegrationTestBigLinkedList.Generator} in the 261 * source cluster. This assumes that the tables have been setup via setupTablesAndReplication. 262 * 263 * @throws Exception 264 */ 265 protected void runGenerator() throws Exception { 266 Path outputPath = new Path(outputDir); 267 UUID uuid = util.getRandomUUID(); //create a random UUID. 268 Path generatorOutput = new Path(outputPath, uuid.toString()); 269 270 Generator generator = new Generator(); 271 generator.setConf(source.getConfiguration()); 272 273 // Disable concurrent walkers for IntegrationTestReplication 274 int retCode = generator.run(numMappers, numNodes, generatorOutput, width, wrapMultiplier, 0); 275 if (retCode > 0) { 276 throw new RuntimeException("Generator failed with return code: " + retCode); 277 } 278 } 279 280 281 /** 282 * Run the {@link org.apache.hadoop.hbase.test.IntegrationTestBigLinkedList.Verify} 283 * in the sink cluster. If replication is working properly the data written at the source 284 * cluster should be available in the sink cluster after a reasonable gap 285 * 286 * @param expectedNumNodes the number of nodes we are expecting to see in the sink cluster 287 * @throws Exception 288 */ 289 protected void runVerify(long expectedNumNodes) throws Exception { 290 Path outputPath = new Path(outputDir); 291 UUID uuid = util.getRandomUUID(); //create a random UUID. 292 Path iterationOutput = new Path(outputPath, uuid.toString()); 293 294 Verify verify = new Verify(); 295 verify.setConf(sink.getConfiguration()); 296 297 int retCode = verify.run(iterationOutput, numReducers); 298 if (retCode > 0) { 299 throw new RuntimeException("Verify.run failed with return code: " + retCode); 300 } 301 302 if (!verify.verify(expectedNumNodes)) { 303 throw new RuntimeException("Verify.verify failed"); 304 } 305 306 LOG.info("Verify finished with success. Total nodes=" + expectedNumNodes); 307 } 308 309 /** 310 * The main test runner 311 * 312 * This test has 4 steps: 313 * 1: setupTablesAndReplication 314 * 2: generate the data into the source cluster 315 * 3: wait for replication to propagate 316 * 4: verify that the data is available in the sink cluster 317 * 318 * @param args should be empty 319 * @return 0 on success 320 * @throws Exception on an error 321 */ 322 @Override 323 public int run(String[] args) throws Exception { 324 source = new ClusterID(getConf(), sourceClusterIdString); 325 sink = new ClusterID(getConf(), sinkClusterIdString); 326 327 if (!noReplicationSetup) { 328 setupTablesAndReplication(); 329 } 330 int expectedNumNodes = 0; 331 for (int i = 0; i < numIterations; i++) { 332 LOG.info("Starting iteration = " + i); 333 334 expectedNumNodes += numMappers * numNodes; 335 336 runGenerator(); 337 waitForReplication(); 338 runVerify(expectedNumNodes); 339 } 340 341 /** 342 * we are always returning 0 because exceptions are thrown when there is an error 343 * in the verification step. 344 */ 345 return 0; 346 } 347 } 348 349 @Override 350 protected void addOptions() { 351 super.addOptions(); 352 addRequiredOptWithArg("s", SOURCE_CLUSTER_OPT, 353 "Cluster ID of the source cluster (e.g. localhost:2181:/hbase)"); 354 addRequiredOptWithArg("r", DEST_CLUSTER_OPT, 355 "Cluster ID of the sink cluster (e.g. localhost:2182:/hbase)"); 356 addRequiredOptWithArg("d", OUTPUT_DIR_OPT, 357 "Temporary directory where to write keys for the test"); 358 359 addOptWithArg("nm", NUM_MAPPERS_OPT, 360 "Number of mappers (default: " + DEFAULT_NUM_MAPPERS + ")"); 361 addOptWithArg("nr", NUM_REDUCERS_OPT, 362 "Number of reducers (default: " + DEFAULT_NUM_MAPPERS + ")"); 363 addOptNoArg("nrs", NO_REPLICATION_SETUP_OPT, 364 "Don't setup tables or configure replication before starting test"); 365 addOptWithArg("n", NUM_NODES_OPT, 366 "Number of nodes. This should be a multiple of width * wrapMultiplier." + 367 " (default: " + DEFAULT_NUM_NODES + ")"); 368 addOptWithArg("i", ITERATIONS_OPT, "Number of iterations to run (default: " + 369 DEFAULT_NUM_ITERATIONS + ")"); 370 addOptWithArg("t", GENERATE_VERIFY_GAP_OPT, 371 "Gap between generate and verify steps in seconds (default: " + 372 DEFAULT_GENERATE_VERIFY_GAP + ")"); 373 addOptWithArg("w", WIDTH_OPT, 374 "Width of the linked list chain (default: " + DEFAULT_WIDTH + ")"); 375 addOptWithArg("wm", WRAP_MULTIPLIER_OPT, "How many times to wrap around (default: " + 376 DEFAULT_WRAP_MULTIPLIER + ")"); 377 } 378 379 @Override 380 protected void processOptions(CommandLine cmd) { 381 processBaseOptions(cmd); 382 383 sourceClusterIdString = cmd.getOptionValue(SOURCE_CLUSTER_OPT); 384 sinkClusterIdString = cmd.getOptionValue(DEST_CLUSTER_OPT); 385 outputDir = cmd.getOptionValue(OUTPUT_DIR_OPT); 386 387 /** This uses parseInt from {@link org.apache.hadoop.hbase.util.AbstractHBaseTool} */ 388 numMappers = parseInt(cmd.getOptionValue(NUM_MAPPERS_OPT, 389 Integer.toString(DEFAULT_NUM_MAPPERS)), 390 1, Integer.MAX_VALUE); 391 numReducers = parseInt(cmd.getOptionValue(NUM_REDUCERS_OPT, 392 Integer.toString(DEFAULT_NUM_REDUCERS)), 393 1, Integer.MAX_VALUE); 394 numNodes = parseInt(cmd.getOptionValue(NUM_NODES_OPT, Integer.toString(DEFAULT_NUM_NODES)), 395 1, Integer.MAX_VALUE); 396 generateVerifyGap = parseInt(cmd.getOptionValue(GENERATE_VERIFY_GAP_OPT, 397 Integer.toString(DEFAULT_GENERATE_VERIFY_GAP)), 398 1, Integer.MAX_VALUE); 399 numIterations = parseInt(cmd.getOptionValue(ITERATIONS_OPT, 400 Integer.toString(DEFAULT_NUM_ITERATIONS)), 401 1, Integer.MAX_VALUE); 402 width = parseInt(cmd.getOptionValue(WIDTH_OPT, Integer.toString(DEFAULT_WIDTH)), 403 1, Integer.MAX_VALUE); 404 wrapMultiplier = parseInt(cmd.getOptionValue(WRAP_MULTIPLIER_OPT, 405 Integer.toString(DEFAULT_WRAP_MULTIPLIER)), 406 1, Integer.MAX_VALUE); 407 408 if (cmd.hasOption(NO_REPLICATION_SETUP_OPT)) { 409 noReplicationSetup = true; 410 } 411 412 if (numNodes % (width * wrapMultiplier) != 0) { 413 throw new RuntimeException("numNodes must be a multiple of width and wrap multiplier"); 414 } 415 } 416 417 @Override 418 public int runTestFromCommandLine() throws Exception { 419 VerifyReplicationLoop tool = new VerifyReplicationLoop(); 420 tool.integrationTestBigLinkedList = this; 421 return ToolRunner.run(getConf(), tool, null); 422 } 423 424 public static void main(String[] args) throws Exception { 425 Configuration conf = HBaseConfiguration.create(); 426 IntegrationTestingUtility.setUseDistributedCluster(conf); 427 int ret = ToolRunner.run(conf, new IntegrationTestReplication(), args); 428 System.exit(ret); 429 } 430}