/**
 *
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.util;

import java.io.IOException;
import java.math.BigInteger;
import java.util.Arrays;
import java.util.Collection;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.TreeMap;
import org.apache.commons.lang3.ArrayUtils;
import org.apache.commons.lang3.StringUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.HRegionInfo;
import org.apache.hadoop.hbase.HRegionLocation;
import org.apache.hadoop.hbase.ServerName;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.ColumnFamilyDescriptor;
import org.apache.hadoop.hbase.client.TableDescriptor;
import org.apache.yetus.audience.InterfaceAudience;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.apache.hadoop.hbase.client.Admin;
import org.apache.hadoop.hbase.client.ClusterConnection;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.ConnectionFactory;
import org.apache.hadoop.hbase.client.NoServerForRegionException;
import org.apache.hadoop.hbase.client.RegionLocator;
import org.apache.hadoop.hbase.client.Table;
import org.apache.hadoop.hbase.regionserver.HRegionFileSystem;

import org.apache.hadoop.hbase.client.ColumnFamilyDescriptorBuilder;
import org.apache.hadoop.hbase.client.TableDescriptorBuilder;
import org.apache.hbase.thirdparty.com.google.common.base.Preconditions;
import org.apache.hbase.thirdparty.com.google.common.collect.Lists;
import org.apache.hbase.thirdparty.com.google.common.collect.Maps;
import org.apache.hbase.thirdparty.com.google.common.collect.Sets;
import org.apache.hbase.thirdparty.org.apache.commons.cli.CommandLine;
import org.apache.hbase.thirdparty.org.apache.commons.cli.GnuParser;
import org.apache.hbase.thirdparty.org.apache.commons.cli.HelpFormatter;
import org.apache.hbase.thirdparty.org.apache.commons.cli.OptionBuilder;
import org.apache.hbase.thirdparty.org.apache.commons.cli.Options;
import org.apache.hbase.thirdparty.org.apache.commons.cli.ParseException;

/**
 * The {@link RegionSplitter} class provides several utilities to help in the
 * administration lifecycle for developers who choose to manually split regions
 * instead of having HBase handle that automatically. The most useful utilities
 * are:
 * <p>
 * <ul>
 * <li>Create a table with a specified number of pre-split regions
 * <li>Execute a rolling split of all regions on an existing table
 * </ul>
 * <p>
 * Both operations can be safely done on a live server.
 * <p>
 * <b>Question:</b> How do I turn off automatic splitting? <br>
 * <b>Answer:</b> Automatic splitting is determined by the configuration value
 * <i>HConstants.HREGION_MAX_FILESIZE</i>. It is not recommended that you set this
 * to Long.MAX_VALUE in case you forget about manual splits. A suggested setting
 * is 100GB, which would result in > 1hr major compactions if reached.
 * <p>
 * <b>Question:</b> Why did the original authors decide to manually split? <br>
 * <b>Answer:</b> Specific workload characteristics of our use case allowed us
 * to benefit from a manual split system.
 * <p>
 * <ul>
 * <li>Data (~1k) that would grow instead of being replaced
 * <li>Data growth was roughly uniform across all regions
 * <li>OLTP workload. Data loss is a big deal.
 * </ul>
 * <p>
 * <b>Question:</b> Why is manual splitting good for this workload? <br>
 * <b>Answer:</b> Although automated splitting is not a bad option, there are
 * benefits to manual splitting.
 * <p>
 * <ul>
 * <li>With growing amounts of data, splits will continually be needed. Since
 * you always know exactly what regions you have, long-term debugging and
 * profiling is much easier with manual splits. It is hard to trace the logs to
 * understand region level problems if it keeps splitting and getting renamed.
 * <li>Data offlining bugs + unknown number of split regions == oh crap! If a
 * WAL or StoreFile was mistakenly unprocessed by HBase due to a weird bug and
 * you notice it a day or so later, you can be assured that the regions
 * specified in these files are the same as the current regions and you have
 * fewer headaches trying to restore/replay your data.
 * <li>You can finely tune your compaction algorithm. With roughly uniform data
 * growth, it's easy to cause split / compaction storms as the regions all
 * roughly hit the same data size at the same time. With manual splits, you can
 * let staggered, time-based major compactions spread out your network IO load.
 * </ul>
 * <p>
 * <b>Question:</b> What's the optimal number of pre-split regions to create? <br>
 * <b>Answer:</b> Mileage will vary depending upon your application.
 * <p>
 * The short answer for our application is that we started with 10 pre-split
 * regions / server and watched our data growth over time. It's better to err on
 * the side of too few regions and rolling split later.
 * <p>
 * The more complicated answer is that this depends upon the largest storefile
 * in your region. With a growing data size, this will get larger over time. You
 * want the largest region to be just big enough that the
 * {@link org.apache.hadoop.hbase.regionserver.HStore} compact
 * selection algorithm only compacts it due to a timed major. If you don't, your
 * cluster can be prone to compaction storms as the algorithm decides to run
 * major compactions on a large series of regions all at once. Note that
 * compaction storms are due to the uniform data growth, not the manual split
 * decision.
 * <p>
 * If you pre-split your regions too thin, you can increase the major compaction
 * interval by configuring HConstants.MAJOR_COMPACTION_PERIOD. If your data size
 * grows too large, use this script to perform a network IO safe rolling split
 * of all regions.
 */
@InterfaceAudience.Private
public class RegionSplitter {
  private static final Logger LOG = LoggerFactory.getLogger(RegionSplitter.class);

  /**
   * A generic interface for the RegionSplitter code to use for all of its
   * functionality. Note that the original authors of this code used
   * {@link HexStringSplit} to partition their table and set it as the default,
   * but provided this for your custom algorithm. To use, create a new derived
   * class from this interface and call {@link RegionSplitter#createPresplitTable}
   * or RegionSplitter#rollingSplit(TableName, SplitAlgorithm, Configuration) with
   * the argument splitClassName giving the name of your class.
   */
  public interface SplitAlgorithm {
    /**
     * Split a pre-existing region into 2 regions.
     *
     * @param start
     *          first row (inclusive)
     * @param end
     *          last row (exclusive)
     * @return the split row to use
     */
    byte[] split(byte[] start, byte[] end);

    /**
     * Split an entire table.
     *
     * @param numRegions
     *          number of regions to split the table into
     *
     * @throws RuntimeException
     *           user input is validated at this time; may throw a runtime
     *           exception in response to a parse failure
     * @return array of split keys for the initial regions of the table. The
     *         length of the returned array should be numRegions-1.
     */
    byte[][] split(int numRegions);

    /**
     * Some MapReduce jobs may want to run multiple mappers per region;
     * this is intended for such a use case.
     *
     * @param start first row (inclusive)
     * @param end last row (exclusive)
     * @param numSplits number of splits to generate
     * @param inclusive whether start and end are returned as split points
     */
    byte[][] split(byte[] start, byte[] end, int numSplits, boolean inclusive);

    /**
     * In HBase, the first row is represented by an empty byte array. This might
     * cause problems with your split algorithm or row printing. All your APIs
     * will be passed firstRow() instead of the empty array.
     *
     * @return your representation of your first row
     */
    byte[] firstRow();

    /**
     * In HBase, the last row is represented by an empty byte array. This might
     * cause problems with your split algorithm or row printing. All your APIs
     * will be passed lastRow() instead of the empty array.
     *
     * @return your representation of your last row
     */
    byte[] lastRow();

    /**
     * In HBase, the first row is represented by an empty byte array. Set this
     * value to help the split code understand how to evenly divide the first
     * region.
     *
     * @param userInput
     *          raw user input (may throw RuntimeException on parse failure)
     */
    void setFirstRow(String userInput);

    /**
     * In HBase, the last row is represented by an empty byte array. Set this
     * value to help the split code understand how to evenly divide the last
     * region. Note that this last row is inclusive for all rows sharing the
     * same prefix.
     *
     * @param userInput
     *          raw user input (may throw RuntimeException on parse failure)
     */
    void setLastRow(String userInput);

    /**
     * @param input
     *          user or file input for row
     * @return byte array representation of this row for HBase
     */
    byte[] strToRow(String input);

    /**
     * @param row
     *          byte array representing a row in HBase
     * @return String to use for debug and file printing
     */
    String rowToStr(byte[] row);

    /**
     * @return the separator character to use when storing / printing the row
     */
    String separator();

    /**
     * Set the first row
     * @param userInput byte array of the row key.
     */
    void setFirstRow(byte[] userInput);

    /**
     * Set the last row
     * @param userInput byte array of the row key.
     */
    void setLastRow(byte[] userInput);
  }
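
  /*
   * An illustrative sketch of plugging in a custom algorithm (not part of this
   * class; the name "Base36Split" is hypothetical). Subclassing the
   * NumberStringSplit base class below avoids re-implementing the whole
   * SplitAlgorithm contract; note that its constructor is package-private, so
   * such a subclass must live in org.apache.hadoop.hbase.util.
   *
   *   // Splits keys that are fixed-width, base-36 alphanumeric strings.
   *   public class Base36Split extends RegionSplitter.NumberStringSplit {
   *     public Base36Split() {
   *       super("00000000", "zzzzzzzz", 36);
   *     }
   *   }
   *
   * Passing the class name as the SPLITALGORITHM command-line argument then
   * selects it via newSplitAlgoInstance(Configuration, String).
   */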

  /**
   * The main function for the RegionSplitter application. Common uses:
   * <p>
   * <ul>
   * <li>create a table named 'myTable' with 60 pre-split regions containing 2
   * column families 'test' & 'rs', assuming the keys are hex-encoded ASCII:
   * <ul>
   * <li>bin/hbase org.apache.hadoop.hbase.util.RegionSplitter -c 60 -f test:rs
   * myTable HexStringSplit
   * </ul>
   * <li>create a table named 'myTable' with 50 pre-split regions,
   * assuming the keys are decimal-encoded ASCII:
   * <ul>
   * <li>bin/hbase org.apache.hadoop.hbase.util.RegionSplitter -c 50
   * myTable DecimalStringSplit
   * </ul>
   * <li>perform a rolling split of 'myTable' (i.e. 60 => 120 regions), with 2
   * outstanding splits at a time, assuming keys are uniformly distributed
   * bytes:
   * <ul>
   * <li>bin/hbase org.apache.hadoop.hbase.util.RegionSplitter -r -o 2 myTable
   * UniformSplit
   * </ul>
   * </ul>
   *
   * There are three SplitAlgorithms built into RegionSplitter: HexStringSplit,
   * DecimalStringSplit, and UniformSplit. These are different strategies for
   * choosing region boundaries. See their source code for details.
   *
   * @param args
   *          Usage: RegionSplitter &lt;TABLE&gt; &lt;SPLITALGORITHM&gt;
   *          &lt;-c &lt;# regions&gt; -f &lt;family:family:...&gt; | -r
   *          [-o &lt;# outstanding splits&gt;]&gt;
   *          [-D &lt;conf.param=value&gt;]
   * @throws IOException
   *           HBase IO problem
   * @throws InterruptedException
   *           user requested exit
   * @throws ParseException
   *           problem parsing user input
   */
  @SuppressWarnings("static-access")
  public static void main(String[] args) throws IOException,
      InterruptedException, ParseException {
    Configuration conf = HBaseConfiguration.create();

    // parse user input
    Options opt = new Options();
    opt.addOption(OptionBuilder.withArgName("property=value").hasArg()
        .withDescription("Override HBase Configuration Settings").create("D"));
    opt.addOption(OptionBuilder.withArgName("region count").hasArg()
        .withDescription(
            "Create a new table with a pre-split number of regions")
        .create("c"));
    opt.addOption(OptionBuilder.withArgName("family:family:...").hasArg()
        .withDescription(
            "Column Families to create with new table. Required with -c")
        .create("f"));
    opt.addOption("h", false, "Print this usage help");
    opt.addOption("r", false, "Perform a rolling split of an existing region");
    opt.addOption(OptionBuilder.withArgName("count").hasArg().withDescription(
        "Max outstanding splits that have unfinished major compactions")
        .create("o"));
    opt.addOption(null, "firstrow", true,
        "First Row in Table for Split Algorithm");
    opt.addOption(null, "lastrow", true,
        "Last Row in Table for Split Algorithm");
    opt.addOption(null, "risky", false,
        "Skip verification steps to complete quickly. "
            + "STRONGLY DISCOURAGED for production systems.");
    CommandLine cmd = new GnuParser().parse(opt, args);

    if (cmd.hasOption("D")) {
      for (String confOpt : cmd.getOptionValues("D")) {
        String[] kv = confOpt.split("=", 2);
        if (kv.length == 2) {
          conf.set(kv[0], kv[1]);
          LOG.debug("-D configuration override: " + kv[0] + "=" + kv[1]);
        } else {
          throw new ParseException("-D option format invalid: " + confOpt);
        }
      }
    }

    if (cmd.hasOption("risky")) {
      conf.setBoolean("split.verify", false);
    }

    boolean createTable = cmd.hasOption("c") && cmd.hasOption("f");
    boolean rollingSplit = cmd.hasOption("r");
    boolean oneOperOnly = createTable ^ rollingSplit;

    if (2 != cmd.getArgList().size() || !oneOperOnly || cmd.hasOption("h")) {
      new HelpFormatter().printHelp("bin/hbase regionsplitter <TABLE> <SPLITALGORITHM>\n"
          + "SPLITALGORITHM is the java class name of a class implementing "
          + "SplitAlgorithm, or one of the special strings HexStringSplit or "
          + "DecimalStringSplit or UniformSplit, which are built-in split algorithms. "
          + "HexStringSplit treats keys as hexadecimal ASCII, and "
          + "DecimalStringSplit treats keys as decimal ASCII, and "
          + "UniformSplit treats keys as arbitrary bytes.", opt);
      return;
    }
    TableName tableName = TableName.valueOf(cmd.getArgs()[0]);
    String splitClass = cmd.getArgs()[1];
    SplitAlgorithm splitAlgo = newSplitAlgoInstance(conf, splitClass);

    if (cmd.hasOption("firstrow")) {
      splitAlgo.setFirstRow(cmd.getOptionValue("firstrow"));
    }
    if (cmd.hasOption("lastrow")) {
      splitAlgo.setLastRow(cmd.getOptionValue("lastrow"));
    }

    if (createTable) {
      conf.set("split.count", cmd.getOptionValue("c"));
      createPresplitTable(tableName, splitAlgo, cmd.getOptionValue("f").split(":"), conf);
    }

    if (rollingSplit) {
      if (cmd.hasOption("o")) {
        conf.set("split.outstanding", cmd.getOptionValue("o"));
      }
      rollingSplit(tableName, splitAlgo, conf);
    }
  }

  static void createPresplitTable(TableName tableName, SplitAlgorithm splitAlgo,
      String[] columnFamilies, Configuration conf)
      throws IOException, InterruptedException {
    final int splitCount = conf.getInt("split.count", 0);
    Preconditions.checkArgument(splitCount > 1, "Split count must be > 1");

    Preconditions.checkArgument(columnFamilies.length > 0,
        "Must specify at least one column family. ");
    LOG.debug("Creating table " + tableName + " with " + columnFamilies.length
        + " column families. Presplitting to " + splitCount + " regions");

    TableDescriptorBuilder builder = TableDescriptorBuilder.newBuilder(tableName);
    for (String cf : columnFamilies) {
      builder.setColumnFamily(ColumnFamilyDescriptorBuilder.of(cf));
    }
    try (Connection connection = ConnectionFactory.createConnection(conf)) {
      Admin admin = connection.getAdmin();
      try {
        Preconditions.checkArgument(!admin.tableExists(tableName),
            "Table already exists: " + tableName);
        admin.createTable(builder.build(), splitAlgo.split(splitCount));
      } finally {
        admin.close();
      }
      LOG.debug("Table created! Waiting for regions to show online in META...");
      if (conf.getBoolean("split.verify", true)) {
        // NOTE: createTable is synchronous on the table, but not on the regions
        int onlineRegions = 0;
        try (RegionLocator locator = connection.getRegionLocator(tableName)) {
          while (onlineRegions < splitCount) {
            onlineRegions = locator.getAllRegionLocations().size();
            LOG.debug(onlineRegions + " of " + splitCount + " regions online...");
            if (onlineRegions < splitCount) {
              Thread.sleep(10 * 1000); // sleep
            }
          }
        }
      }
      LOG.debug("Finished creating table with " + splitCount + " regions");
    }
  }
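
  /*
   * A programmatic usage sketch (assumptions: the caller is in this package,
   * since createPresplitTable is package-private, and "myTable" does not yet
   * exist). It is equivalent to the "-c 60 -f test:rs" command-line form above:
   *
   *   Configuration conf = HBaseConfiguration.create();
   *   conf.set("split.count", "60");
   *   RegionSplitter.createPresplitTable(TableName.valueOf("myTable"),
   *       new RegionSplitter.HexStringSplit(),
   *       new String[] { "test", "rs" }, conf);
   */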

  /**
   * Alternative to getCurrentNrHRS, which is no longer available.
   * @param connection the connection to query
   * @return Rough count of regionservers out on the cluster.
   * @throws IOException if a remote or network exception occurs
   */
  private static int getRegionServerCount(final Connection connection) throws IOException {
    try (Admin admin = connection.getAdmin()) {
      Collection<ServerName> servers = admin.getRegionServers();
      return servers == null || servers.isEmpty() ? 0 : servers.size();
    }
  }

  private static byte[] readFile(final FileSystem fs, final Path path) throws IOException {
    FSDataInputStream tmpIn = fs.open(path);
    try {
      byte[] rawData = new byte[tmpIn.available()];
      tmpIn.readFully(rawData);
      return rawData;
    } finally {
      tmpIn.close();
    }
  }

  static void rollingSplit(TableName tableName, SplitAlgorithm splitAlgo, Configuration conf)
      throws IOException, InterruptedException {
    final int minOS = conf.getInt("split.outstanding", 2);
    try (Connection connection = ConnectionFactory.createConnection(conf)) {
      // Max outstanding splits. default == 50% of servers
      final int MAX_OUTSTANDING = Math.max(getRegionServerCount(connection) / 2, minOS);

      Path hbDir = CommonFSUtils.getRootDir(conf);
      Path tableDir = CommonFSUtils.getTableDir(hbDir, tableName);
      Path splitFile = new Path(tableDir, "_balancedSplit");
      FileSystem fs = FileSystem.get(conf);

      // Get a list of daughter regions to create
      LinkedList<Pair<byte[], byte[]>> tmpRegionSet = null;
      try (Table table = connection.getTable(tableName)) {
        tmpRegionSet = getSplits(connection, tableName, splitAlgo);
      }
      LinkedList<Pair<byte[], byte[]>> outstanding = Lists.newLinkedList();
      int splitCount = 0;
      final int origCount = tmpRegionSet.size();

      // all splits must compact & we have 1 compact thread, so 2 split
      // requests to the same RS can stall the outstanding split queue.
      // To fix, group the regions into an RS pool and round-robin through it
      LOG.debug("Bucketing regions by regionserver...");
      TreeMap<ServerName, LinkedList<Pair<byte[], byte[]>>> daughterRegions =
          Maps.newTreeMap();
      // Get a regionLocator. Need it in below.
      try (RegionLocator regionLocator = connection.getRegionLocator(tableName)) {
        for (Pair<byte[], byte[]> dr : tmpRegionSet) {
          ServerName rsLocation = regionLocator.getRegionLocation(dr.getSecond()).getServerName();
          if (!daughterRegions.containsKey(rsLocation)) {
            LinkedList<Pair<byte[], byte[]>> entry = Lists.newLinkedList();
            daughterRegions.put(rsLocation, entry);
          }
          daughterRegions.get(rsLocation).add(dr);
        }
        LOG.debug("Done with bucketing. Split time!");
        long startTime = System.currentTimeMillis();

        // Open the split file and modify it as splits finish
        byte[] rawData = readFile(fs, splitFile);

        FSDataOutputStream splitOut = fs.create(splitFile);
        try {
          splitOut.write(rawData);

          try {
            // *** split code ***
            while (!daughterRegions.isEmpty()) {
              LOG.debug(daughterRegions.size() + " RS have regions to split.");

              // Get ServerName to region count mapping
              final TreeMap<ServerName, Integer> rsSizes = Maps.newTreeMap();
              List<HRegionLocation> hrls = regionLocator.getAllRegionLocations();
              for (HRegionLocation hrl : hrls) {
                ServerName sn = hrl.getServerName();
                if (rsSizes.containsKey(sn)) {
                  rsSizes.put(sn, rsSizes.get(sn) + 1);
                } else {
                  rsSizes.put(sn, 1);
                }
              }

              // Round-robin through the ServerName list. Choose the lightest-loaded servers
              // first to keep the master from load-balancing regions as we split.
              for (Map.Entry<ServerName, LinkedList<Pair<byte[], byte[]>>> daughterRegion :
                  daughterRegions.entrySet()) {
                Pair<byte[], byte[]> dr = null;
                ServerName rsLoc = daughterRegion.getKey();
                LinkedList<Pair<byte[], byte[]>> regionList = daughterRegion.getValue();

                // Find a region in the ServerName list that hasn't been moved
                LOG.debug("Finding a region on " + rsLoc);
                while (!regionList.isEmpty()) {
                  dr = regionList.pop();

                  // get current region info
                  byte[] split = dr.getSecond();
                  HRegionLocation regionLoc = regionLocator.getRegionLocation(split);

                  // if this region moved locations
                  ServerName newRs = regionLoc.getServerName();
                  if (newRs.compareTo(rsLoc) != 0) {
                    LOG.debug("Region with " + splitAlgo.rowToStr(split)
                        + " moved to " + newRs + ". Relocating...");
                    // relocate it, don't use it right now
                    if (!daughterRegions.containsKey(newRs)) {
                      LinkedList<Pair<byte[], byte[]>> entry = Lists.newLinkedList();
                      daughterRegions.put(newRs, entry);
                    }
                    daughterRegions.get(newRs).add(dr);
                    dr = null;
                    continue;
                  }

                  // make sure this region wasn't already split
                  byte[] sk = regionLoc.getRegionInfo().getStartKey();
                  if (sk.length != 0) {
                    if (Bytes.equals(split, sk)) {
                      LOG.debug("Region already split on "
                          + splitAlgo.rowToStr(split) + ". Skipping this region...");
                      ++splitCount;
                      dr = null;
                      continue;
                    }
                    byte[] start = dr.getFirst();
                    Preconditions.checkArgument(Bytes.equals(start, sk),
                        splitAlgo.rowToStr(start) + " != " + splitAlgo.rowToStr(sk));
                  }

                  // passed all checks! found a good region
                  break;
                }
                if (regionList.isEmpty()) {
                  daughterRegions.remove(rsLoc);
                }
                if (dr == null) {
                  continue;
                }

                // we have a good region, time to split!
                byte[] split = dr.getSecond();
                LOG.debug("Splitting at " + splitAlgo.rowToStr(split));
                try (Admin admin = connection.getAdmin()) {
                  admin.split(tableName, split);
                }

                LinkedList<Pair<byte[], byte[]>> finished = Lists.newLinkedList();
                LinkedList<Pair<byte[], byte[]>> local_finished = Lists.newLinkedList();
                if (conf.getBoolean("split.verify", true)) {
                  // we need to verify and rate-limit our splits
                  outstanding.addLast(dr);
                  // with too many outstanding splits, wait for some to finish
                  while (outstanding.size() >= MAX_OUTSTANDING) {
                    LOG.debug("Wait for outstanding splits " + outstanding.size());
                    local_finished = splitScan(outstanding, connection, tableName, splitAlgo);
                    if (local_finished.isEmpty()) {
                      Thread.sleep(30 * 1000);
                    } else {
                      finished.addAll(local_finished);
                      outstanding.removeAll(local_finished);
                      LOG.debug(local_finished.size() + " outstanding splits finished");
                    }
                  }
                } else {
                  finished.add(dr);
                }

                // mark each finished region as successfully split.
                for (Pair<byte[], byte[]> region : finished) {
                  splitOut.writeChars("- " + splitAlgo.rowToStr(region.getFirst())
                      + " " + splitAlgo.rowToStr(region.getSecond()) + "\n");
                  splitCount++;
                  if (splitCount % 10 == 0) {
                    long tDiff = (System.currentTimeMillis() - startTime)
                        / splitCount;
                    LOG.debug("STATUS UPDATE: " + splitCount + " / " + origCount
                        + ". Avg Time / Split = "
                        + org.apache.hadoop.util.StringUtils.formatTime(tDiff));
                  }
                }
              }
            }
            if (conf.getBoolean("split.verify", true)) {
              while (!outstanding.isEmpty()) {
                LOG.debug("Finally Wait for outstanding splits " + outstanding.size());
                LinkedList<Pair<byte[], byte[]>> finished = splitScan(outstanding,
                    connection, tableName, splitAlgo);
                if (finished.isEmpty()) {
                  Thread.sleep(30 * 1000);
                } else {
                  outstanding.removeAll(finished);
                  for (Pair<byte[], byte[]> region : finished) {
                    splitOut.writeChars("- " + splitAlgo.rowToStr(region.getFirst())
                        + " " + splitAlgo.rowToStr(region.getSecond()) + "\n");
                    splitCount++;
                  }
                  LOG.debug("Finally " + finished.size() + " outstanding splits finished");
                }
              }
            }
            LOG.debug("All regions have been successfully split!");
          } finally {
            long tDiff = System.currentTimeMillis() - startTime;
            LOG.debug("TOTAL TIME = "
                + org.apache.hadoop.util.StringUtils.formatTime(tDiff));
            LOG.debug("Splits = " + splitCount);
            if (0 < splitCount) {
              LOG.debug("Avg Time / Split = "
                  + org.apache.hadoop.util.StringUtils.formatTime(tDiff / splitCount));
            }
          }
        } finally {
          splitOut.close();
          fs.delete(splitFile, false);
        }
      }
    }
  }

  /**
   * @throws IOException if the specified SplitAlgorithm class couldn't be
   *           instantiated
   */
  public static SplitAlgorithm newSplitAlgoInstance(Configuration conf,
      String splitClassName) throws IOException {
    Class<?> splitClass;

    // For split algorithms builtin to RegionSplitter, the user can specify
    // their simple class name instead of a fully qualified class name.
    if (splitClassName.equals(HexStringSplit.class.getSimpleName())) {
      splitClass = HexStringSplit.class;
    } else if (splitClassName.equals(DecimalStringSplit.class.getSimpleName())) {
      splitClass = DecimalStringSplit.class;
    } else if (splitClassName.equals(UniformSplit.class.getSimpleName())) {
      splitClass = UniformSplit.class;
    } else {
      try {
        splitClass = conf.getClassByName(splitClassName);
      } catch (ClassNotFoundException e) {
        throw new IOException("Couldn't load split class " + splitClassName, e);
      }
      if (splitClass == null) {
        throw new IOException("Failed loading split class " + splitClassName);
      }
      if (!SplitAlgorithm.class.isAssignableFrom(splitClass)) {
        throw new IOException(
            "Specified split class doesn't implement SplitAlgorithm");
      }
    }
    try {
      return splitClass.asSubclass(SplitAlgorithm.class).getDeclaredConstructor().newInstance();
    } catch (Exception e) {
      throw new IOException("Problem loading split algorithm: ", e);
    }
  }
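
  /*
   * A minimal usage sketch for newSplitAlgoInstance(): built-in algorithms can
   * be named by their simple class name, while a custom algorithm needs its
   * fully qualified class name.
   *
   *   Configuration conf = HBaseConfiguration.create();
   *   RegionSplitter.SplitAlgorithm algo =
   *       RegionSplitter.newSplitAlgoInstance(conf, "HexStringSplit");
   *   byte[][] boundaries = algo.split(4); // 3 boundaries -> 4 regions
   */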

  static LinkedList<Pair<byte[], byte[]>> splitScan(
      LinkedList<Pair<byte[], byte[]>> regionList,
      final Connection connection,
      final TableName tableName,
      SplitAlgorithm splitAlgo)
      throws IOException, InterruptedException {
    LinkedList<Pair<byte[], byte[]>> finished = Lists.newLinkedList();
    LinkedList<Pair<byte[], byte[]>> logicalSplitting = Lists.newLinkedList();
    LinkedList<Pair<byte[], byte[]>> physicalSplitting = Lists.newLinkedList();

    // Get table info
    Pair<Path, Path> tableDirAndSplitFile =
        getTableDirAndSplitFile(connection.getConfiguration(), tableName);
    Path tableDir = tableDirAndSplitFile.getFirst();
    FileSystem fs = tableDir.getFileSystem(connection.getConfiguration());
    // Clear the cache to forcibly refresh region information
    ((ClusterConnection) connection).clearRegionLocationCache();
    TableDescriptor htd = null;
    try (Table table = connection.getTable(tableName)) {
      htd = table.getDescriptor();
    }
    try (RegionLocator regionLocator = connection.getRegionLocator(tableName)) {

      // for every region that hasn't been verified as a finished split
      for (Pair<byte[], byte[]> region : regionList) {
        byte[] start = region.getFirst();
        byte[] split = region.getSecond();

        // see if the new split daughter region has come online
        try {
          HRegionInfo dri = regionLocator.getRegionLocation(split).getRegionInfo();
          if (dri.isOffline() || !Bytes.equals(dri.getStartKey(), split)) {
            logicalSplitting.add(region);
            continue;
          }
        } catch (NoServerForRegionException nsfre) {
          // NSFRE will occur if the old hbase:meta entry has no server assigned
          LOG.info(nsfre.toString(), nsfre);
          logicalSplitting.add(region);
          continue;
        }

        try {
          // when a daughter region is opened, a compaction is triggered
          // wait until compaction completes for both daughter regions
          LinkedList<HRegionInfo> check = Lists.newLinkedList();
          check.add(regionLocator.getRegionLocation(start).getRegionInfo());
          check.add(regionLocator.getRegionLocation(split).getRegionInfo());
          for (HRegionInfo hri : check.toArray(new HRegionInfo[check.size()])) {
            byte[] sk = hri.getStartKey();
            if (sk.length == 0) {
              sk = splitAlgo.firstRow();
            }

            HRegionFileSystem regionFs = HRegionFileSystem.openRegionFromFileSystem(
                connection.getConfiguration(), fs, tableDir, hri, true);

            // Check every Column Family for that region -- check that it does
            // not still have reference files.
            boolean refFound = false;
            for (ColumnFamilyDescriptor c : htd.getColumnFamilies()) {
              if ((refFound = regionFs.hasReferences(c.getNameAsString()))) {
                break;
              }
            }

            // compaction is completed when all reference files are gone
            if (!refFound) {
              check.remove(hri);
            }
          }
          if (check.isEmpty()) {
            finished.add(region);
          } else {
            physicalSplitting.add(region);
          }
        } catch (NoServerForRegionException nsfre) {
          LOG.debug("No Server Exception thrown for: " + splitAlgo.rowToStr(start));
          physicalSplitting.add(region);
          ((ClusterConnection) connection).clearRegionLocationCache();
        }
      }

      LOG.debug("Split Scan: " + finished.size() + " finished / "
          + logicalSplitting.size() + " split wait / "
          + physicalSplitting.size() + " reference wait");

      return finished;
    }
  }

  /**
   * @param conf the configuration to read the root dir from
   * @param tableName the table being split
   * @return A Pair where first item is table dir and second is the split file.
   * @throws IOException if a remote or network exception occurs
   */
  private static Pair<Path, Path> getTableDirAndSplitFile(final Configuration conf,
      final TableName tableName) throws IOException {
    Path hbDir = CommonFSUtils.getRootDir(conf);
    Path tableDir = CommonFSUtils.getTableDir(hbDir, tableName);
    Path splitFile = new Path(tableDir, "_balancedSplit");
    return new Pair<>(tableDir, splitFile);
  }

  static LinkedList<Pair<byte[], byte[]>> getSplits(final Connection connection,
      TableName tableName, SplitAlgorithm splitAlgo)
      throws IOException {
    Pair<Path, Path> tableDirAndSplitFile =
        getTableDirAndSplitFile(connection.getConfiguration(), tableName);
    Path tableDir = tableDirAndSplitFile.getFirst();
    Path splitFile = tableDirAndSplitFile.getSecond();

    FileSystem fs = tableDir.getFileSystem(connection.getConfiguration());

    // Using strings because (new byte[]{0}).equals(new byte[]{0}) == false
    Set<Pair<String, String>> daughterRegions = Sets.newHashSet();

    // Does a split file exist?
    if (!fs.exists(splitFile)) {
      // NO = fresh start. calculate splits to make
      LOG.debug("No " + splitFile.getName() + " file. Calculating splits...");

      // Query meta for all regions in the table
      Set<Pair<byte[], byte[]>> rows = Sets.newHashSet();
      Pair<byte[][], byte[][]> tmp = null;
      try (RegionLocator regionLocator = connection.getRegionLocator(tableName)) {
        tmp = regionLocator.getStartEndKeys();
      }
      Preconditions.checkArgument(tmp.getFirst().length == tmp.getSecond().length,
          "Start and End row arrays must be the same length");
      for (int i = 0; i < tmp.getFirst().length; ++i) {
        byte[] start = tmp.getFirst()[i], end = tmp.getSecond()[i];
        if (start.length == 0) {
          start = splitAlgo.firstRow();
        }
        if (end.length == 0) {
          end = splitAlgo.lastRow();
        }
        rows.add(Pair.newPair(start, end));
      }
      LOG.debug("Table " + tableName + " has " + rows.size() + " regions that will be split.");

      // prepare the split file
      Path tmpFile = new Path(tableDir, "_balancedSplit_prepare");
      FSDataOutputStream tmpOut = fs.create(tmpFile);

      // calculate all the splits == [daughterRegions] = [(start, splitPoint)]
      for (Pair<byte[], byte[]> r : rows) {
        byte[] splitPoint = splitAlgo.split(r.getFirst(), r.getSecond());
        String startStr = splitAlgo.rowToStr(r.getFirst());
        String splitStr = splitAlgo.rowToStr(splitPoint);
        daughterRegions.add(Pair.newPair(startStr, splitStr));
        LOG.debug("Will Split [" + startStr + " , "
            + splitAlgo.rowToStr(r.getSecond()) + ") at " + splitStr);
        tmpOut.writeChars("+ " + startStr + splitAlgo.separator() + splitStr
            + "\n");
      }
      tmpOut.close();
      fs.rename(tmpFile, splitFile);
    } else {
      LOG.debug("_balancedSplit file found. Replay log to restore state...");
      RecoverLeaseFSUtils.recoverFileLease(fs, splitFile, connection.getConfiguration(), null);

      // parse split file and process remaining splits
      FSDataInputStream tmpIn = fs.open(splitFile);
      StringBuilder sb = new StringBuilder(tmpIn.available());
      while (tmpIn.available() > 0) {
        sb.append(tmpIn.readChar());
      }
      tmpIn.close();
      for (String line : sb.toString().split("\n")) {
        String[] cmd = line.split(splitAlgo.separator());
        Preconditions.checkArgument(3 == cmd.length);
        byte[] start = splitAlgo.strToRow(cmd[1]);
        String startStr = splitAlgo.rowToStr(start);
        byte[] splitPoint = splitAlgo.strToRow(cmd[2]);
        String splitStr = splitAlgo.rowToStr(splitPoint);
        Pair<String, String> r = Pair.newPair(startStr, splitStr);
        if (cmd[0].equals("+")) {
          LOG.debug("Adding: " + r);
          daughterRegions.add(r);
        } else {
          LOG.debug("Removing: " + r);
          Preconditions.checkArgument(cmd[0].equals("-"),
              "Unknown option: " + cmd[0]);
          Preconditions.checkState(daughterRegions.contains(r),
              "Missing row: " + r);
          daughterRegions.remove(r);
        }
      }
      LOG.debug("Done reading. " + daughterRegions.size() + " regions left.");
    }
    LinkedList<Pair<byte[], byte[]>> ret = Lists.newLinkedList();
    for (Pair<String, String> r : daughterRegions) {
      ret.add(Pair.newPair(splitAlgo.strToRow(r.getFirst()),
          splitAlgo.strToRow(r.getSecond())));
    }
    return ret;
  }
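
  /*
   * For reference, the _balancedSplit log replayed above is a plain list of
   * (start, splitPoint) pairs, one per line, joined with the algorithm's
   * separator (a space for the string splitters). A planned split is prefixed
   * with "+", a completed one with "-"; hypothetical HexStringSplit contents:
   *
   *   + 00000000 20000000
   *   + 40000000 60000000
   *   - 40000000 60000000
   *
   * Here the region starting at 40000000 was already split at 60000000, so
   * only the (00000000, 20000000) entry remains outstanding after replay.
   */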

  /**
   * HexStringSplit is a well-known {@link SplitAlgorithm} for choosing region
   * boundaries. The format of a HexStringSplit region boundary is the ASCII
   * representation of an MD5 checksum, or any other uniformly distributed
   * hexadecimal value. Rows are hex-encoded long values in the range
   * <b>"00000000" => "FFFFFFFF"</b> and are left-padded with zeros to keep the
   * same order lexicographically as if they were binary.
   *
   * Since this split algorithm uses hex strings as keys, it is easy to read and
   * write in the shell but takes up more space and may be non-intuitive.
   */
  public static class HexStringSplit extends NumberStringSplit {
    final static String DEFAULT_MIN_HEX = "00000000";
    final static String DEFAULT_MAX_HEX = "FFFFFFFF";
    final static int RADIX_HEX = 16;

    public HexStringSplit() {
      super(DEFAULT_MIN_HEX, DEFAULT_MAX_HEX, RADIX_HEX);
    }

  }
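
  /*
   * A worked example (values follow from the NumberStringSplit math below):
   * splitting the full default range into 4 regions yields 3 boundaries. The
   * range is 0x100000000 wide (the last row is inclusive), so each region
   * spans 0x40000000 keys:
   *
   *   RegionSplitter.SplitAlgorithm algo = new RegionSplitter.HexStringSplit();
   *   byte[][] splits = algo.split(4);
   *   // splits = { "40000000", "80000000", "c0000000" } as ASCII bytes
   *   // (BigInteger.toString(16) emits lowercase hex digits)
   */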

  /**
   * The format of a DecimalStringSplit region boundary is the ASCII representation
   * of a reversed sequential number, or any other uniformly distributed decimal
   * value. Rows are decimal-encoded long values in the range
   * <b>"00000000" => "99999999"</b> and are left-padded with zeros to keep the
   * same order lexicographically as if they were binary.
   */
  public static class DecimalStringSplit extends NumberStringSplit {
    final static String DEFAULT_MIN_DEC = "00000000";
    final static String DEFAULT_MAX_DEC = "99999999";
    final static int RADIX_DEC = 10;

    public DecimalStringSplit() {
      super(DEFAULT_MIN_DEC, DEFAULT_MAX_DEC, RADIX_DEC);
    }

  }

  public abstract static class NumberStringSplit implements SplitAlgorithm {

    String firstRow;
    BigInteger firstRowInt;
    String lastRow;
    BigInteger lastRowInt;
    int rowComparisonLength;
    int radix;

    NumberStringSplit(String minRow, String maxRow, int radix) {
      this.firstRow = minRow;
      this.lastRow = maxRow;
      this.radix = radix;
      this.firstRowInt = BigInteger.ZERO;
      this.lastRowInt = new BigInteger(lastRow, this.radix);
      this.rowComparisonLength = lastRow.length();
    }

    @Override
    public byte[] split(byte[] start, byte[] end) {
      BigInteger s = convertToBigInteger(start);
      BigInteger e = convertToBigInteger(end);
      Preconditions.checkArgument(!e.equals(BigInteger.ZERO));
      return convertToByte(split2(s, e));
    }

    @Override
    public byte[][] split(int n) {
      Preconditions.checkArgument(lastRowInt.compareTo(firstRowInt) > 0,
          "last row (%s) is configured less than first row (%s)", lastRow,
          firstRow);
      // +1 to range because the last row is inclusive
      BigInteger range = lastRowInt.subtract(firstRowInt).add(BigInteger.ONE);
      Preconditions.checkState(range.compareTo(BigInteger.valueOf(n)) >= 0,
          "split granularity (%s) is greater than the range (%s)", n, range);

      BigInteger[] splits = new BigInteger[n - 1];
      BigInteger sizeOfEachSplit = range.divide(BigInteger.valueOf(n));
      for (int i = 1; i < n; i++) {
        // NOTE: this means the last region gets all the slop.
        // This is not a big deal if we're assuming n << MAXHEX
        splits[i - 1] = firstRowInt.add(sizeOfEachSplit.multiply(BigInteger
            .valueOf(i)));
      }
      return convertToBytes(splits);
    }

    @Override
    public byte[][] split(byte[] start, byte[] end, int numSplits, boolean inclusive) {
      BigInteger s = convertToBigInteger(start);
      BigInteger e = convertToBigInteger(end);

      Preconditions.checkArgument(e.compareTo(s) > 0,
          "last row (%s) is configured less than first row (%s)", rowToStr(end),
          rowToStr(start));
      // +1 to range because the last row is inclusive
      BigInteger range = e.subtract(s).add(BigInteger.ONE);
      Preconditions.checkState(range.compareTo(BigInteger.valueOf(numSplits)) >= 0,
          "split granularity (%s) is greater than the range (%s)", numSplits, range);

      BigInteger[] splits = new BigInteger[numSplits - 1];
      BigInteger sizeOfEachSplit = range.divide(BigInteger.valueOf(numSplits));
      for (int i = 1; i < numSplits; i++) {
        // NOTE: this means the last region gets all the slop.
        // This is not a big deal if we're assuming n << MAXHEX
        splits[i - 1] = s.add(sizeOfEachSplit.multiply(BigInteger
            .valueOf(i)));
      }

      if (inclusive) {
        BigInteger[] inclusiveSplitPoints = new BigInteger[numSplits + 1];
        inclusiveSplitPoints[0] = convertToBigInteger(start);
        inclusiveSplitPoints[numSplits] = convertToBigInteger(end);
        System.arraycopy(splits, 0, inclusiveSplitPoints, 1, splits.length);
        return convertToBytes(inclusiveSplitPoints);
      } else {
        return convertToBytes(splits);
      }
    }

    @Override
    public byte[] firstRow() {
      return convertToByte(firstRowInt);
    }

    @Override
    public byte[] lastRow() {
      return convertToByte(lastRowInt);
    }

    @Override
    public void setFirstRow(String userInput) {
      firstRow = userInput;
      firstRowInt = new BigInteger(firstRow, radix);
    }

    @Override
    public void setLastRow(String userInput) {
      lastRow = userInput;
      lastRowInt = new BigInteger(lastRow, radix);
      // Precondition: lastRow > firstRow, so last's length is the greater
      rowComparisonLength = lastRow.length();
    }

    @Override
    public byte[] strToRow(String in) {
      return convertToByte(new BigInteger(in, radix));
    }

    @Override
    public String rowToStr(byte[] row) {
      return Bytes.toStringBinary(row);
    }

    @Override
    public String separator() {
      return " ";
    }

    @Override
    public void setFirstRow(byte[] userInput) {
      firstRow = Bytes.toString(userInput);
    }

    @Override
    public void setLastRow(byte[] userInput) {
      lastRow = Bytes.toString(userInput);
    }

    /**
     * Divide 2 numbers in half (for split algorithm)
     *
     * @param a number #1
     * @param b number #2
     * @return the midpoint of the 2 numbers
     */
    public BigInteger split2(BigInteger a, BigInteger b) {
      return a.add(b).divide(BigInteger.valueOf(2)).abs();
    }

    /**
     * Returns an array of bytes corresponding to an array of BigIntegers
     *
     * @param bigIntegers numbers to convert
     * @return bytes corresponding to the bigIntegers
     */
    public byte[][] convertToBytes(BigInteger[] bigIntegers) {
      byte[][] returnBytes = new byte[bigIntegers.length][];
      for (int i = 0; i < bigIntegers.length; i++) {
        returnBytes[i] = convertToByte(bigIntegers[i]);
      }
      return returnBytes;
    }

    /**
     * Returns the bytes corresponding to the BigInteger
     *
     * @param bigInteger number to convert
     * @param pad padding length
     * @return byte corresponding to input BigInteger
     */
    public byte[] convertToByte(BigInteger bigInteger, int pad) {
      String bigIntegerString = bigInteger.toString(radix);
      bigIntegerString = StringUtils.leftPad(bigIntegerString, pad, '0');
      return Bytes.toBytes(bigIntegerString);
    }

    /**
     * Returns the bytes corresponding to the BigInteger
     *
     * @param bigInteger number to convert
     * @return corresponding bytes
     */
    public byte[] convertToByte(BigInteger bigInteger) {
      return convertToByte(bigInteger, rowComparisonLength);
    }

    /**
     * Returns the BigInteger represented by the byte array
     *
     * @param row byte array representing row
     * @return the corresponding BigInteger
     */
    public BigInteger convertToBigInteger(byte[] row) {
      return (row.length > 0) ? new BigInteger(Bytes.toString(row), radix)
          : BigInteger.ZERO;
    }

    @Override
    public String toString() {
      return this.getClass().getSimpleName() + " [" + rowToStr(firstRow())
          + "," + rowToStr(lastRow()) + "]";
    }
  }
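
  /*
   * A small sketch of the midpoint math above: split2() averages the two
   * endpoints, and convertToByte() left-pads the result back to a fixed-width
   * string so lexicographic order matches numeric order.
   *
   *   RegionSplitter.NumberStringSplit algo = new RegionSplitter.HexStringSplit();
   *   byte[] mid = algo.split(Bytes.toBytes("20000000"), Bytes.toBytes("60000000"));
   *   // mid = "40000000": (0x20000000 + 0x60000000) / 2 = 0x40000000
   */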

  /**
   * A SplitAlgorithm that divides the space of possible keys evenly. Useful
   * when the keys are approximately uniform random bytes (e.g. hashes). Rows
   * are raw byte values in the range <b>00 => FF</b> and are right-padded with
   * zeros to keep the same memcmp() order. This is the natural algorithm to use
   * for a byte[] environment and saves space, but is not necessarily the
   * easiest for readability.
   */
  public static class UniformSplit implements SplitAlgorithm {
    static final byte xFF = (byte) 0xFF;
    byte[] firstRowBytes = ArrayUtils.EMPTY_BYTE_ARRAY;
    byte[] lastRowBytes =
        new byte[] { xFF, xFF, xFF, xFF, xFF, xFF, xFF, xFF };

    @Override
    public byte[] split(byte[] start, byte[] end) {
      return Bytes.split(start, end, 1)[1];
    }

    @Override
    public byte[][] split(int numRegions) {
      Preconditions.checkArgument(
          Bytes.compareTo(lastRowBytes, firstRowBytes) > 0,
          "last row (%s) is configured less than first row (%s)",
          Bytes.toStringBinary(lastRowBytes),
          Bytes.toStringBinary(firstRowBytes));

      byte[][] splits = Bytes.split(firstRowBytes, lastRowBytes, true,
          numRegions - 1);
      Preconditions.checkState(splits != null,
          "Could not split region with given user input: " + this);

      // remove endpoints, which are included in the splits list

      return splits == null ? null : Arrays.copyOfRange(splits, 1, splits.length - 1);
    }

    @Override
    public byte[][] split(byte[] start, byte[] end, int numSplits, boolean inclusive) {
      if (Arrays.equals(start, HConstants.EMPTY_BYTE_ARRAY)) {
        start = firstRowBytes;
      }
      if (Arrays.equals(end, HConstants.EMPTY_BYTE_ARRAY)) {
        end = lastRowBytes;
      }
      Preconditions.checkArgument(
          Bytes.compareTo(end, start) > 0,
          "last row (%s) is configured less than first row (%s)",
          Bytes.toStringBinary(end),
          Bytes.toStringBinary(start));

      byte[][] splits = Bytes.split(start, end, true,
          numSplits - 1);
      Preconditions.checkState(splits != null,
          "Could not calculate input splits with given user input: " + this);
      if (inclusive) {
        return splits;
      } else {
        // remove endpoints, which are included in the splits list
        return Arrays.copyOfRange(splits, 1, splits.length - 1);
      }
    }

    @Override
    public byte[] firstRow() {
      return firstRowBytes;
    }

    @Override
    public byte[] lastRow() {
      return lastRowBytes;
    }

    @Override
    public void setFirstRow(String userInput) {
      firstRowBytes = Bytes.toBytesBinary(userInput);
    }

    @Override
    public void setLastRow(String userInput) {
      lastRowBytes = Bytes.toBytesBinary(userInput);
    }

    @Override
    public void setFirstRow(byte[] userInput) {
      firstRowBytes = userInput;
    }

    @Override
    public void setLastRow(byte[] userInput) {
      lastRowBytes = userInput;
    }

    @Override
    public byte[] strToRow(String input) {
      return Bytes.toBytesBinary(input);
    }

    @Override
    public String rowToStr(byte[] row) {
      return Bytes.toStringBinary(row);
    }

    @Override
    public String separator() {
      return ",";
    }

    @Override
    public String toString() {
      return this.getClass().getSimpleName() + " [" + rowToStr(firstRow())
          + "," + rowToStr(lastRow()) + "]";
    }
  }
}
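
/*
 * A closing usage sketch for UniformSplit (illustrative; byte values shown in
 * \xNN notation). Because keys are treated as raw unsigned bytes, the split
 * point of a region is simply the numeric midpoint of its endpoints:
 *
 *   RegionSplitter.SplitAlgorithm algo = new RegionSplitter.UniformSplit();
 *   byte[] mid = algo.split(new byte[] { 0x10 }, new byte[] { 0x30 });
 *   // mid = { 0x20 }, via Bytes.split(start, end, 1)[1], which returns
 *   // { start, midpoint, end }
 */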