/**
 *
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.util;

import java.io.IOException;
import java.math.BigInteger;
import java.util.Arrays;
import java.util.Collection;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.TreeMap;
import org.apache.commons.lang3.ArrayUtils;
import org.apache.commons.lang3.StringUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.HRegionInfo;
import org.apache.hadoop.hbase.HRegionLocation;
import org.apache.hadoop.hbase.ServerName;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Admin;
import org.apache.hadoop.hbase.client.ClusterConnection;
import org.apache.hadoop.hbase.client.ColumnFamilyDescriptor;
import org.apache.hadoop.hbase.client.ColumnFamilyDescriptorBuilder;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.ConnectionFactory;
import org.apache.hadoop.hbase.client.NoServerForRegionException;
import org.apache.hadoop.hbase.client.RegionLocator;
import org.apache.hadoop.hbase.client.Table;
import org.apache.hadoop.hbase.client.TableDescriptor;
import org.apache.hadoop.hbase.client.TableDescriptorBuilder;
import org.apache.hadoop.hbase.regionserver.HRegionFileSystem;
import org.apache.yetus.audience.InterfaceAudience;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import org.apache.hbase.thirdparty.com.google.common.base.Preconditions;
import org.apache.hbase.thirdparty.com.google.common.collect.Lists;
import org.apache.hbase.thirdparty.com.google.common.collect.Maps;
import org.apache.hbase.thirdparty.com.google.common.collect.Sets;
import org.apache.hbase.thirdparty.org.apache.commons.cli.CommandLine;
import org.apache.hbase.thirdparty.org.apache.commons.cli.GnuParser;
import org.apache.hbase.thirdparty.org.apache.commons.cli.HelpFormatter;
import org.apache.hbase.thirdparty.org.apache.commons.cli.OptionBuilder;
import org.apache.hbase.thirdparty.org.apache.commons.cli.Options;
import org.apache.hbase.thirdparty.org.apache.commons.cli.ParseException;

/**
 * The {@link RegionSplitter} class provides several utilities to help in the
 * administration lifecycle for developers who choose to manually split regions
 * instead of having HBase handle that automatically. The most useful utilities
 * are:
 * <p>
 * <ul>
 * <li>Create a table with a specified number of pre-split regions
 * <li>Execute a rolling split of all regions on an existing table
 * </ul>
 * <p>
 * Both operations can be safely done on a live server.
 * <p>
 * <b>Question:</b> How do I turn off automatic splitting? <br>
 * <b>Answer:</b> Automatic splitting is determined by the configuration value
 * <i>HConstants.HREGION_MAX_FILESIZE</i>. It is not recommended that you set this
 * to Long.MAX_VALUE in case you forget about manual splits. A suggested setting
 * is 100GB, which would result in &gt; 1hr major compactions if reached.
 * <p>
 * <b>Question:</b> Why did the original authors decide to manually split? <br>
 * <b>Answer:</b> Specific workload characteristics of our use case allowed us
 * to benefit from a manual split system.
 * <p>
 * <ul>
 * <li>Data (~1k) that would grow instead of being replaced
 * <li>Data growth was roughly uniform across all regions
 * <li>OLTP workload. Data loss is a big deal.
 * </ul>
 * <p>
 * <b>Question:</b> Why is manual splitting good for this workload? <br>
 * <b>Answer:</b> Although automated splitting is not a bad option, there are
 * benefits to manual splitting.
 * <p>
 * <ul>
 * <li>With growing amounts of data, splits will continually be needed. Since
 * you always know exactly what regions you have, long-term debugging and
 * profiling is much easier with manual splits. It is hard to trace the logs to
 * understand region level problems if it keeps splitting and getting renamed.
 * <li>Data offlining bugs + unknown number of split regions == oh crap! If a
 * WAL or StoreFile was mistakenly unprocessed by HBase due to a weird bug and
 * you notice it a day or so later, you can be assured that the regions
 * specified in these files are the same as the current regions and you have
 * fewer headaches trying to restore/replay your data.
 * <li>You can finely tune your compaction algorithm. With roughly uniform data
 * growth, it's easy to cause split / compaction storms as the regions all
 * roughly hit the same data size at the same time. With manual splits, you can
 * let staggered, time-based major compactions spread out your network IO load.
 * </ul>
 * <p>
 * <b>Question:</b> What's the optimal number of pre-split regions to create? <br>
 * <b>Answer:</b> Mileage will vary depending upon your application.
 * <p>
 * The short answer for our application is that we started with 10 pre-split
 * regions / server and watched our data growth over time. It's better to err on
 * the side of too few regions and to perform a rolling split later.
 * <p>
 * The more complicated answer is that this depends upon the largest storefile
 * in your region. With a growing data size, this will get larger over time. You
 * want the largest region to be just big enough that the
 * {@link org.apache.hadoop.hbase.regionserver.HStore} compact
 * selection algorithm only compacts it due to a timed major. If you don't, your
 * cluster can be prone to compaction storms as the algorithm decides to run
 * major compactions on a large series of regions all at once. Note that
 * compaction storms are due to the uniform data growth, not the manual split
 * decision.
 * <p>
 * If you pre-split your regions too thin, you can increase the major compaction
 * interval by configuring HConstants.MAJOR_COMPACTION_PERIOD. If your data size
 * grows too large, use this script to perform a network IO safe rolling split
 * of all regions.
 */
@InterfaceAudience.Private
public class RegionSplitter {
  private static final Logger LOG = LoggerFactory.getLogger(RegionSplitter.class);

  /**
   * A generic interface for the RegionSplitter code to use for all of its
   * functionality. Note that the original authors of this code used
   * {@link HexStringSplit} to partition their table, and it remains the
   * default, but this interface is provided for your custom algorithm. To use,
   * create a new derived class from this interface and call
   * {@link RegionSplitter#createPresplitTable} or
   * {@link RegionSplitter#rollingSplit(TableName, SplitAlgorithm, Configuration)} with the
   * argument splitClassName giving the name of your class.
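   * <p>
   * A minimal usage sketch (the region count is illustrative): the built-in
   * algorithms can be instantiated by their simple class names and asked for
   * pre-split boundaries directly:
   * <pre>{@code
   * Configuration conf = HBaseConfiguration.create();
   * SplitAlgorithm algo = RegionSplitter.newSplitAlgoInstance(conf, "HexStringSplit");
   * byte[][] boundaries = algo.split(16); // 15 split keys => 16 initial regions
   * }</pre>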
   */
  public interface SplitAlgorithm {
    /**
     * Split a pre-existing region into 2 regions.
     *
     * @param start
     *          first row (inclusive)
     * @param end
     *          last row (exclusive)
     * @return the split row to use
     */
    byte[] split(byte[] start, byte[] end);

    /**
     * Split an entire table.
     *
     * @param numRegions
     *          number of regions to split the table into
     *
     * @throws RuntimeException
     *           user input is validated at this time; a runtime exception may
     *           be thrown in response to a parse failure
     * @return array of split keys for the initial regions of the table. The
     *         length of the returned array should be numRegions-1.
     */
    byte[][] split(int numRegions);

    /**
     * Some MapReduce jobs may want to run multiple mappers per region;
     * this method is intended for such a use case.
     *
     * @param start first row (inclusive)
     * @param end last row (exclusive)
     * @param numSplits number of splits to generate
     * @param inclusive whether start and end are returned as split points
     */
    byte[][] split(byte[] start, byte[] end, int numSplits, boolean inclusive);

    /**
     * In HBase, the first row is represented by an empty byte array. This might
     * cause problems with your split algorithm or row printing. All your APIs
     * will be passed firstRow() instead of the empty array.
     *
     * @return your representation of your first row
     */
    byte[] firstRow();

    /**
     * In HBase, the last row is represented by an empty byte array. This might
     * cause problems with your split algorithm or row printing. All your APIs
     * will be passed lastRow() instead of the empty array.
     *
     * @return your representation of your last row
     */
    byte[] lastRow();

    /**
     * In HBase, the first row is represented by an empty byte array. Set this
     * value to help the split code understand how to evenly divide the first
     * region.
     *
     * @param userInput
     *          raw user input (may throw RuntimeException on parse failure)
     */
    void setFirstRow(String userInput);

    /**
     * In HBase, the last row is represented by an empty byte array. Set this
     * value to help the split code understand how to evenly divide the last
     * region. Note that this last row is inclusive for all rows sharing the
     * same prefix.
     *
     * @param userInput
     *          raw user input (may throw RuntimeException on parse failure)
     */
    void setLastRow(String userInput);

    /**
     * @param input
     *          user or file input for row
     * @return byte array representation of this row for HBase
     */
    byte[] strToRow(String input);

    /**
     * @param row
     *          byte array representing a row in HBase
     * @return String to use for debug &amp; file printing
     */
    String rowToStr(byte[] row);

    /**
     * @return the separator character to use when storing / printing the row
     */
    String separator();

    /**
     * Set the first row
     * @param userInput byte array of the row key.
     */
    void setFirstRow(byte[] userInput);

    /**
     * Set the last row
     * @param userInput byte array of the row key.
     */
    void setLastRow(byte[] userInput);
  }

  /**
   * The main function for the RegionSplitter application. Common uses:
   * <p>
   * <ul>
   * <li>create a table named 'myTable' with 60 pre-split regions containing 2
   * column families 'test' &amp; 'rs', assuming the keys are hex-encoded ASCII:
   * <ul>
   * <li>bin/hbase org.apache.hadoop.hbase.util.RegionSplitter -c 60 -f test:rs
   * myTable HexStringSplit
   * </ul>
   * <li>create a table named 'myTable' with 50 pre-split regions,
   * assuming the keys are decimal-encoded ASCII:
   * <ul>
   * <li>bin/hbase org.apache.hadoop.hbase.util.RegionSplitter -c 50
   * myTable DecimalStringSplit
   * </ul>
   * <li>perform a rolling split of 'myTable' (i.e. 60 =&gt; 120 regions), with 2
   * outstanding splits at a time, assuming keys are uniformly distributed
   * bytes:
   * <ul>
   * <li>bin/hbase org.apache.hadoop.hbase.util.RegionSplitter -r -o 2 myTable
   * UniformSplit
   * </ul>
   * </ul>
   *
   * There are three SplitAlgorithms built into RegionSplitter: HexStringSplit,
   * DecimalStringSplit, and UniformSplit. These are different strategies for
   * choosing region boundaries. See their source code for details.
   *
   * @param args
   *          Usage: RegionSplitter &lt;TABLE&gt; &lt;SPLITALGORITHM&gt;
   *          &lt;-c &lt;# regions&gt; -f &lt;family:family:...&gt; | -r
   *          [-o &lt;# outstanding splits&gt;]&gt;
   *          [-D &lt;conf.param=value&gt;]
   * @throws IOException
   *           HBase IO problem
   * @throws InterruptedException
   *           user requested exit
   * @throws ParseException
   *           problem parsing user input
   */
  @SuppressWarnings("static-access")
  public static void main(String[] args) throws IOException,
      InterruptedException, ParseException {
    Configuration conf = HBaseConfiguration.create();

    // parse user input
    Options opt = new Options();
    opt.addOption(OptionBuilder.withArgName("property=value").hasArg()
        .withDescription("Override HBase Configuration Settings").create("D"));
    opt.addOption(OptionBuilder.withArgName("region count").hasArg()
        .withDescription(
            "Create a new table with a pre-split number of regions")
        .create("c"));
    opt.addOption(OptionBuilder.withArgName("family:family:...").hasArg()
        .withDescription(
            "Column Families to create with new table.  Required with -c")
        .create("f"));
    opt.addOption("h", false, "Print this usage help");
    opt.addOption("r", false, "Perform a rolling split of an existing region");
    opt.addOption(OptionBuilder.withArgName("count").hasArg().withDescription(
        "Max outstanding splits that have unfinished major compactions")
        .create("o"));
    opt.addOption(null, "firstrow", true,
        "First Row in Table for Split Algorithm");
    opt.addOption(null, "lastrow", true,
        "Last Row in Table for Split Algorithm");
    opt.addOption(null, "risky", false,
        "Skip verification steps to complete quickly. "
            + "STRONGLY DISCOURAGED for production systems.");
    CommandLine cmd = new GnuParser().parse(opt, args);

    if (cmd.hasOption("D")) {
      for (String confOpt : cmd.getOptionValues("D")) {
        String[] kv = confOpt.split("=", 2);
        if (kv.length == 2) {
          conf.set(kv[0], kv[1]);
          LOG.debug("-D configuration override: " + kv[0] + "=" + kv[1]);
        } else {
          throw new ParseException("-D option format invalid: " + confOpt);
        }
      }
    }

    if (cmd.hasOption("risky")) {
      conf.setBoolean("split.verify", false);
    }

    boolean createTable = cmd.hasOption("c") && cmd.hasOption("f");
    boolean rollingSplit = cmd.hasOption("r");
    boolean oneOperOnly = createTable ^ rollingSplit;

    if (2 != cmd.getArgList().size() || !oneOperOnly || cmd.hasOption("h")) {
      new HelpFormatter().printHelp("bin/hbase regionsplitter <TABLE> <SPLITALGORITHM>\n"
          + "SPLITALGORITHM is the java class name of a class implementing "
          + "SplitAlgorithm, or one of the special strings HexStringSplit, "
          + "DecimalStringSplit, or UniformSplit, which are built-in split algorithms. "
          + "HexStringSplit treats keys as hexadecimal ASCII, "
          + "DecimalStringSplit treats keys as decimal ASCII, and "
          + "UniformSplit treats keys as arbitrary bytes.", opt);
      return;
    }
    TableName tableName = TableName.valueOf(cmd.getArgs()[0]);
    String splitClass = cmd.getArgs()[1];
    SplitAlgorithm splitAlgo = newSplitAlgoInstance(conf, splitClass);

    if (cmd.hasOption("firstrow")) {
      splitAlgo.setFirstRow(cmd.getOptionValue("firstrow"));
    }
    if (cmd.hasOption("lastrow")) {
      splitAlgo.setLastRow(cmd.getOptionValue("lastrow"));
    }

    if (createTable) {
      conf.set("split.count", cmd.getOptionValue("c"));
      createPresplitTable(tableName, splitAlgo, cmd.getOptionValue("f").split(":"), conf);
    }

    if (rollingSplit) {
      if (cmd.hasOption("o")) {
        conf.set("split.outstanding", cmd.getOptionValue("o"));
      }
      rollingSplit(tableName, splitAlgo, conf);
    }
  }

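  /**
   * Create a new table, pre-split into the number of regions given by the
   * "split.count" configuration key, with boundaries chosen by the supplied
   * {@link SplitAlgorithm}. Unless "split.verify" is false, blocks until all
   * regions show as online in hbase:meta.
   * @param tableName name of the table to create
   * @param splitAlgo algorithm that computes the initial region boundaries
   * @param columnFamilies names of the column families to create; at least one is required
   * @param conf HBase configuration; reads "split.count" and "split.verify"
   * @throws IOException if an HBase or network problem occurs
   * @throws InterruptedException if interrupted while waiting for regions to come online
   */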
  static void createPresplitTable(TableName tableName, SplitAlgorithm splitAlgo,
      String[] columnFamilies, Configuration conf) throws IOException, InterruptedException {
    final int splitCount = conf.getInt("split.count", 0);
    Preconditions.checkArgument(splitCount > 1, "Split count must be > 1");

    Preconditions.checkArgument(columnFamilies.length > 0,
        "Must specify at least one column family. ");
    LOG.debug("Creating table " + tableName + " with " + columnFamilies.length
        + " column families.  Presplitting to " + splitCount + " regions");

    TableDescriptorBuilder builder = TableDescriptorBuilder.newBuilder(tableName);
    for (String cf : columnFamilies) {
      builder.setColumnFamily(ColumnFamilyDescriptorBuilder.of(cf));
    }
    try (Connection connection = ConnectionFactory.createConnection(conf)) {
      try (Admin admin = connection.getAdmin()) {
        Preconditions.checkArgument(!admin.tableExists(tableName),
          "Table already exists: " + tableName);
        admin.createTable(builder.build(), splitAlgo.split(splitCount));
      }
      LOG.debug("Table created!  Waiting for regions to show online in META...");
      // NOTE: createTable is synchronous on the table, but not on the regions;
      // poll until every region is online, unless verification was disabled (-risky)
      if (conf.getBoolean("split.verify", true)) {
        int onlineRegions = 0;
        try (RegionLocator locator = connection.getRegionLocator(tableName)) {
          while (onlineRegions < splitCount) {
            onlineRegions = locator.getAllRegionLocations().size();
            LOG.debug(onlineRegions + " of " + splitCount + " regions online...");
            if (onlineRegions < splitCount) {
              Thread.sleep(10 * 1000); // sleep
            }
          }
        }
      }
      LOG.debug("Finished creating table with " + splitCount + " regions");
    }
  }

  /**
   * Alternative to the removed getCurrentNrHRS().
   * @param connection the cluster connection to query
   * @return Rough count of regionservers out on the cluster.
   * @throws IOException if a remote or network exception occurs
   */
  private static int getRegionServerCount(final Connection connection) throws IOException {
    try (Admin admin = connection.getAdmin()) {
      Collection<ServerName> servers = admin.getRegionServers();
      return servers == null || servers.isEmpty() ? 0 : servers.size();
    }
  }

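  /** Read the entire contents of the given file into a byte array. */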
  private static byte[] readFile(final FileSystem fs, final Path path) throws IOException {
    try (FSDataInputStream tmpIn = fs.open(path)) {
      byte[] rawData = new byte[tmpIn.available()];
      tmpIn.readFully(rawData);
      return rawData;
    }
  }

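  /**
   * Perform a rolling split of all regions in the given table: each region is
   * split at the point chosen by the {@link SplitAlgorithm}, with at most a
   * bounded number of splits outstanding at once. Progress is journaled to a
   * _balancedSplit file under the table directory so an interrupted run can
   * be resumed.
   * @param tableName table whose regions will be split
   * @param splitAlgo algorithm that chooses each region's split point
   * @param conf HBase configuration; "split.outstanding" caps concurrent splits
   * @throws IOException if an HBase or filesystem problem occurs
   * @throws InterruptedException if interrupted while waiting on outstanding splits
   */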
  static void rollingSplit(TableName tableName, SplitAlgorithm splitAlgo, Configuration conf)
      throws IOException, InterruptedException {
    final int minOS = conf.getInt("split.outstanding", 2);
    try (Connection connection = ConnectionFactory.createConnection(conf)) {
      // Max outstanding splits. default == 50% of servers
      final int MAX_OUTSTANDING = Math.max(getRegionServerCount(connection) / 2, minOS);

      Path hbDir = CommonFSUtils.getRootDir(conf);
      Path tableDir = CommonFSUtils.getTableDir(hbDir, tableName);
      Path splitFile = new Path(tableDir, "_balancedSplit");
      FileSystem fs = FileSystem.get(conf);

      // Get a list of daughter regions to create
      LinkedList<Pair<byte[], byte[]>> tmpRegionSet =
          getSplits(connection, tableName, splitAlgo);
      LinkedList<Pair<byte[], byte[]>> outstanding = Lists.newLinkedList();
      int splitCount = 0;
      final int origCount = tmpRegionSet.size();

      // all splits must compact & we have 1 compact thread, so 2 split
      // requests to the same RS can stall the outstanding split queue.
      // To fix, group the regions into an RS pool and round-robin through it
      LOG.debug("Bucketing regions by regionserver...");
      TreeMap<ServerName, LinkedList<Pair<byte[], byte[]>>> daughterRegions =
          Maps.newTreeMap();
      // Get a regionLocator.  Needed below.
      try (RegionLocator regionLocator = connection.getRegionLocator(tableName)) {
        for (Pair<byte[], byte[]> dr : tmpRegionSet) {
          ServerName rsLocation = regionLocator.getRegionLocation(dr.getSecond()).getServerName();
          if (!daughterRegions.containsKey(rsLocation)) {
            LinkedList<Pair<byte[], byte[]>> entry = Lists.newLinkedList();
            daughterRegions.put(rsLocation, entry);
          }
          daughterRegions.get(rsLocation).add(dr);
        }
        LOG.debug("Done with bucketing.  Split time!");
        long startTime = System.currentTimeMillis();

        // Open the split file and modify it as splits finish
        byte[] rawData = readFile(fs, splitFile);

        FSDataOutputStream splitOut = fs.create(splitFile);
        try {
          splitOut.write(rawData);

          try {
            // *** split code ***
            while (!daughterRegions.isEmpty()) {
              LOG.debug(daughterRegions.size() + " RS have regions to split.");

              // Get ServerName to region count mapping (currently informational only)
              final TreeMap<ServerName, Integer> rsSizes = Maps.newTreeMap();
              List<HRegionLocation> hrls = regionLocator.getAllRegionLocations();
              for (HRegionLocation hrl : hrls) {
                ServerName sn = hrl.getServerName();
                if (rsSizes.containsKey(sn)) {
                  rsSizes.put(sn, rsSizes.get(sn) + 1);
                } else {
                  rsSizes.put(sn, 1);
                }
              }

              // Round-robin through the per-regionserver queues so that no
              // single regionserver's compaction queue stalls the splits.
              for (Map.Entry<ServerName, LinkedList<Pair<byte[], byte[]>>> daughterRegion :
                  daughterRegions.entrySet()) {
                Pair<byte[], byte[]> dr = null;
                ServerName rsLoc = daughterRegion.getKey();
                LinkedList<Pair<byte[], byte[]>> regionList = daughterRegion.getValue();

                // Find a region in the ServerName list that hasn't been moved
                LOG.debug("Finding a region on " + rsLoc);
                while (!regionList.isEmpty()) {
                  dr = regionList.pop();

                  // get current region info
                  byte[] split = dr.getSecond();
                  HRegionLocation regionLoc = regionLocator.getRegionLocation(split);

                  // if this region moved locations
                  ServerName newRs = regionLoc.getServerName();
                  if (newRs.compareTo(rsLoc) != 0) {
                    LOG.debug("Region with " + splitAlgo.rowToStr(split)
                        + " moved to " + newRs + ". Relocating...");
                    // relocate it, don't use it right now
                    if (!daughterRegions.containsKey(newRs)) {
                      LinkedList<Pair<byte[], byte[]>> entry = Lists.newLinkedList();
                      daughterRegions.put(newRs, entry);
                    }
                    daughterRegions.get(newRs).add(dr);
                    dr = null;
                    continue;
                  }

                  // make sure this region wasn't already split
                  byte[] sk = regionLoc.getRegionInfo().getStartKey();
                  if (sk.length != 0) {
                    if (Bytes.equals(split, sk)) {
                      LOG.debug("Region already split on "
                          + splitAlgo.rowToStr(split) + ".  Skipping this region...");
                      ++splitCount;
                      dr = null;
                      continue;
                    }
                    byte[] start = dr.getFirst();
                    Preconditions.checkArgument(Bytes.equals(start, sk), splitAlgo
                        .rowToStr(start) + " != " + splitAlgo.rowToStr(sk));
                  }

                  // passed all checks! found a good region
                  break;
                }
                if (regionList.isEmpty()) {
                  daughterRegions.remove(rsLoc);
                }
                if (dr == null) {
                  continue;
                }

                // we have a good region, time to split!
                byte[] split = dr.getSecond();
                LOG.debug("Splitting at " + splitAlgo.rowToStr(split));
                try (Admin admin = connection.getAdmin()) {
                  admin.split(tableName, split);
                }

                LinkedList<Pair<byte[], byte[]>> finished = Lists.newLinkedList();
                LinkedList<Pair<byte[], byte[]>> localFinished = Lists.newLinkedList();
                if (conf.getBoolean("split.verify", true)) {
                  // we need to verify and rate-limit our splits
                  outstanding.addLast(dr);
                  // with too many outstanding splits, wait for some to finish
                  while (outstanding.size() >= MAX_OUTSTANDING) {
                    LOG.debug("Wait for outstanding splits " + outstanding.size());
                    localFinished = splitScan(outstanding, connection, tableName, splitAlgo);
                    if (localFinished.isEmpty()) {
                      Thread.sleep(30 * 1000);
                    } else {
                      finished.addAll(localFinished);
                      outstanding.removeAll(localFinished);
                      LOG.debug(localFinished.size() + " outstanding splits finished");
                    }
                  }
                } else {
                  finished.add(dr);
                }

                // mark each finished region as successfully split.
                for (Pair<byte[], byte[]> region : finished) {
                  splitOut.writeChars("- " + splitAlgo.rowToStr(region.getFirst())
                      + " " + splitAlgo.rowToStr(region.getSecond()) + "\n");
                  splitCount++;
                  if (splitCount % 10 == 0) {
                    long tDiff = (System.currentTimeMillis() - startTime)
                        / splitCount;
                    LOG.debug("STATUS UPDATE: " + splitCount + " / " + origCount
                        + ". Avg Time / Split = "
                        + org.apache.hadoop.util.StringUtils.formatTime(tDiff));
                  }
                }
              }
            }
            if (conf.getBoolean("split.verify", true)) {
              while (!outstanding.isEmpty()) {
                LOG.debug("Finally Wait for outstanding splits " + outstanding.size());
                LinkedList<Pair<byte[], byte[]>> finished = splitScan(outstanding,
                    connection, tableName, splitAlgo);
                if (finished.isEmpty()) {
                  Thread.sleep(30 * 1000);
                } else {
                  outstanding.removeAll(finished);
                  for (Pair<byte[], byte[]> region : finished) {
                    splitOut.writeChars("- " + splitAlgo.rowToStr(region.getFirst())
                        + " " + splitAlgo.rowToStr(region.getSecond()) + "\n");
                    splitCount++;
                  }
                  LOG.debug("Finally " + finished.size() + " outstanding splits finished");
                }
              }
            }
            LOG.debug("All regions have been successfully split!");
          } finally {
            long tDiff = System.currentTimeMillis() - startTime;
            LOG.debug("TOTAL TIME = "
                + org.apache.hadoop.util.StringUtils.formatTime(tDiff));
            LOG.debug("Splits = " + splitCount);
            if (0 < splitCount) {
              LOG.debug("Avg Time / Split = "
                  + org.apache.hadoop.util.StringUtils.formatTime(tDiff / splitCount));
            }
          }
        } finally {
          splitOut.close();
          fs.delete(splitFile, false);
        }
      }
    }
  }

  /**
   * Instantiate the {@link SplitAlgorithm} named by splitClassName. The
   * built-in algorithms may be referenced by their simple class names.
   * @throws IOException if the specified SplitAlgorithm class couldn't be
   * instantiated
   */
  public static SplitAlgorithm newSplitAlgoInstance(Configuration conf,
      String splitClassName) throws IOException {
    Class<?> splitClass;

    // For split algorithms built into RegionSplitter, the user can specify
    // their simple class name instead of a fully qualified class name.
    if (splitClassName.equals(HexStringSplit.class.getSimpleName())) {
      splitClass = HexStringSplit.class;
    } else if (splitClassName.equals(DecimalStringSplit.class.getSimpleName())) {
      splitClass = DecimalStringSplit.class;
    } else if (splitClassName.equals(UniformSplit.class.getSimpleName())) {
      splitClass = UniformSplit.class;
    } else {
      try {
        splitClass = conf.getClassByName(splitClassName);
      } catch (ClassNotFoundException e) {
        throw new IOException("Couldn't load split class " + splitClassName, e);
      }
      if (splitClass == null) {
        throw new IOException("Failed loading split class " + splitClassName);
      }
      if (!SplitAlgorithm.class.isAssignableFrom(splitClass)) {
        throw new IOException(
            "Specified split class doesn't implement SplitAlgorithm");
      }
    }
    try {
      return splitClass.asSubclass(SplitAlgorithm.class).getDeclaredConstructor().newInstance();
    } catch (Exception e) {
      throw new IOException("Problem loading split algorithm: ", e);
    }
  }

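  /**
   * Check each outstanding split and report the ones that have fully
   * completed, i.e. both daughter regions are online and no reference files
   * remain in any of their column families.
   * @param regionList outstanding (start key, split key) pairs
   * @param connection connection to the cluster
   * @param tableName table being split
   * @param splitAlgo used to render keys for log messages
   * @return the subset of regionList whose splits have finished
   */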
  static LinkedList<Pair<byte[], byte[]>> splitScan(
      LinkedList<Pair<byte[], byte[]>> regionList,
      final Connection connection,
      final TableName tableName,
      SplitAlgorithm splitAlgo)
      throws IOException, InterruptedException {
    LinkedList<Pair<byte[], byte[]>> finished = Lists.newLinkedList();
    LinkedList<Pair<byte[], byte[]>> logicalSplitting = Lists.newLinkedList();
    LinkedList<Pair<byte[], byte[]>> physicalSplitting = Lists.newLinkedList();

    // Get table info
    Pair<Path, Path> tableDirAndSplitFile =
        getTableDirAndSplitFile(connection.getConfiguration(), tableName);
    Path tableDir = tableDirAndSplitFile.getFirst();
    FileSystem fs = tableDir.getFileSystem(connection.getConfiguration());
    // Clear the cache to forcibly refresh region information
    ((ClusterConnection) connection).clearRegionLocationCache();
    TableDescriptor htd = null;
    try (Table table = connection.getTable(tableName)) {
      htd = table.getDescriptor();
    }
    try (RegionLocator regionLocator = connection.getRegionLocator(tableName)) {

      // for every region that hasn't been verified as a finished split
      for (Pair<byte[], byte[]> region : regionList) {
        byte[] start = region.getFirst();
        byte[] split = region.getSecond();

        // see if the new split daughter region has come online
        try {
          HRegionInfo dri = regionLocator.getRegionLocation(split).getRegionInfo();
          if (dri.isOffline() || !Bytes.equals(dri.getStartKey(), split)) {
            logicalSplitting.add(region);
            continue;
          }
        } catch (NoServerForRegionException nsfre) {
          // NSFRE will occur if the old hbase:meta entry has no server assigned
          LOG.info(nsfre.toString(), nsfre);
          logicalSplitting.add(region);
          continue;
        }

        try {
          // when a daughter region is opened, a compaction is triggered
          // wait until compaction completes for both daughter regions
          LinkedList<HRegionInfo> check = Lists.newLinkedList();
          check.add(regionLocator.getRegionLocation(start).getRegionInfo());
          check.add(regionLocator.getRegionLocation(split).getRegionInfo());
          for (HRegionInfo hri : check.toArray(new HRegionInfo[check.size()])) {
            byte[] sk = hri.getStartKey();
            if (sk.length == 0) {
              sk = splitAlgo.firstRow();
            }

            HRegionFileSystem regionFs = HRegionFileSystem.openRegionFromFileSystem(
                connection.getConfiguration(), fs, tableDir, hri, true);

            // Check every column family for the region; the daughter is fully
            // compacted once no reference files remain.
            boolean refFound = false;
            for (ColumnFamilyDescriptor c : htd.getColumnFamilies()) {
              if ((refFound = regionFs.hasReferences(c.getNameAsString()))) {
                break;
              }
            }

            // compaction is completed when all reference files are gone
            if (!refFound) {
              check.remove(hri);
            }
          }
          if (check.isEmpty()) {
            finished.add(region);
          } else {
            physicalSplitting.add(region);
          }
        } catch (NoServerForRegionException nsfre) {
          LOG.debug("No Server Exception thrown for: " + splitAlgo.rowToStr(start));
          physicalSplitting.add(region);
          ((ClusterConnection) connection).clearRegionLocationCache();
        }
      }

      LOG.debug("Split Scan: " + finished.size() + " finished / "
          + logicalSplitting.size() + " split wait / "
          + physicalSplitting.size() + " reference wait");

      return finished;
    }
  }

  /**
   * Return the table directory and the split journal file for a table.
   * @param conf configuration used to locate the HBase root directory
   * @param tableName table whose paths are being looked up
   * @return A Pair where the first item is the table dir and the second is the split file.
   * @throws IOException if a remote or network exception occurs
   */
  private static Pair<Path, Path> getTableDirAndSplitFile(final Configuration conf,
      final TableName tableName) throws IOException {
    Path hbDir = CommonFSUtils.getRootDir(conf);
    Path tableDir = CommonFSUtils.getTableDir(hbDir, tableName);
    Path splitFile = new Path(tableDir, "_balancedSplit");
    return new Pair<>(tableDir, splitFile);
  }

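  /**
   * Compute the (start key, split point) pairs that still need to be split.
   * On a fresh run, split points are calculated from the table's current
   * region boundaries and journaled to the _balancedSplit file; if that file
   * already exists, it is replayed so that only unfinished splits are
   * returned.
   */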
  static LinkedList<Pair<byte[], byte[]>> getSplits(final Connection connection,
      TableName tableName, SplitAlgorithm splitAlgo) throws IOException {
    Pair<Path, Path> tableDirAndSplitFile =
        getTableDirAndSplitFile(connection.getConfiguration(), tableName);
    Path tableDir = tableDirAndSplitFile.getFirst();
    Path splitFile = tableDirAndSplitFile.getSecond();

    FileSystem fs = tableDir.getFileSystem(connection.getConfiguration());

    // Using strings because (new byte[]{0}).equals(new byte[]{0}) == false
    Set<Pair<String, String>> daughterRegions = Sets.newHashSet();

    // Does a split file exist?
    if (!fs.exists(splitFile)) {
      // NO = fresh start. calculate splits to make
      LOG.debug("No " + splitFile.getName() + " file. Calculating splits ");

      // Query meta for all regions in the table
      Set<Pair<byte[], byte[]>> rows = Sets.newHashSet();
      Pair<byte[][], byte[][]> tmp = null;
      try (RegionLocator regionLocator = connection.getRegionLocator(tableName)) {
        tmp = regionLocator.getStartEndKeys();
      }
      Preconditions.checkArgument(tmp.getFirst().length == tmp.getSecond().length,
          "Start and end key arrays must be the same length");
      for (int i = 0; i < tmp.getFirst().length; ++i) {
        byte[] start = tmp.getFirst()[i], end = tmp.getSecond()[i];
        if (start.length == 0) {
          start = splitAlgo.firstRow();
        }
        if (end.length == 0) {
          end = splitAlgo.lastRow();
        }
        rows.add(Pair.newPair(start, end));
      }
      LOG.debug("Table " + tableName + " has " + rows.size() + " regions that will be split.");

      // prepare the split file
      Path tmpFile = new Path(tableDir, "_balancedSplit_prepare");
      FSDataOutputStream tmpOut = fs.create(tmpFile);

      // calculate all the splits == [daughterRegions] = [(start, splitPoint)]
      for (Pair<byte[], byte[]> r : rows) {
        byte[] splitPoint = splitAlgo.split(r.getFirst(), r.getSecond());
        String startStr = splitAlgo.rowToStr(r.getFirst());
        String splitStr = splitAlgo.rowToStr(splitPoint);
        daughterRegions.add(Pair.newPair(startStr, splitStr));
        LOG.debug("Will Split [" + startStr + " , "
            + splitAlgo.rowToStr(r.getSecond()) + ") at " + splitStr);
        tmpOut.writeChars("+ " + startStr + splitAlgo.separator() + splitStr + "\n");
      }
      tmpOut.close();
      fs.rename(tmpFile, splitFile);
    } else {
      LOG.debug("_balancedSplit file found. Replay log to restore state...");
      RecoverLeaseFSUtils.recoverFileLease(fs, splitFile, connection.getConfiguration(), null);

      // parse split file and process remaining splits
      FSDataInputStream tmpIn = fs.open(splitFile);
      StringBuilder sb = new StringBuilder(tmpIn.available());
      while (tmpIn.available() > 0) {
        sb.append(tmpIn.readChar());
      }
      tmpIn.close();
      for (String line : sb.toString().split("\n")) {
        String[] cmd = line.split(splitAlgo.separator());
        Preconditions.checkArgument(3 == cmd.length);
        byte[] start = splitAlgo.strToRow(cmd[1]);
        String startStr = splitAlgo.rowToStr(start);
        byte[] splitPoint = splitAlgo.strToRow(cmd[2]);
        String splitStr = splitAlgo.rowToStr(splitPoint);
        Pair<String, String> r = Pair.newPair(startStr, splitStr);
        if (cmd[0].equals("+")) {
          LOG.debug("Adding: " + r);
          daughterRegions.add(r);
        } else {
          LOG.debug("Removing: " + r);
          Preconditions.checkArgument(cmd[0].equals("-"),
              "Unknown option: " + cmd[0]);
          Preconditions.checkState(daughterRegions.contains(r),
              "Missing row: " + r);
          daughterRegions.remove(r);
        }
      }
      LOG.debug("Done reading. " + daughterRegions.size() + " regions left.");
    }
    LinkedList<Pair<byte[], byte[]>> ret = Lists.newLinkedList();
    for (Pair<String, String> r : daughterRegions) {
      ret.add(Pair.newPair(splitAlgo.strToRow(r.getFirst()),
          splitAlgo.strToRow(r.getSecond())));
    }
    return ret;
  }

  /**
   * HexStringSplit is a well-known {@link SplitAlgorithm} for choosing region
   * boundaries. The format of a HexStringSplit region boundary is the ASCII
   * representation of an MD5 checksum, or any other uniformly distributed
   * hexadecimal value. Rows are hex-encoded long values in the range
   * <b>"00000000" =&gt; "FFFFFFFF"</b> and are left-padded with zeros to keep the
   * same order lexicographically as if they were binary.
   *
   * Since this split algorithm uses hex strings as keys, it is easy to read &amp;
   * write in the shell but takes up more space and may be non-intuitive.
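   * <p>
   * An illustrative sketch (the region count is arbitrary): pre-splitting the
   * default "00000000" =&gt; "FFFFFFFF" key space into four regions yields three
   * boundary keys. Note that {@code BigInteger.toString(16)} emits lowercase
   * hex digits:
   * <pre>{@code
   * SplitAlgorithm algo = new RegionSplitter.HexStringSplit();
   * byte[][] boundaries = algo.split(4);
   * // boundaries encode "40000000", "80000000", "c0000000"
   * }</pre>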
   */
  public static class HexStringSplit extends NumberStringSplit {
    final static String DEFAULT_MIN_HEX = "00000000";
    final static String DEFAULT_MAX_HEX = "FFFFFFFF";
    final static int RADIX_HEX = 16;

    public HexStringSplit() {
      super(DEFAULT_MIN_HEX, DEFAULT_MAX_HEX, RADIX_HEX);
    }
  }

  /**
   * The format of a DecimalStringSplit region boundary is the ASCII representation of
   * a reversed sequential number, or any other uniformly distributed decimal value.
   * Rows are decimal-encoded long values in the range
   * <b>"00000000" =&gt; "99999999"</b> and are left-padded with zeros to keep the
   * same order lexicographically as if they were binary.
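   * <p>
   * An illustrative sketch (the region count is arbitrary): splitting the
   * default "00000000" =&gt; "99999999" key space into four regions yields the
   * boundary keys "25000000", "50000000", and "75000000":
   * <pre>{@code
   * SplitAlgorithm algo = new RegionSplitter.DecimalStringSplit();
   * byte[][] boundaries = algo.split(4); // "25000000", "50000000", "75000000"
   * }</pre>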
   */
  public static class DecimalStringSplit extends NumberStringSplit {
    final static String DEFAULT_MIN_DEC = "00000000";
    final static String DEFAULT_MAX_DEC = "99999999";
    final static int RADIX_DEC = 10;

    public DecimalStringSplit() {
      super(DEFAULT_MIN_DEC, DEFAULT_MAX_DEC, RADIX_DEC);
    }
  }

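  /**
   * Base class for {@link SplitAlgorithm}s whose keys are fixed-width,
   * left-padded numeric strings in a configurable radix (see
   * {@link HexStringSplit} and {@link DecimalStringSplit}).
   */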
  public abstract static class NumberStringSplit implements SplitAlgorithm {

    String firstRow;
    BigInteger firstRowInt;
    String lastRow;
    BigInteger lastRowInt;
    int rowComparisonLength;
    int radix;

    NumberStringSplit(String minRow, String maxRow, int radix) {
      this.firstRow = minRow;
      this.lastRow = maxRow;
      this.radix = radix;
      this.firstRowInt = BigInteger.ZERO;
      this.lastRowInt = new BigInteger(lastRow, this.radix);
      this.rowComparisonLength = lastRow.length();
    }

    @Override
    public byte[] split(byte[] start, byte[] end) {
      BigInteger s = convertToBigInteger(start);
      BigInteger e = convertToBigInteger(end);
      Preconditions.checkArgument(!e.equals(BigInteger.ZERO));
      return convertToByte(split2(s, e));
    }

    @Override
    public byte[][] split(int n) {
      Preconditions.checkArgument(lastRowInt.compareTo(firstRowInt) > 0,
          "last row (%s) is configured less than first row (%s)", lastRow,
          firstRow);
      // +1 to range because the last row is inclusive
      BigInteger range = lastRowInt.subtract(firstRowInt).add(BigInteger.ONE);
      Preconditions.checkState(range.compareTo(BigInteger.valueOf(n)) >= 0,
          "split granularity (%s) is greater than the range (%s)", n, range);

      BigInteger[] splits = new BigInteger[n - 1];
      BigInteger sizeOfEachSplit = range.divide(BigInteger.valueOf(n));
      for (int i = 1; i < n; i++) {
        // NOTE: this means the last region gets all the slop.
        // This is not a big deal if we're assuming n << MAXHEX
        splits[i - 1] = firstRowInt.add(sizeOfEachSplit.multiply(BigInteger
            .valueOf(i)));
      }
      return convertToBytes(splits);
    }

    @Override
    public byte[][] split(byte[] start, byte[] end, int numSplits, boolean inclusive) {
      BigInteger s = convertToBigInteger(start);
      BigInteger e = convertToBigInteger(end);

      Preconditions.checkArgument(e.compareTo(s) > 0,
          "last row (%s) is configured less than first row (%s)", rowToStr(end),
          rowToStr(start));
      // +1 to range because the last row is inclusive
      BigInteger range = e.subtract(s).add(BigInteger.ONE);
      Preconditions.checkState(range.compareTo(BigInteger.valueOf(numSplits)) >= 0,
          "split granularity (%s) is greater than the range (%s)", numSplits, range);

      BigInteger[] splits = new BigInteger[numSplits - 1];
      BigInteger sizeOfEachSplit = range.divide(BigInteger.valueOf(numSplits));
      for (int i = 1; i < numSplits; i++) {
        // NOTE: this means the last region gets all the slop.
        // This is not a big deal if we're assuming n << MAXHEX
        splits[i - 1] = s.add(sizeOfEachSplit.multiply(BigInteger.valueOf(i)));
      }

      if (inclusive) {
        BigInteger[] inclusiveSplitPoints = new BigInteger[numSplits + 1];
        inclusiveSplitPoints[0] = convertToBigInteger(start);
        inclusiveSplitPoints[numSplits] = convertToBigInteger(end);
        System.arraycopy(splits, 0, inclusiveSplitPoints, 1, splits.length);
        return convertToBytes(inclusiveSplitPoints);
      } else {
        return convertToBytes(splits);
      }
    }

    @Override
    public byte[] firstRow() {
      return convertToByte(firstRowInt);
    }

    @Override
    public byte[] lastRow() {
      return convertToByte(lastRowInt);
    }

    @Override
    public void setFirstRow(String userInput) {
      firstRow = userInput;
      firstRowInt = new BigInteger(firstRow, radix);
    }

    @Override
    public void setLastRow(String userInput) {
      lastRow = userInput;
      lastRowInt = new BigInteger(lastRow, radix);
      // Precondition: lastRow > firstRow, so last's length is the greater
      rowComparisonLength = lastRow.length();
    }

    @Override
    public byte[] strToRow(String in) {
      return convertToByte(new BigInteger(in, radix));
    }

    @Override
    public String rowToStr(byte[] row) {
      return Bytes.toStringBinary(row);
    }

    @Override
    public String separator() {
      return " ";
    }

    @Override
    public void setFirstRow(byte[] userInput) {
      firstRow = Bytes.toString(userInput);
    }

    @Override
    public void setLastRow(byte[] userInput) {
      lastRow = Bytes.toString(userInput);
    }

    /**
     * Return the midpoint of two numbers (used by the split algorithm). The
     * result is the average of a and b, rounded toward zero.
     *
     * @param a number #1
     * @param b number #2
     * @return the midpoint of the 2 numbers
     */
    public BigInteger split2(BigInteger a, BigInteger b) {
      return a.add(b).divide(BigInteger.valueOf(2)).abs();
    }

    /**
     * Returns an array of byte arrays, one for each of the given BigIntegers
     *
     * @param bigIntegers numbers to convert
     * @return bytes corresponding to the bigIntegers
     */
    public byte[][] convertToBytes(BigInteger[] bigIntegers) {
      byte[][] returnBytes = new byte[bigIntegers.length][];
      for (int i = 0; i < bigIntegers.length; i++) {
        returnBytes[i] = convertToByte(bigIntegers[i]);
      }
      return returnBytes;
    }

    /**
     * Returns the bytes corresponding to the BigInteger
     *
     * @param bigInteger number to convert
     * @param pad padding length
     * @return byte array corresponding to the input BigInteger
     */
    public byte[] convertToByte(BigInteger bigInteger, int pad) {
      String bigIntegerString = bigInteger.toString(radix);
      bigIntegerString = StringUtils.leftPad(bigIntegerString, pad, '0');
      return Bytes.toBytes(bigIntegerString);
    }

    /**
     * Returns the bytes corresponding to the BigInteger
     *
     * @param bigInteger number to convert
     * @return corresponding bytes
     */
    public byte[] convertToByte(BigInteger bigInteger) {
      return convertToByte(bigInteger, rowComparisonLength);
    }

    /**
     * Returns the BigInteger represented by the byte array
     *
     * @param row byte array representing row
     * @return the corresponding BigInteger
     */
    public BigInteger convertToBigInteger(byte[] row) {
      return (row.length > 0) ? new BigInteger(Bytes.toString(row), radix)
          : BigInteger.ZERO;
    }

    @Override
    public String toString() {
      return this.getClass().getSimpleName() + " [" + rowToStr(firstRow())
          + "," + rowToStr(lastRow()) + "]";
    }
  }

  /**
   * A SplitAlgorithm that divides the space of possible keys evenly. Useful
   * when the keys are approximately uniform random bytes (e.g. hashes). Rows
   * are raw byte values in the range <b>00 =&gt; FF</b> and are right-padded with
   * zeros to keep the same memcmp() order. This is the natural algorithm to use
   * for a byte[] environment and saves space, but is not necessarily the
   * easiest for readability.
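   * <p>
   * An illustrative sketch, assuming the default first row (empty) and last
   * row (eight 0xFF bytes): splitting into two regions yields the single
   * midpoint boundary 0x7FFFFFFFFFFFFFFF:
   * <pre>{@code
   * SplitAlgorithm algo = new RegionSplitter.UniformSplit();
   * byte[][] boundaries = algo.split(2);
   * // boundaries[0] == {0x7F, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF}
   * }</pre>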
   */
  public static class UniformSplit implements SplitAlgorithm {
    static final byte xFF = (byte) 0xFF;
    byte[] firstRowBytes = ArrayUtils.EMPTY_BYTE_ARRAY;
    byte[] lastRowBytes =
        new byte[] {xFF, xFF, xFF, xFF, xFF, xFF, xFF, xFF};

    @Override
    public byte[] split(byte[] start, byte[] end) {
      return Bytes.split(start, end, 1)[1];
    }

    @Override
    public byte[][] split(int numRegions) {
      Preconditions.checkArgument(
          Bytes.compareTo(lastRowBytes, firstRowBytes) > 0,
          "last row (%s) is configured less than first row (%s)",
          Bytes.toStringBinary(lastRowBytes),
          Bytes.toStringBinary(firstRowBytes));

      byte[][] splits = Bytes.split(firstRowBytes, lastRowBytes, true,
          numRegions - 1);
      Preconditions.checkState(splits != null,
          "Could not split region with given user input: " + this);

      // remove endpoints, which are included in the splits list
      return Arrays.copyOfRange(splits, 1, splits.length - 1);
    }

    @Override
    public byte[][] split(byte[] start, byte[] end, int numSplits, boolean inclusive) {
      if (Arrays.equals(start, HConstants.EMPTY_BYTE_ARRAY)) {
        start = firstRowBytes;
      }
      if (Arrays.equals(end, HConstants.EMPTY_BYTE_ARRAY)) {
        end = lastRowBytes;
      }
      Preconditions.checkArgument(
          Bytes.compareTo(end, start) > 0,
          "last row (%s) is configured less than first row (%s)",
          Bytes.toStringBinary(end),
          Bytes.toStringBinary(start));

      byte[][] splits = Bytes.split(start, end, true,
          numSplits - 1);
      Preconditions.checkState(splits != null,
          "Could not calculate input splits with given user input: " + this);
      if (inclusive) {
        return splits;
      } else {
        // remove endpoints, which are included in the splits list
        return Arrays.copyOfRange(splits, 1, splits.length - 1);
      }
    }

    @Override
    public byte[] firstRow() {
      return firstRowBytes;
    }

    @Override
    public byte[] lastRow() {
      return lastRowBytes;
    }

    @Override
    public void setFirstRow(String userInput) {
      firstRowBytes = Bytes.toBytesBinary(userInput);
    }

    @Override
    public void setLastRow(String userInput) {
      lastRowBytes = Bytes.toBytesBinary(userInput);
    }

    @Override
    public void setFirstRow(byte[] userInput) {
      firstRowBytes = userInput;
    }

    @Override
    public void setLastRow(byte[] userInput) {
      lastRowBytes = userInput;
    }

    @Override
    public byte[] strToRow(String input) {
      return Bytes.toBytesBinary(input);
    }

    @Override
    public String rowToStr(byte[] row) {
      return Bytes.toStringBinary(row);
    }

    @Override
    public String separator() {
      return ",";
    }

    @Override
    public String toString() {
      return this.getClass().getSimpleName() + " [" + rowToStr(firstRow())
          + "," + rowToStr(lastRow()) + "]";
    }
  }
}