001/*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018package org.apache.hadoop.hbase.util;
019
020import static org.junit.jupiter.api.Assertions.assertArrayEquals;
021import static org.junit.jupiter.api.Assertions.assertEquals;
022import static org.junit.jupiter.api.Assertions.assertFalse;
023import static org.junit.jupiter.api.Assertions.assertNotSame;
024import static org.junit.jupiter.api.Assertions.assertTrue;
025
026import java.util.ArrayList;
027import java.util.Arrays;
028import java.util.List;
029import org.apache.commons.lang3.ArrayUtils;
030import org.apache.hadoop.conf.Configuration;
031import org.apache.hadoop.hbase.HBaseTestingUtil;
032import org.apache.hadoop.hbase.HRegionLocation;
033import org.apache.hadoop.hbase.TableName;
034import org.apache.hadoop.hbase.client.RegionInfo;
035import org.apache.hadoop.hbase.client.RegionLocator;
036import org.apache.hadoop.hbase.client.Table;
037import org.apache.hadoop.hbase.testclassification.MediumTests;
038import org.apache.hadoop.hbase.testclassification.MiscTests;
039import org.apache.hadoop.hbase.util.RegionSplitter.DecimalStringSplit;
040import org.apache.hadoop.hbase.util.RegionSplitter.HexStringSplit;
041import org.apache.hadoop.hbase.util.RegionSplitter.SplitAlgorithm;
042import org.apache.hadoop.hbase.util.RegionSplitter.UniformSplit;
043import org.junit.jupiter.api.AfterAll;
044import org.junit.jupiter.api.BeforeAll;
045import org.junit.jupiter.api.Tag;
046import org.junit.jupiter.api.Test;
047import org.junit.jupiter.api.TestInfo;
048import org.slf4j.Logger;
049import org.slf4j.LoggerFactory;
050
051/**
052 * Tests for {@link RegionSplitter}, which can create a pre-split table or do a rolling split of an
053 * existing table.
054 */
055@Tag(MiscTests.TAG)
056@Tag(MediumTests.TAG)
057public class TestRegionSplitter {
058
059  private final static Logger LOG = LoggerFactory.getLogger(TestRegionSplitter.class);
060  private final static HBaseTestingUtil UTIL = new HBaseTestingUtil();
061  private final static String CF_NAME = "SPLIT_TEST_CF";
062  private final static byte xFF = (byte) 0xff;
063
064  @BeforeAll
065  public static void setup() throws Exception {
066    UTIL.startMiniCluster(2);
067  }
068
069  @AfterAll
070  public static void teardown() throws Exception {
071    UTIL.shutdownMiniCluster();
072  }
073
074  /**
075   * Test creating a pre-split table using the HexStringSplit algorithm.
076   */
077  @Test
078  public void testCreatePresplitTableHex(TestInfo testInfo) throws Exception {
079    final List<byte[]> expectedBounds = new ArrayList<>(17);
080    expectedBounds.add(ArrayUtils.EMPTY_BYTE_ARRAY);
081    expectedBounds.add(Bytes.toBytes("10000000"));
082    expectedBounds.add(Bytes.toBytes("20000000"));
083    expectedBounds.add(Bytes.toBytes("30000000"));
084    expectedBounds.add(Bytes.toBytes("40000000"));
085    expectedBounds.add(Bytes.toBytes("50000000"));
086    expectedBounds.add(Bytes.toBytes("60000000"));
087    expectedBounds.add(Bytes.toBytes("70000000"));
088    expectedBounds.add(Bytes.toBytes("80000000"));
089    expectedBounds.add(Bytes.toBytes("90000000"));
090    expectedBounds.add(Bytes.toBytes("a0000000"));
091    expectedBounds.add(Bytes.toBytes("b0000000"));
092    expectedBounds.add(Bytes.toBytes("c0000000"));
093    expectedBounds.add(Bytes.toBytes("d0000000"));
094    expectedBounds.add(Bytes.toBytes("e0000000"));
095    expectedBounds.add(Bytes.toBytes("f0000000"));
096    expectedBounds.add(ArrayUtils.EMPTY_BYTE_ARRAY);
097
098    // Do table creation/pre-splitting and verification of region boundaries
099    preSplitTableAndVerify(expectedBounds, HexStringSplit.class.getSimpleName(),
100      TableName.valueOf(testInfo.getTestMethod().get().getName()));
101  }
102
103  /**
104   * Test creating a pre-split table and splitting it again using the HexStringSplit and
105   * DecimalStringSplit algorithms.
106   */
107  @Test
108  public void testSplitPresplitTable() throws Exception {
109    testSplitPresplitTable(new HexStringSplit());
110    testSplitPresplitTable(new DecimalStringSplit());
111  }
112
113  private void testSplitPresplitTable(RegionSplitter.NumberStringSplit splitter) throws Exception {
114    final List<byte[]> initialBounds = new ArrayList<>();
115    initialBounds.add(ArrayUtils.EMPTY_BYTE_ARRAY);
116    initialBounds.addAll(Arrays.asList(splitter.split(8)));
117    initialBounds.add(ArrayUtils.EMPTY_BYTE_ARRAY);
118
119    // Do table creation/pre-splitting and verification of region boundaries
120    final String className = splitter.getClass().getSimpleName();
121    final TableName tableName = TableName.valueOf(className);
122    preSplitTableAndVerify(initialBounds, className, tableName);
123
124    // Split the table again and verify the new region boundaries
125    final List<byte[]> expectedBounds = new ArrayList<>();
126    expectedBounds.add(ArrayUtils.EMPTY_BYTE_ARRAY);
127    expectedBounds.addAll(Arrays.asList(splitter.split(16)));
128    expectedBounds.add(ArrayUtils.EMPTY_BYTE_ARRAY);
129    rollingSplitAndVerify(tableName, className, expectedBounds);
130  }
131
132  /**
133   * Test creating a pre-split table using the UniformSplit algorithm.
134   */
135  @Test
136  public void testCreatePresplitTableUniform(TestInfo testInfo) throws Exception {
137    List<byte[]> expectedBounds = new ArrayList<>(17);
138    expectedBounds.add(ArrayUtils.EMPTY_BYTE_ARRAY);
139    expectedBounds.add(new byte[] { 0x10, 0, 0, 0, 0, 0, 0, 0 });
140    expectedBounds.add(new byte[] { 0x20, 0, 0, 0, 0, 0, 0, 0 });
141    expectedBounds.add(new byte[] { 0x30, 0, 0, 0, 0, 0, 0, 0 });
142    expectedBounds.add(new byte[] { 0x40, 0, 0, 0, 0, 0, 0, 0 });
143    expectedBounds.add(new byte[] { 0x50, 0, 0, 0, 0, 0, 0, 0 });
144    expectedBounds.add(new byte[] { 0x60, 0, 0, 0, 0, 0, 0, 0 });
145    expectedBounds.add(new byte[] { 0x70, 0, 0, 0, 0, 0, 0, 0 });
146    expectedBounds.add(new byte[] { (byte) 0x80, 0, 0, 0, 0, 0, 0, 0 });
147    expectedBounds.add(new byte[] { (byte) 0x90, 0, 0, 0, 0, 0, 0, 0 });
148    expectedBounds.add(new byte[] { (byte) 0xa0, 0, 0, 0, 0, 0, 0, 0 });
149    expectedBounds.add(new byte[] { (byte) 0xb0, 0, 0, 0, 0, 0, 0, 0 });
150    expectedBounds.add(new byte[] { (byte) 0xc0, 0, 0, 0, 0, 0, 0, 0 });
151    expectedBounds.add(new byte[] { (byte) 0xd0, 0, 0, 0, 0, 0, 0, 0 });
152    expectedBounds.add(new byte[] { (byte) 0xe0, 0, 0, 0, 0, 0, 0, 0 });
153    expectedBounds.add(new byte[] { (byte) 0xf0, 0, 0, 0, 0, 0, 0, 0 });
154    expectedBounds.add(ArrayUtils.EMPTY_BYTE_ARRAY);
155
156    // Do table creation/pre-splitting and verification of region boundaries
157    preSplitTableAndVerify(expectedBounds, UniformSplit.class.getSimpleName(),
158      TableName.valueOf(testInfo.getTestMethod().get().getName()));
159  }
160
161  /**
162   * Unit tests for the HexStringSplit algorithm. Makes sure it divides up the space of keys in the
163   * way that we expect.
164   */
165  @Test
166  public void unitTestHexStringSplit() {
167    HexStringSplit splitter = new HexStringSplit();
168    // Check splitting while starting from scratch
169
170    byte[][] twoRegionsSplits = splitter.split(2);
171    assertEquals(1, twoRegionsSplits.length);
172    assertArrayEquals(Bytes.toBytes("80000000"), twoRegionsSplits[0]);
173
174    byte[][] threeRegionsSplits = splitter.split(3);
175    assertEquals(2, threeRegionsSplits.length);
176    byte[] expectedSplit0 = Bytes.toBytes("55555555");
177    assertArrayEquals(expectedSplit0, threeRegionsSplits[0]);
178    byte[] expectedSplit1 = Bytes.toBytes("aaaaaaaa");
179    assertArrayEquals(expectedSplit1, threeRegionsSplits[1]);
180
181    // Check splitting existing regions that have start and end points
182    byte[] splitPoint = splitter.split(Bytes.toBytes("10000000"), Bytes.toBytes("30000000"));
183    assertArrayEquals(Bytes.toBytes("20000000"), splitPoint);
184
185    byte[] lastRow = Bytes.toBytes("ffffffff");
186    assertArrayEquals(lastRow, splitter.lastRow());
187    byte[] firstRow = Bytes.toBytes("00000000");
188    assertArrayEquals(firstRow, splitter.firstRow());
189
190    // Halfway between 00... and 20... should be 10...
191    splitPoint = splitter.split(firstRow, Bytes.toBytes("20000000"));
192    assertArrayEquals(Bytes.toBytes("10000000"), splitPoint);
193
194    // Halfway between df... and ff... should be ef....
195    splitPoint = splitter.split(Bytes.toBytes("dfffffff"), lastRow);
196    assertArrayEquals(Bytes.toBytes("efffffff"), splitPoint);
197
198    // Check splitting region with multiple mappers per region
199    byte[][] splits =
200      splitter.split(Bytes.toBytes("00000000"), Bytes.toBytes("30000000"), 3, false);
201    assertEquals(2, splits.length);
202    assertArrayEquals(Bytes.toBytes("10000000"), splits[0]);
203    assertArrayEquals(Bytes.toBytes("20000000"), splits[1]);
204
205    splits = splitter.split(Bytes.toBytes("00000000"), Bytes.toBytes("20000000"), 2, true);
206    assertEquals(3, splits.length);
207    assertArrayEquals(Bytes.toBytes("10000000"), splits[1]);
208  }
209
210  /**
211   * Unit tests for the DecimalStringSplit algorithm. Makes sure it divides up the space of keys in
212   * the way that we expect.
213   */
214  @Test
215  public void unitTestDecimalStringSplit() {
216    DecimalStringSplit splitter = new DecimalStringSplit();
217    // Check splitting while starting from scratch
218
219    byte[][] twoRegionsSplits = splitter.split(2);
220    assertEquals(1, twoRegionsSplits.length);
221    assertArrayEquals(Bytes.toBytes("50000000"), twoRegionsSplits[0]);
222
223    byte[][] threeRegionsSplits = splitter.split(3);
224    assertEquals(2, threeRegionsSplits.length);
225    byte[] expectedSplit0 = Bytes.toBytes("33333333");
226    assertArrayEquals(expectedSplit0, threeRegionsSplits[0]);
227    byte[] expectedSplit1 = Bytes.toBytes("66666666");
228    assertArrayEquals(expectedSplit1, threeRegionsSplits[1]);
229
230    // Check splitting existing regions that have start and end points
231    byte[] splitPoint = splitter.split(Bytes.toBytes("10000000"), Bytes.toBytes("30000000"));
232    assertArrayEquals(Bytes.toBytes("20000000"), splitPoint);
233
234    byte[] lastRow = Bytes.toBytes("99999999");
235    assertArrayEquals(lastRow, splitter.lastRow());
236    byte[] firstRow = Bytes.toBytes("00000000");
237    assertArrayEquals(firstRow, splitter.firstRow());
238
239    // Halfway between 00... and 20... should be 10...
240    splitPoint = splitter.split(firstRow, Bytes.toBytes("20000000"));
241    assertArrayEquals(Bytes.toBytes("10000000"), splitPoint);
242
243    // Halfway between 00... and 19... should be 09...
244    splitPoint = splitter.split(firstRow, Bytes.toBytes("19999999"));
245    assertArrayEquals(Bytes.toBytes("09999999"), splitPoint);
246
247    // Halfway between 79... and 99... should be 89....
248    splitPoint = splitter.split(Bytes.toBytes("79999999"), lastRow);
249    assertArrayEquals(Bytes.toBytes("89999999"), splitPoint);
250
251    // Check splitting region with multiple mappers per region
252    byte[][] splits =
253      splitter.split(Bytes.toBytes("00000000"), Bytes.toBytes("30000000"), 3, false);
254    assertEquals(2, splits.length);
255    assertArrayEquals(Bytes.toBytes("10000000"), splits[0]);
256    assertArrayEquals(Bytes.toBytes("20000000"), splits[1]);
257
258    splits = splitter.split(Bytes.toBytes("00000000"), Bytes.toBytes("20000000"), 2, true);
259    assertEquals(3, splits.length);
260    assertArrayEquals(Bytes.toBytes("10000000"), splits[1]);
261  }
262
263  /**
264   * Unit tests for the UniformSplit algorithm. Makes sure it divides up the space of keys in the
265   * way that we expect.
266   */
267  @Test
268  public void unitTestUniformSplit() {
269    UniformSplit splitter = new UniformSplit();
270
271    // Check splitting while starting from scratch
272    try {
273      splitter.split(1);
274      throw new AssertionError("Splitting into <2 regions should have thrown exception");
275    } catch (IllegalArgumentException e) {
276    }
277
278    byte[][] twoRegionsSplits = splitter.split(2);
279    assertEquals(1, twoRegionsSplits.length);
280    assertArrayEquals(twoRegionsSplits[0], new byte[] { (byte) 0x80, 0, 0, 0, 0, 0, 0, 0 });
281
282    byte[][] threeRegionsSplits = splitter.split(3);
283    assertEquals(2, threeRegionsSplits.length);
284    byte[] expectedSplit0 = new byte[] { 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55 };
285    assertArrayEquals(expectedSplit0, threeRegionsSplits[0]);
286    byte[] expectedSplit1 = new byte[] { (byte) 0xAA, (byte) 0xAA, (byte) 0xAA, (byte) 0xAA,
287      (byte) 0xAA, (byte) 0xAA, (byte) 0xAA, (byte) 0xAA };
288    assertArrayEquals(expectedSplit1, threeRegionsSplits[1]);
289
290    // Check splitting existing regions that have start and end points
291    byte[] splitPoint = splitter.split(new byte[] { 0x10 }, new byte[] { 0x30 });
292    assertArrayEquals(new byte[] { 0x20 }, splitPoint);
293
294    byte[] lastRow = new byte[] { xFF, xFF, xFF, xFF, xFF, xFF, xFF, xFF };
295    assertArrayEquals(lastRow, splitter.lastRow());
296    byte[] firstRow = ArrayUtils.EMPTY_BYTE_ARRAY;
297    assertArrayEquals(firstRow, splitter.firstRow());
298
299    splitPoint = splitter.split(firstRow, new byte[] { 0x20 });
300    assertArrayEquals(splitPoint, new byte[] { 0x10 });
301
302    splitPoint =
303      splitter.split(new byte[] { (byte) 0xdf, xFF, xFF, xFF, xFF, xFF, xFF, xFF }, lastRow);
304    assertArrayEquals(splitPoint, new byte[] { (byte) 0xef, xFF, xFF, xFF, xFF, xFF, xFF, xFF });
305
306    splitPoint = splitter.split(new byte[] { 'a', 'a', 'a' }, new byte[] { 'a', 'a', 'b' });
307    assertArrayEquals(splitPoint, new byte[] { 'a', 'a', 'a', (byte) 0x80 });
308
309    // Check splitting region with multiple mappers per region
310    byte[][] splits =
311      splitter.split(new byte[] { 'a', 'a', 'a' }, new byte[] { 'a', 'a', 'd' }, 3, false);
312    assertEquals(2, splits.length);
313    assertArrayEquals(splits[0], new byte[] { 'a', 'a', 'b' });
314    assertArrayEquals(splits[1], new byte[] { 'a', 'a', 'c' });
315
316    splits = splitter.split(new byte[] { 'a', 'a', 'a' }, new byte[] { 'a', 'a', 'e' }, 2, true);
317    assertEquals(3, splits.length);
318    assertArrayEquals(splits[1], new byte[] { 'a', 'a', 'c' });
319  }
320
321  @Test
322  public void testUserInput() {
323    SplitAlgorithm algo = new HexStringSplit();
324    assertFalse(splitFailsPrecondition(algo)); // default settings are fine
325    assertFalse(splitFailsPrecondition(algo, "00", "AA")); // custom is fine
326    assertTrue(splitFailsPrecondition(algo, "AA", "00")); // range error
327    assertTrue(splitFailsPrecondition(algo, "AA", "AA")); // range error
328    assertFalse(splitFailsPrecondition(algo, "0", "2", 3)); // should be fine
329    assertFalse(splitFailsPrecondition(algo, "0", "A", 11)); // should be fine
330    assertTrue(splitFailsPrecondition(algo, "0", "A", 12)); // too granular
331
332    algo = new DecimalStringSplit();
333    assertFalse(splitFailsPrecondition(algo)); // default settings are fine
334    assertFalse(splitFailsPrecondition(algo, "00", "99")); // custom is fine
335    assertTrue(splitFailsPrecondition(algo, "99", "00")); // range error
336    assertTrue(splitFailsPrecondition(algo, "99", "99")); // range error
337    assertFalse(splitFailsPrecondition(algo, "0", "2", 3)); // should be fine
338    assertFalse(splitFailsPrecondition(algo, "0", "9", 10)); // should be fine
339    assertTrue(splitFailsPrecondition(algo, "0", "9", 11)); // too granular
340
341    algo = new UniformSplit();
342    assertFalse(splitFailsPrecondition(algo)); // default settings are fine
343    assertFalse(splitFailsPrecondition(algo, "\\x00", "\\xAA")); // custom is fine
344    assertTrue(splitFailsPrecondition(algo, "\\xAA", "\\x00")); // range error
345    assertTrue(splitFailsPrecondition(algo, "\\xAA", "\\xAA")); // range error
346    assertFalse(splitFailsPrecondition(algo, "\\x00", "\\x02", 3)); // should be fine
347    assertFalse(splitFailsPrecondition(algo, "\\x00", "\\x0A", 11)); // should be fine
348    assertFalse(splitFailsPrecondition(algo, "\\x00", "\\x0A", 12)); // should be fine
349  }
350
351  private boolean splitFailsPrecondition(SplitAlgorithm algo) {
352    return splitFailsPrecondition(algo, 100);
353  }
354
355  private boolean splitFailsPrecondition(SplitAlgorithm algo, String firstRow, String lastRow) {
356    return splitFailsPrecondition(algo, firstRow, lastRow, 100);
357  }
358
359  private boolean splitFailsPrecondition(SplitAlgorithm algo, String firstRow, String lastRow,
360    int numRegions) {
361    algo.setFirstRow(firstRow);
362    algo.setLastRow(lastRow);
363    return splitFailsPrecondition(algo, numRegions);
364  }
365
366  private boolean splitFailsPrecondition(SplitAlgorithm algo, int numRegions) {
367    try {
368      byte[][] s = algo.split(numRegions);
369      LOG.debug("split algo = " + algo);
370      if (s != null) {
371        StringBuilder sb = new StringBuilder();
372        for (byte[] b : s) {
373          sb.append(Bytes.toStringBinary(b) + "  ");
374        }
375        LOG.debug(sb.toString());
376      }
377      return false;
378    } catch (IllegalArgumentException e) {
379      return true;
380    } catch (IllegalStateException e) {
381      return true;
382    } catch (IndexOutOfBoundsException e) {
383      return true;
384    }
385  }
386
387  /**
388   * Creates a pre-split table with expectedBounds.size()+1 regions, then verifies that the region
389   * boundaries are the same as the expected region boundaries in expectedBounds.
390   * @throws Various junit assertions
391   */
392  private void preSplitTableAndVerify(List<byte[]> expectedBounds, String splitClass,
393    TableName tableName) throws Exception {
394    final int numRegions = expectedBounds.size() - 1;
395    final Configuration conf = UTIL.getConfiguration();
396    conf.setInt("split.count", numRegions);
397    SplitAlgorithm splitAlgo = RegionSplitter.newSplitAlgoInstance(conf, splitClass);
398    RegionSplitter.createPresplitTable(tableName, splitAlgo, new String[] { CF_NAME }, conf);
399    verifyBounds(expectedBounds, tableName);
400  }
401
402  @Test
403  public void noopRollingSplit() throws Exception {
404    final List<byte[]> expectedBounds = new ArrayList<>(1);
405    expectedBounds.add(ArrayUtils.EMPTY_BYTE_ARRAY);
406    rollingSplitAndVerify(TableName.valueOf(TestRegionSplitter.class.getSimpleName()),
407      "UniformSplit", expectedBounds);
408  }
409
410  private void rollingSplitAndVerify(TableName tableName, String splitClass,
411    List<byte[]> expectedBounds) throws Exception {
412    final Configuration conf = UTIL.getConfiguration();
413
414    // Set this larger than the number of splits so RegionSplitter won't block
415    conf.setInt("split.outstanding", 5);
416    SplitAlgorithm splitAlgo = RegionSplitter.newSplitAlgoInstance(conf, splitClass);
417    RegionSplitter.rollingSplit(tableName, splitAlgo, conf);
418    verifyBounds(expectedBounds, tableName);
419  }
420
421  private void verifyBounds(List<byte[]> expectedBounds, TableName tableName) throws Exception {
422    // Get region boundaries from the cluster and verify their endpoints
423    final int numRegions = expectedBounds.size() - 1;
424    try (Table table = UTIL.getConnection().getTable(tableName);
425      RegionLocator locator = UTIL.getConnection().getRegionLocator(tableName)) {
426      final List<HRegionLocation> regionInfoMap = locator.getAllRegionLocations();
427      assertEquals(numRegions, regionInfoMap.size());
428      for (HRegionLocation entry : regionInfoMap) {
429        final RegionInfo regionInfo = entry.getRegion();
430        byte[] regionStart = regionInfo.getStartKey();
431        byte[] regionEnd = regionInfo.getEndKey();
432
433        // This region's start key should be one of the region boundaries
434        int startBoundaryIndex = indexOfBytes(expectedBounds, regionStart);
435        assertNotSame(-1, startBoundaryIndex);
436
437        // This region's end key should be the region boundary that comes
438        // after the starting boundary.
439        byte[] expectedRegionEnd = expectedBounds.get(startBoundaryIndex + 1);
440        assertEquals(0, Bytes.compareTo(regionEnd, expectedRegionEnd));
441      }
442    }
443  }
444
445  /**
446   * List.indexOf() doesn't really work for a List&lt;byte[]>, because byte[] doesn't override
447   * equals(). This method checks whether a list contains a given element by checking each element
448   * using the byte array comparator.
449   * @return the index of the first element that equals compareTo, or -1 if no elements are equal.
450   */
451  static private int indexOfBytes(List<byte[]> list, byte[] compareTo) {
452    int listIndex = 0;
453    for (byte[] elem : list) {
454      if (Bytes.BYTES_COMPARATOR.compare(elem, compareTo) == 0) {
455        return listIndex;
456      }
457      listIndex++;
458    }
459    return -1;
460  }
461
462}