001/*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018package org.apache.hadoop.hbase.util;
019
020import static org.junit.Assert.assertArrayEquals;
021import static org.junit.Assert.assertEquals;
022import static org.junit.Assert.assertFalse;
023import static org.junit.Assert.assertNotSame;
024import static org.junit.Assert.assertTrue;
025
026import java.util.ArrayList;
027import java.util.List;
028import org.apache.commons.lang3.ArrayUtils;
029import org.apache.hadoop.conf.Configuration;
030import org.apache.hadoop.hbase.HBaseClassTestRule;
031import org.apache.hadoop.hbase.HBaseTestingUtility;
032import org.apache.hadoop.hbase.HRegionInfo;
033import org.apache.hadoop.hbase.HRegionLocation;
034import org.apache.hadoop.hbase.TableName;
035import org.apache.hadoop.hbase.client.RegionLocator;
036import org.apache.hadoop.hbase.client.Table;
037import org.apache.hadoop.hbase.testclassification.MediumTests;
038import org.apache.hadoop.hbase.testclassification.MiscTests;
039import org.apache.hadoop.hbase.util.RegionSplitter.DecimalStringSplit;
040import org.apache.hadoop.hbase.util.RegionSplitter.HexStringSplit;
041import org.apache.hadoop.hbase.util.RegionSplitter.SplitAlgorithm;
042import org.apache.hadoop.hbase.util.RegionSplitter.UniformSplit;
043import org.junit.AfterClass;
044import org.junit.BeforeClass;
045import org.junit.ClassRule;
046import org.junit.Rule;
047import org.junit.Test;
048import org.junit.experimental.categories.Category;
049import org.junit.rules.TestName;
050import org.slf4j.Logger;
051import org.slf4j.LoggerFactory;
052
053/**
054 * Tests for {@link RegionSplitter}, which can create a pre-split table or do a rolling split of an
055 * existing table.
056 */
057@Category({ MiscTests.class, MediumTests.class })
058public class TestRegionSplitter {
059
060  @ClassRule
061  public static final HBaseClassTestRule CLASS_RULE =
062    HBaseClassTestRule.forClass(TestRegionSplitter.class);
063
064  private final static Logger LOG = LoggerFactory.getLogger(TestRegionSplitter.class);
065  private final static HBaseTestingUtility UTIL = new HBaseTestingUtility();
066  private final static String CF_NAME = "SPLIT_TEST_CF";
067  private final static byte xFF = (byte) 0xff;
068
069  @Rule
070  public TestName name = new TestName();
071
072  @BeforeClass
073  public static void setup() throws Exception {
074    UTIL.startMiniCluster();
075  }
076
077  @AfterClass
078  public static void teardown() throws Exception {
079    UTIL.shutdownMiniCluster();
080  }
081
082  /**
083   * Test creating a pre-split table using the HexStringSplit algorithm.
084   */
085  @Test
086  public void testCreatePresplitTableHex() throws Exception {
087    final List<byte[]> expectedBounds = new ArrayList<>(17);
088    expectedBounds.add(ArrayUtils.EMPTY_BYTE_ARRAY);
089    expectedBounds.add("10000000".getBytes());
090    expectedBounds.add("20000000".getBytes());
091    expectedBounds.add("30000000".getBytes());
092    expectedBounds.add("40000000".getBytes());
093    expectedBounds.add("50000000".getBytes());
094    expectedBounds.add("60000000".getBytes());
095    expectedBounds.add("70000000".getBytes());
096    expectedBounds.add("80000000".getBytes());
097    expectedBounds.add("90000000".getBytes());
098    expectedBounds.add("a0000000".getBytes());
099    expectedBounds.add("b0000000".getBytes());
100    expectedBounds.add("c0000000".getBytes());
101    expectedBounds.add("d0000000".getBytes());
102    expectedBounds.add("e0000000".getBytes());
103    expectedBounds.add("f0000000".getBytes());
104    expectedBounds.add(ArrayUtils.EMPTY_BYTE_ARRAY);
105
106    // Do table creation/pre-splitting and verification of region boundaries
107    preSplitTableAndVerify(expectedBounds, HexStringSplit.class.getSimpleName(),
108      TableName.valueOf(name.getMethodName()));
109  }
110
111  /**
112   * Test creating a pre-split table using the UniformSplit algorithm.
113   */
114  @Test
115  public void testCreatePresplitTableUniform() throws Exception {
116    List<byte[]> expectedBounds = new ArrayList<>(17);
117    expectedBounds.add(ArrayUtils.EMPTY_BYTE_ARRAY);
118    expectedBounds.add(new byte[] { 0x10, 0, 0, 0, 0, 0, 0, 0 });
119    expectedBounds.add(new byte[] { 0x20, 0, 0, 0, 0, 0, 0, 0 });
120    expectedBounds.add(new byte[] { 0x30, 0, 0, 0, 0, 0, 0, 0 });
121    expectedBounds.add(new byte[] { 0x40, 0, 0, 0, 0, 0, 0, 0 });
122    expectedBounds.add(new byte[] { 0x50, 0, 0, 0, 0, 0, 0, 0 });
123    expectedBounds.add(new byte[] { 0x60, 0, 0, 0, 0, 0, 0, 0 });
124    expectedBounds.add(new byte[] { 0x70, 0, 0, 0, 0, 0, 0, 0 });
125    expectedBounds.add(new byte[] { (byte) 0x80, 0, 0, 0, 0, 0, 0, 0 });
126    expectedBounds.add(new byte[] { (byte) 0x90, 0, 0, 0, 0, 0, 0, 0 });
127    expectedBounds.add(new byte[] { (byte) 0xa0, 0, 0, 0, 0, 0, 0, 0 });
128    expectedBounds.add(new byte[] { (byte) 0xb0, 0, 0, 0, 0, 0, 0, 0 });
129    expectedBounds.add(new byte[] { (byte) 0xc0, 0, 0, 0, 0, 0, 0, 0 });
130    expectedBounds.add(new byte[] { (byte) 0xd0, 0, 0, 0, 0, 0, 0, 0 });
131    expectedBounds.add(new byte[] { (byte) 0xe0, 0, 0, 0, 0, 0, 0, 0 });
132    expectedBounds.add(new byte[] { (byte) 0xf0, 0, 0, 0, 0, 0, 0, 0 });
133    expectedBounds.add(ArrayUtils.EMPTY_BYTE_ARRAY);
134
135    // Do table creation/pre-splitting and verification of region boundaries
136    preSplitTableAndVerify(expectedBounds, UniformSplit.class.getSimpleName(),
137      TableName.valueOf(name.getMethodName()));
138  }
139
140  /**
141   * Unit tests for the HexStringSplit algorithm. Makes sure it divides up the space of keys in the
142   * way that we expect.
143   */
144  @Test
145  public void unitTestHexStringSplit() {
146    HexStringSplit splitter = new HexStringSplit();
147    // Check splitting while starting from scratch
148
149    byte[][] twoRegionsSplits = splitter.split(2);
150    assertEquals(1, twoRegionsSplits.length);
151    assertArrayEquals("80000000".getBytes(), twoRegionsSplits[0]);
152
153    byte[][] threeRegionsSplits = splitter.split(3);
154    assertEquals(2, threeRegionsSplits.length);
155    byte[] expectedSplit0 = "55555555".getBytes();
156    assertArrayEquals(expectedSplit0, threeRegionsSplits[0]);
157    byte[] expectedSplit1 = "aaaaaaaa".getBytes();
158    assertArrayEquals(expectedSplit1, threeRegionsSplits[1]);
159
160    // Check splitting existing regions that have start and end points
161    byte[] splitPoint = splitter.split("10000000".getBytes(), "30000000".getBytes());
162    assertArrayEquals("20000000".getBytes(), splitPoint);
163
164    byte[] lastRow = "ffffffff".getBytes();
165    assertArrayEquals(lastRow, splitter.lastRow());
166    byte[] firstRow = "00000000".getBytes();
167    assertArrayEquals(firstRow, splitter.firstRow());
168
169    // Halfway between 00... and 20... should be 10...
170    splitPoint = splitter.split(firstRow, "20000000".getBytes());
171    assertArrayEquals("10000000".getBytes(), splitPoint);
172
173    // Halfway between df... and ff... should be ef....
174    splitPoint = splitter.split("dfffffff".getBytes(), lastRow);
175    assertArrayEquals("efffffff".getBytes(), splitPoint);
176
177    // Check splitting region with multiple mappers per region
178    byte[][] splits = splitter.split("00000000".getBytes(), "30000000".getBytes(), 3, false);
179    assertEquals(2, splits.length);
180    assertArrayEquals("10000000".getBytes(), splits[0]);
181    assertArrayEquals("20000000".getBytes(), splits[1]);
182
183    splits = splitter.split("00000000".getBytes(), "20000000".getBytes(), 2, true);
184    assertEquals(3, splits.length);
185    assertArrayEquals("10000000".getBytes(), splits[1]);
186  }
187
188  /**
189   * Unit tests for the DecimalStringSplit algorithm. Makes sure it divides up the space of keys in
190   * the way that we expect.
191   */
192  @Test
193  public void unitTestDecimalStringSplit() {
194    DecimalStringSplit splitter = new DecimalStringSplit();
195    // Check splitting while starting from scratch
196
197    byte[][] twoRegionsSplits = splitter.split(2);
198    assertEquals(1, twoRegionsSplits.length);
199    assertArrayEquals("50000000".getBytes(), twoRegionsSplits[0]);
200
201    byte[][] threeRegionsSplits = splitter.split(3);
202    assertEquals(2, threeRegionsSplits.length);
203    byte[] expectedSplit0 = "33333333".getBytes();
204    assertArrayEquals(expectedSplit0, threeRegionsSplits[0]);
205    byte[] expectedSplit1 = "66666666".getBytes();
206    assertArrayEquals(expectedSplit1, threeRegionsSplits[1]);
207
208    // Check splitting existing regions that have start and end points
209    byte[] splitPoint = splitter.split("10000000".getBytes(), "30000000".getBytes());
210    assertArrayEquals("20000000".getBytes(), splitPoint);
211
212    byte[] lastRow = "99999999".getBytes();
213    assertArrayEquals(lastRow, splitter.lastRow());
214    byte[] firstRow = "00000000".getBytes();
215    assertArrayEquals(firstRow, splitter.firstRow());
216
217    // Halfway between 00... and 20... should be 10...
218    splitPoint = splitter.split(firstRow, "20000000".getBytes());
219    assertArrayEquals("10000000".getBytes(), splitPoint);
220
221    // Halfway between 00... and 19... should be 09...
222    splitPoint = splitter.split(firstRow, "19999999".getBytes());
223    assertArrayEquals("09999999".getBytes(), splitPoint);
224
225    // Halfway between 79... and 99... should be 89....
226    splitPoint = splitter.split("79999999".getBytes(), lastRow);
227    assertArrayEquals("89999999".getBytes(), splitPoint);
228
229    // Check splitting region with multiple mappers per region
230    byte[][] splits = splitter.split("00000000".getBytes(), "30000000".getBytes(), 3, false);
231    assertEquals(2, splits.length);
232    assertArrayEquals("10000000".getBytes(), splits[0]);
233    assertArrayEquals("20000000".getBytes(), splits[1]);
234
235    splits = splitter.split("00000000".getBytes(), "20000000".getBytes(), 2, true);
236    assertEquals(3, splits.length);
237    assertArrayEquals("10000000".getBytes(), splits[1]);
238  }
239
240  /**
241   * Unit tests for the UniformSplit algorithm. Makes sure it divides up the space of keys in the
242   * way that we expect.
243   */
244  @Test
245  public void unitTestUniformSplit() {
246    UniformSplit splitter = new UniformSplit();
247
248    // Check splitting while starting from scratch
249    try {
250      splitter.split(1);
251      throw new AssertionError("Splitting into <2 regions should have thrown exception");
252    } catch (IllegalArgumentException e) {
253    }
254
255    byte[][] twoRegionsSplits = splitter.split(2);
256    assertEquals(1, twoRegionsSplits.length);
257    assertArrayEquals(twoRegionsSplits[0], new byte[] { (byte) 0x80, 0, 0, 0, 0, 0, 0, 0 });
258
259    byte[][] threeRegionsSplits = splitter.split(3);
260    assertEquals(2, threeRegionsSplits.length);
261    byte[] expectedSplit0 = new byte[] { 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55 };
262    assertArrayEquals(expectedSplit0, threeRegionsSplits[0]);
263    byte[] expectedSplit1 = new byte[] { (byte) 0xAA, (byte) 0xAA, (byte) 0xAA, (byte) 0xAA,
264      (byte) 0xAA, (byte) 0xAA, (byte) 0xAA, (byte) 0xAA };
265    assertArrayEquals(expectedSplit1, threeRegionsSplits[1]);
266
267    // Check splitting existing regions that have start and end points
268    byte[] splitPoint = splitter.split(new byte[] { 0x10 }, new byte[] { 0x30 });
269    assertArrayEquals(new byte[] { 0x20 }, splitPoint);
270
271    byte[] lastRow = new byte[] { xFF, xFF, xFF, xFF, xFF, xFF, xFF, xFF };
272    assertArrayEquals(lastRow, splitter.lastRow());
273    byte[] firstRow = ArrayUtils.EMPTY_BYTE_ARRAY;
274    assertArrayEquals(firstRow, splitter.firstRow());
275
276    splitPoint = splitter.split(firstRow, new byte[] { 0x20 });
277    assertArrayEquals(splitPoint, new byte[] { 0x10 });
278
279    splitPoint =
280      splitter.split(new byte[] { (byte) 0xdf, xFF, xFF, xFF, xFF, xFF, xFF, xFF }, lastRow);
281    assertArrayEquals(splitPoint, new byte[] { (byte) 0xef, xFF, xFF, xFF, xFF, xFF, xFF, xFF });
282
283    splitPoint = splitter.split(new byte[] { 'a', 'a', 'a' }, new byte[] { 'a', 'a', 'b' });
284    assertArrayEquals(splitPoint, new byte[] { 'a', 'a', 'a', (byte) 0x80 });
285
286    // Check splitting region with multiple mappers per region
287    byte[][] splits =
288      splitter.split(new byte[] { 'a', 'a', 'a' }, new byte[] { 'a', 'a', 'd' }, 3, false);
289    assertEquals(2, splits.length);
290    assertArrayEquals(splits[0], new byte[] { 'a', 'a', 'b' });
291    assertArrayEquals(splits[1], new byte[] { 'a', 'a', 'c' });
292
293    splits = splitter.split(new byte[] { 'a', 'a', 'a' }, new byte[] { 'a', 'a', 'e' }, 2, true);
294    assertEquals(3, splits.length);
295    assertArrayEquals(splits[1], new byte[] { 'a', 'a', 'c' });
296  }
297
298  @Test
299  public void testUserInput() {
300    SplitAlgorithm algo = new HexStringSplit();
301    assertFalse(splitFailsPrecondition(algo)); // default settings are fine
302    assertFalse(splitFailsPrecondition(algo, "00", "AA")); // custom is fine
303    assertTrue(splitFailsPrecondition(algo, "AA", "00")); // range error
304    assertTrue(splitFailsPrecondition(algo, "AA", "AA")); // range error
305    assertFalse(splitFailsPrecondition(algo, "0", "2", 3)); // should be fine
306    assertFalse(splitFailsPrecondition(algo, "0", "A", 11)); // should be fine
307    assertTrue(splitFailsPrecondition(algo, "0", "A", 12)); // too granular
308
309    algo = new DecimalStringSplit();
310    assertFalse(splitFailsPrecondition(algo)); // default settings are fine
311    assertFalse(splitFailsPrecondition(algo, "00", "99")); // custom is fine
312    assertTrue(splitFailsPrecondition(algo, "99", "00")); // range error
313    assertTrue(splitFailsPrecondition(algo, "99", "99")); // range error
314    assertFalse(splitFailsPrecondition(algo, "0", "2", 3)); // should be fine
315    assertFalse(splitFailsPrecondition(algo, "0", "9", 10)); // should be fine
316    assertTrue(splitFailsPrecondition(algo, "0", "9", 11)); // too granular
317
318    algo = new UniformSplit();
319    assertFalse(splitFailsPrecondition(algo)); // default settings are fine
320    assertFalse(splitFailsPrecondition(algo, "\\x00", "\\xAA")); // custom is fine
321    assertTrue(splitFailsPrecondition(algo, "\\xAA", "\\x00")); // range error
322    assertTrue(splitFailsPrecondition(algo, "\\xAA", "\\xAA")); // range error
323    assertFalse(splitFailsPrecondition(algo, "\\x00", "\\x02", 3)); // should be fine
324    assertFalse(splitFailsPrecondition(algo, "\\x00", "\\x0A", 11)); // should be fine
325    assertFalse(splitFailsPrecondition(algo, "\\x00", "\\x0A", 12)); // should be fine
326  }
327
328  private boolean splitFailsPrecondition(SplitAlgorithm algo) {
329    return splitFailsPrecondition(algo, 100);
330  }
331
332  private boolean splitFailsPrecondition(SplitAlgorithm algo, String firstRow, String lastRow) {
333    return splitFailsPrecondition(algo, firstRow, lastRow, 100);
334  }
335
336  private boolean splitFailsPrecondition(SplitAlgorithm algo, String firstRow, String lastRow,
337    int numRegions) {
338    algo.setFirstRow(firstRow);
339    algo.setLastRow(lastRow);
340    return splitFailsPrecondition(algo, numRegions);
341  }
342
343  private boolean splitFailsPrecondition(SplitAlgorithm algo, int numRegions) {
344    try {
345      byte[][] s = algo.split(numRegions);
346      LOG.debug("split algo = " + algo);
347      if (s != null) {
348        StringBuilder sb = new StringBuilder();
349        for (byte[] b : s) {
350          sb.append(Bytes.toStringBinary(b) + "  ");
351        }
352        LOG.debug(sb.toString());
353      }
354      return false;
355    } catch (IllegalArgumentException e) {
356      return true;
357    } catch (IllegalStateException e) {
358      return true;
359    } catch (IndexOutOfBoundsException e) {
360      return true;
361    }
362  }
363
364  /**
365   * Creates a pre-split table with expectedBounds.size()+1 regions, then verifies that the region
366   * boundaries are the same as the expected region boundaries in expectedBounds.
367   * @throws Various junit assertions
368   */
369  private void preSplitTableAndVerify(List<byte[]> expectedBounds, String splitClass,
370    TableName tableName) throws Exception {
371    final int numRegions = expectedBounds.size() - 1;
372    final Configuration conf = UTIL.getConfiguration();
373    conf.setInt("split.count", numRegions);
374    SplitAlgorithm splitAlgo = RegionSplitter.newSplitAlgoInstance(conf, splitClass);
375    RegionSplitter.createPresplitTable(tableName, splitAlgo, new String[] { CF_NAME }, conf);
376    verifyBounds(expectedBounds, tableName);
377  }
378
379  @Test
380  public void noopRollingSplit() throws Exception {
381    final List<byte[]> expectedBounds = new ArrayList<>(1);
382    expectedBounds.add(ArrayUtils.EMPTY_BYTE_ARRAY);
383    rollingSplitAndVerify(TableName.valueOf(TestRegionSplitter.class.getSimpleName()),
384      "UniformSplit", expectedBounds);
385  }
386
387  private void rollingSplitAndVerify(TableName tableName, String splitClass,
388    List<byte[]> expectedBounds) throws Exception {
389    final Configuration conf = UTIL.getConfiguration();
390
391    // Set this larger than the number of splits so RegionSplitter won't block
392    conf.setInt("split.outstanding", 5);
393    SplitAlgorithm splitAlgo = RegionSplitter.newSplitAlgoInstance(conf, splitClass);
394    RegionSplitter.rollingSplit(tableName, splitAlgo, conf);
395    verifyBounds(expectedBounds, tableName);
396  }
397
398  private void verifyBounds(List<byte[]> expectedBounds, TableName tableName) throws Exception {
399    // Get region boundaries from the cluster and verify their endpoints
400    final int numRegions = expectedBounds.size() - 1;
401    try (Table table = UTIL.getConnection().getTable(tableName);
402      RegionLocator locator = UTIL.getConnection().getRegionLocator(tableName)) {
403      final List<HRegionLocation> regionInfoMap = locator.getAllRegionLocations();
404      assertEquals(numRegions, regionInfoMap.size());
405      for (HRegionLocation entry : regionInfoMap) {
406        final HRegionInfo regionInfo = entry.getRegionInfo();
407        byte[] regionStart = regionInfo.getStartKey();
408        byte[] regionEnd = regionInfo.getEndKey();
409
410        // This region's start key should be one of the region boundaries
411        int startBoundaryIndex = indexOfBytes(expectedBounds, regionStart);
412        assertNotSame(-1, startBoundaryIndex);
413
414        // This region's end key should be the region boundary that comes
415        // after the starting boundary.
416        byte[] expectedRegionEnd = expectedBounds.get(startBoundaryIndex + 1);
417        assertEquals(0, Bytes.compareTo(regionEnd, expectedRegionEnd));
418      }
419    }
420  }
421
422  /**
423   * List.indexOf() doesn't really work for a List&lt;byte[]>, because byte[] doesn't override
424   * equals(). This method checks whether a list contains a given element by checking each element
425   * using the byte array comparator.
426   * @return the index of the first element that equals compareTo, or -1 if no elements are equal.
427   */
428  static private int indexOfBytes(List<byte[]> list, byte[] compareTo) {
429    int listIndex = 0;
430    for (byte[] elem : list) {
431      if (Bytes.BYTES_COMPARATOR.compare(elem, compareTo) == 0) {
432        return listIndex;
433      }
434      listIndex++;
435    }
436    return -1;
437  }
438
439}