/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.tool;

import static org.junit.Assert.assertArrayEquals;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertTrue;
import static org.junit.Assert.fail;

import java.io.IOException;
import java.nio.ByteBuffer;
import java.util.ArrayList;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.TreeMap;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseClassTestRule;
import org.apache.hadoop.hbase.HBaseTestingUtility;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.NamespaceDescriptor;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.TableNotFoundException;
import org.apache.hadoop.hbase.client.ColumnFamilyDescriptor;
import org.apache.hadoop.hbase.client.ColumnFamilyDescriptorBuilder;
import org.apache.hadoop.hbase.client.Table;
import org.apache.hadoop.hbase.client.TableDescriptor;
import org.apache.hadoop.hbase.client.TableDescriptorBuilder;
import org.apache.hadoop.hbase.codec.KeyValueCodecWithTags;
import org.apache.hadoop.hbase.coprocessor.CoprocessorHost;
import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding;
import org.apache.hadoop.hbase.io.hfile.CacheConfig;
import org.apache.hadoop.hbase.io.hfile.HFile;
import org.apache.hadoop.hbase.io.hfile.HFileScanner;
import org.apache.hadoop.hbase.regionserver.BloomType;
import org.apache.hadoop.hbase.testclassification.LargeTests;
import org.apache.hadoop.hbase.testclassification.MiscTests;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.FSUtils;
import org.apache.hadoop.hbase.util.HFileTestUtil;
import org.junit.AfterClass;
import org.junit.BeforeClass;
import org.junit.ClassRule;
import org.junit.Rule;
import org.junit.Test;
import org.junit.experimental.categories.Category;
import org.junit.rules.TestName;

import org.apache.hbase.thirdparty.com.google.common.collect.Lists;

/**
 * Test cases for the "load" half of the HFileOutputFormat bulk load functionality. These tests run
 * faster than the full MR cluster tests in TestHFileOutputFormat.
 */
@Category({ MiscTests.class, LargeTests.class })
public class TestLoadIncrementalHFiles {

  @ClassRule
  public static final HBaseClassTestRule CLASS_RULE =
      HBaseClassTestRule.forClass(TestLoadIncrementalHFiles.class);

  @Rule
  public TestName tn = new TestName();

  private static final byte[] QUALIFIER = Bytes.toBytes("myqual");
  private static final byte[] FAMILY = Bytes.toBytes("myfam");
  private static final String NAMESPACE = "bulkNS";

  static final String EXPECTED_MSG_FOR_NON_EXISTING_FAMILY = "Unmatched family names found";
  static final int MAX_FILES_PER_REGION_PER_FAMILY = 4;

  private static final byte[][] SPLIT_KEYS =
      new byte[][] { Bytes.toBytes("ddd"), Bytes.toBytes("ppp") };

  static HBaseTestingUtility util = new HBaseTestingUtility();

  @BeforeClass
  public static void setUpBeforeClass() throws Exception {
    util.getConfiguration().set(CoprocessorHost.REGION_COPROCESSOR_CONF_KEY, "");
    util.getConfiguration().setInt(LoadIncrementalHFiles.MAX_FILES_PER_REGION_PER_FAMILY,
      MAX_FILES_PER_REGION_PER_FAMILY);
    // change default behavior so that tag values are returned with normal rpcs
    util.getConfiguration().set(HConstants.RPC_CODEC_CONF_KEY,
      KeyValueCodecWithTags.class.getCanonicalName());
    util.startMiniCluster();

    setupNamespace();
  }

  protected static void setupNamespace() throws Exception {
    util.getAdmin().createNamespace(NamespaceDescriptor.create(NAMESPACE).build());
  }

  @AfterClass
  public static void tearDownAfterClass() throws Exception {
    util.shutdownMiniCluster();
  }

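  /**
   * Test case that exercises the map-based bulk load API: the HFiles are handed to the loader as
   * a family-to-file map instead of being discovered from a directory on the filesystem.
   */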
  @Test
  public void testSimpleLoadWithMap() throws Exception {
    runTest("testSimpleLoadWithMap", BloomType.NONE,
      new byte[][][] { new byte[][] { Bytes.toBytes("aaaa"), Bytes.toBytes("cccc") },
          new byte[][] { Bytes.toBytes("ddd"), Bytes.toBytes("ooo") }, },
      true);
  }

  /**
   * Test case that creates some regions and loads HFiles that fit snugly inside those regions.
   */
  @Test
  public void testSimpleLoad() throws Exception {
    runTest("testSimpleLoad", BloomType.NONE,
      new byte[][][] { new byte[][] { Bytes.toBytes("aaaa"), Bytes.toBytes("cccc") },
          new byte[][] { Bytes.toBytes("ddd"), Bytes.toBytes("ooo") }, });
  }

  @Test
  public void testSimpleLoadWithFileCopy() throws Exception {
    String testName = tn.getMethodName();
    final byte[] TABLE_NAME = Bytes.toBytes("mytable_" + testName);
    runTest(testName, buildHTD(TableName.valueOf(TABLE_NAME), BloomType.NONE),
        false, null, new byte[][][] { new byte[][] { Bytes.toBytes("aaaa"), Bytes.toBytes("cccc") },
          new byte[][] { Bytes.toBytes("ddd"), Bytes.toBytes("ooo") }, },
      false, true, 2);
  }

  /**
   * Test case that creates some regions and loads HFiles that cross the boundaries of those
   * regions.
   */
  @Test
  public void testRegionCrossingLoad() throws Exception {
    runTest("testRegionCrossingLoad", BloomType.NONE,
      new byte[][][] { new byte[][] { Bytes.toBytes("aaaa"), Bytes.toBytes("eee") },
          new byte[][] { Bytes.toBytes("fff"), Bytes.toBytes("zzz") }, });
  }

  /**
   * Test loading into a column family that has a ROW bloom filter.
   */
  @Test
  public void testRegionCrossingRowBloom() throws Exception {
    runTest("testRegionCrossingLoadRowBloom", BloomType.ROW,
      new byte[][][] { new byte[][] { Bytes.toBytes("aaaa"), Bytes.toBytes("eee") },
          new byte[][] { Bytes.toBytes("fff"), Bytes.toBytes("zzz") }, });
  }

  /**
   * Test loading into a column family that has a ROWCOL bloom filter.
   */
  @Test
  public void testRegionCrossingRowColBloom() throws Exception {
    runTest("testRegionCrossingLoadRowColBloom", BloomType.ROWCOL,
      new byte[][][] { new byte[][] { Bytes.toBytes("aaaa"), Bytes.toBytes("eee") },
          new byte[][] { Bytes.toBytes("fff"), Bytes.toBytes("zzz") }, });
  }


  /**
   * Test case that creates some regions and loads HFiles whose boundaries differ from the table's
   * pre-split points.
   */
  @Test
  public void testSimpleHFileSplit() throws Exception {
    runTest("testHFileSplit", BloomType.NONE,
      new byte[][] { Bytes.toBytes("aaa"), Bytes.toBytes("fff"), Bytes.toBytes("jjj"),
          Bytes.toBytes("ppp"), Bytes.toBytes("uuu"), Bytes.toBytes("zzz"), },
      new byte[][][] { new byte[][] { Bytes.toBytes("aaaa"), Bytes.toBytes("lll") },
          new byte[][] { Bytes.toBytes("mmm"), Bytes.toBytes("zzz") }, });
  }

  /**
   * Test case that creates some regions and loads HFiles that cross region boundaries and whose
   * boundaries differ from the table's pre-split points.
   */
  @Test
  public void testRegionCrossingHFileSplit() throws Exception {
    testRegionCrossingHFileSplit(BloomType.NONE);
  }

  /**
   * Test case that creates some regions and loads HFiles that cross region boundaries, use a ROW
   * bloom filter, and have boundaries that differ from the table's pre-split points.
   */
  @Test
  public void testRegionCrossingHFileSplitRowBloom() throws Exception {
    testRegionCrossingHFileSplit(BloomType.ROW);
  }

  /**
   * Test case that creates some regions and loads HFiles that cross region boundaries, use a
   * ROWCOL bloom filter, and have boundaries that differ from the table's pre-split points.
   */
  @Test
  public void testRegionCrossingHFileSplitRowColBloom() throws Exception {
    testRegionCrossingHFileSplit(BloomType.ROWCOL);
  }

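  /**
   * Test case that pre-splits the table at many points so that a single HFile spanning the whole
   * key space must be split against a large number of region boundaries.
   */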
  @Test
  public void testSplitALot() throws Exception {
    runTest("testSplitALot", BloomType.NONE,
      new byte[][] { Bytes.toBytes("aaaa"), Bytes.toBytes("bbb"), Bytes.toBytes("ccc"),
          Bytes.toBytes("ddd"), Bytes.toBytes("eee"), Bytes.toBytes("fff"), Bytes.toBytes("ggg"),
          Bytes.toBytes("hhh"), Bytes.toBytes("iii"), Bytes.toBytes("lll"), Bytes.toBytes("mmm"),
          Bytes.toBytes("nnn"), Bytes.toBytes("ooo"), Bytes.toBytes("ppp"), Bytes.toBytes("qqq"),
          Bytes.toBytes("rrr"), Bytes.toBytes("sss"), Bytes.toBytes("ttt"), Bytes.toBytes("uuu"),
          Bytes.toBytes("vvv"), Bytes.toBytes("zzz"), },
      new byte[][][] { new byte[][] { Bytes.toBytes("aaaa"), Bytes.toBytes("zzz") }, });
  }

  private void testRegionCrossingHFileSplit(BloomType bloomType) throws Exception {
    runTest("testHFileSplit" + bloomType + "Bloom", bloomType,
      new byte[][] { Bytes.toBytes("aaa"), Bytes.toBytes("fff"), Bytes.toBytes("jjj"),
          Bytes.toBytes("ppp"), Bytes.toBytes("uuu"), Bytes.toBytes("zzz"), },
      new byte[][][] { new byte[][] { Bytes.toBytes("aaaa"), Bytes.toBytes("eee") },
          new byte[][] { Bytes.toBytes("fff"), Bytes.toBytes("zzz") }, });
  }

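  /**
   * Builds a table descriptor with the single column family {@code FAMILY} using the given bloom
   * filter type.
   */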
  private TableDescriptor buildHTD(TableName tableName, BloomType bloomType) {
    return TableDescriptorBuilder.newBuilder(tableName)
        .setColumnFamily(
          ColumnFamilyDescriptorBuilder.newBuilder(FAMILY).setBloomFilterType(bloomType).build())
        .build();
  }

  private void runTest(String testName, BloomType bloomType, byte[][][] hfileRanges)
      throws Exception {
    runTest(testName, bloomType, null, hfileRanges);
  }

  private void runTest(String testName, BloomType bloomType, byte[][][] hfileRanges, boolean useMap)
      throws Exception {
    runTest(testName, bloomType, null, hfileRanges, useMap);
  }

  private void runTest(String testName, BloomType bloomType, byte[][] tableSplitKeys,
      byte[][][] hfileRanges) throws Exception {
    runTest(testName, bloomType, tableSplitKeys, hfileRanges, false);
  }

  private void runTest(String testName, BloomType bloomType, byte[][] tableSplitKeys,
      byte[][][] hfileRanges, boolean useMap) throws Exception {
    final byte[] TABLE_NAME = Bytes.toBytes("mytable_" + testName);
    final boolean preCreateTable = tableSplitKeys != null;

    // Run the test bulk loading the table into the default namespace
    final TableName TABLE_WITHOUT_NS = TableName.valueOf(TABLE_NAME);
    runTest(testName, TABLE_WITHOUT_NS, bloomType, preCreateTable, tableSplitKeys, hfileRanges,
      useMap, 2);

    /* Run the test bulk loading the table from a directory depth of 3; the layout is now
     *   baseDirectory
     *     -- regionDir
     *       -- familyDir
     *         -- storeFileDir
     */
    if (preCreateTable) {
      runTest(testName + 2, TABLE_WITHOUT_NS, bloomType, true, tableSplitKeys, hfileRanges,
          false, 3);
    }

    // Run the test bulk loading the table into the specified namespace
    final TableName TABLE_WITH_NS = TableName.valueOf(Bytes.toBytes(NAMESPACE), TABLE_NAME);
    runTest(testName, TABLE_WITH_NS, bloomType, preCreateTable, tableSplitKeys, hfileRanges,
      useMap, 2);
  }

  private void runTest(String testName, TableName tableName, BloomType bloomType,
      boolean preCreateTable, byte[][] tableSplitKeys, byte[][][] hfileRanges,
      boolean useMap, int depth) throws Exception {
    TableDescriptor htd = buildHTD(tableName, bloomType);
    runTest(testName, htd, preCreateTable, tableSplitKeys, hfileRanges, useMap, false, depth);
  }

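  /**
   * Creates the requested HFiles on the test filesystem and bulk loads them into {@code htd}'s
   * table, either through the command-line entry point or through the map-based
   * {@code bulkLoad(TableName, Map)} API, then asserts on the resulting row count.
   * <p>
   * A minimal caller sketch; the table name, family, qualifier, and ranges below are illustrative
   * values, not fixtures used by this class:
   * <pre>
   * TableDescriptor htd = TableDescriptorBuilder.newBuilder(TableName.valueOf("t"))
   *     .setColumnFamily(ColumnFamilyDescriptorBuilder.of(Bytes.toBytes("f"))).build();
   * byte[][][] ranges = { { Bytes.toBytes("aaa"), Bytes.toBytes("mmm") } };
   * // one HFile covering aaa..mmm with 1000 rows, loaded via the CLI path
   * loadHFiles("example", htd, util, Bytes.toBytes("f"), Bytes.toBytes("q"), true, null, ranges,
   *     false, false, false, 0, 1000);
   * </pre>
   * @return the number of rows expected to have been loaded
   */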
  public static int loadHFiles(String testName, TableDescriptor htd, HBaseTestingUtility util,
      byte[] fam, byte[] qual, boolean preCreateTable, byte[][] tableSplitKeys,
      byte[][][] hfileRanges, boolean useMap, boolean deleteFile, boolean copyFiles,
      int initRowCount, int factor) throws Exception {
    return loadHFiles(testName, htd, util, fam, qual, preCreateTable, tableSplitKeys, hfileRanges,
        useMap, deleteFile, copyFiles, initRowCount, factor, 2);
  }

  public static int loadHFiles(String testName, TableDescriptor htd, HBaseTestingUtility util,
      byte[] fam, byte[] qual, boolean preCreateTable, byte[][] tableSplitKeys,
      byte[][][] hfileRanges, boolean useMap, boolean deleteFile, boolean copyFiles,
      int initRowCount, int factor, int depth) throws Exception {
    Path baseDirectory = util.getDataTestDirOnTestFS(testName);
    FileSystem fs = util.getTestFileSystem();
    baseDirectory = baseDirectory.makeQualified(fs.getUri(), fs.getWorkingDirectory());
    Path parentDir = baseDirectory;
    if (depth == 3) {
      assert !useMap;
      parentDir = new Path(baseDirectory, "someRegion");
    }
    Path familyDir = new Path(parentDir, Bytes.toString(fam));

    int hfileIdx = 0;
    Map<byte[], List<Path>> map = null;
    List<Path> list = null;
    if (useMap || copyFiles) {
      list = new ArrayList<>();
    }
    if (useMap) {
      map = new TreeMap<>(Bytes.BYTES_COMPARATOR);
      map.put(fam, list);
    }
    // write one HFile per requested [from, to] range; each file holds `factor` rows
    Path last = null;
    for (byte[][] range : hfileRanges) {
      byte[] from = range[0];
      byte[] to = range[1];
      Path path = new Path(familyDir, "hfile_" + hfileIdx++);
      HFileTestUtil.createHFile(util.getConfiguration(), fs, path, fam, qual, from, to, factor);
      if (useMap) {
        last = path;
        list.add(path);
      }
    }
    int expectedRows = hfileIdx * factor;

    TableName tableName = htd.getTableName();
    if (!util.getAdmin().tableExists(tableName) && (preCreateTable || map != null)) {
      util.getAdmin().createTable(htd, tableSplitKeys);
    }

    Configuration conf = util.getConfiguration();
    if (copyFiles) {
      conf.setBoolean(LoadIncrementalHFiles.ALWAYS_COPY_FILES, true);
    }
    BulkLoadHFilesTool loader = new BulkLoadHFilesTool(conf);
    List<String> args = Lists.newArrayList(baseDirectory.toString(), tableName.toString());
    if (depth == 3) {
      args.add("-loadTable");
    }

    if (useMap) {
      if (deleteFile) {
        fs.delete(last, true);
      }
      Map<BulkLoadHFiles.LoadQueueItem, ByteBuffer> loaded = loader.bulkLoad(tableName, map);
      if (deleteFile) {
        // the deleted file's rows never make it into the table, and the file itself
        // must not show up among the loaded queue items
        expectedRows -= factor;
        for (BulkLoadHFiles.LoadQueueItem item : loaded.keySet()) {
          if (item.getFilePath().getName().equals(last.getName())) {
            fail(last + " should be missing");
          }
        }
      }
    } else {
      loader.run(args.toArray(new String[] {}));
    }

    if (copyFiles) {
      // with ALWAYS_COPY_FILES set, the source files must survive the load
      for (Path p : list) {
        assertTrue(p + " should exist", fs.exists(p));
      }
    }

    Table table = util.getConnection().getTable(tableName);
    try {
      assertEquals(initRowCount + expectedRows, util.countRows(table));
    } finally {
      table.close();
    }

    return expectedRows;
  }

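  /**
   * Runs a single load through {@link #loadHFiles}, then verifies that the bulk load staging
   * directory has been cleaned up and drops the table.
   */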
  private void runTest(String testName, TableDescriptor htd,
      boolean preCreateTable, byte[][] tableSplitKeys, byte[][][] hfileRanges, boolean useMap,
      boolean copyFiles, int depth) throws Exception {
    loadHFiles(testName, htd, util, FAMILY, QUALIFIER, preCreateTable, tableSplitKeys, hfileRanges,
      useMap, true, copyFiles, 0, 1000, depth);

    final TableName tableName = htd.getTableName();
    // verify staging folder has been cleaned up
    Path stagingBasePath =
        new Path(FSUtils.getRootDir(util.getConfiguration()), HConstants.BULKLOAD_STAGING_DIR_NAME);
    FileSystem fs = util.getTestFileSystem();
    if (fs.exists(stagingBasePath)) {
      FileStatus[] files = fs.listStatus(stagingBasePath);
      for (FileStatus file : files) {
        // compare names with equals(), not reference equality
        assertTrue("Folder=" + file.getPath() + " is not cleaned up.",
          !file.getPath().getName().equals("DONOTERASE"));
      }
    }

    util.deleteTable(tableName);
  }

  /**
   * Test that tags survive through a bulk load that needs to split hfiles. This test depends on
   * "hbase.client.rpc.codec" being set to KeyValueCodecWithTags so that the client receives tags
   * in the responses.
   */
  @Test
  public void testTagsSurviveBulkLoadSplit() throws Exception {
    Path dir = util.getDataTestDirOnTestFS(tn.getMethodName());
    FileSystem fs = util.getTestFileSystem();
    dir = dir.makeQualified(fs.getUri(), fs.getWorkingDirectory());
    Path familyDir = new Path(dir, Bytes.toString(FAMILY));
    // table has these split points
    byte[][] tableSplitKeys = new byte[][] { Bytes.toBytes("aaa"), Bytes.toBytes("fff"),
        Bytes.toBytes("jjj"), Bytes.toBytes("ppp"), Bytes.toBytes("uuu"), Bytes.toBytes("zzz"), };

    // create an hfile whose values span the split points
    byte[] from = Bytes.toBytes("ddd");
    byte[] to = Bytes.toBytes("ooo");
    HFileTestUtil.createHFileWithTags(util.getConfiguration(), fs,
      new Path(familyDir, tn.getMethodName() + "_hfile"), FAMILY, QUALIFIER, from, to, 1000);
    int expectedRows = 1000;

    TableName tableName = TableName.valueOf(tn.getMethodName());
    TableDescriptor htd = buildHTD(tableName, BloomType.NONE);
    util.getAdmin().createTable(htd, tableSplitKeys);

    LoadIncrementalHFiles loader = new LoadIncrementalHFiles(util.getConfiguration());
    String[] args = { dir.toString(), tableName.toString() };
    loader.run(args);

    Table table = util.getConnection().getTable(tableName);
    try {
      assertEquals(expectedRows, util.countRows(table));
      HFileTestUtil.verifyTags(table);
    } finally {
      table.close();
    }

    util.deleteTable(tableName);
  }

  /**
   * Test loading into a column family that does not exist.
   */
  @Test
  public void testNonexistentColumnFamilyLoad() throws Exception {
    String testName = tn.getMethodName();
    byte[][][] hFileRanges =
        new byte[][][] { new byte[][] { Bytes.toBytes("aaa"), Bytes.toBytes("ccc") },
            new byte[][] { Bytes.toBytes("ddd"), Bytes.toBytes("ooo") }, };

    byte[] TABLE = Bytes.toBytes("mytable_" + testName);
    // set the real family name to upper case on purpose to simulate the case where the family
    // name in the HFiles is invalid
    TableDescriptor htd = TableDescriptorBuilder.newBuilder(TableName.valueOf(TABLE))
        .setColumnFamily(ColumnFamilyDescriptorBuilder
            .of(Bytes.toBytes(new String(FAMILY).toUpperCase(Locale.ROOT))))
        .build();

    try {
      runTest(testName, htd, true, SPLIT_KEYS, hFileRanges, false, false, 2);
      fail("Loading into table with non-existent family should have failed");
    } catch (Exception e) {
      assertTrue("IOException expected", e instanceof IOException);
      // further check whether the exception message is correct
      String errMsg = e.getMessage();
      assertTrue(
        "Incorrect exception message, expected message: [" + EXPECTED_MSG_FOR_NON_EXISTING_FAMILY +
            "], current message: [" + errMsg + "]",
        errMsg.contains(EXPECTED_MSG_FOR_NON_EXISTING_FAMILY));
    }
  }

  @Test
  public void testNonHfileFolderWithUnmatchedFamilyName() throws Exception {
    testNonHfileFolder("testNonHfileFolderWithUnmatchedFamilyName", true);
  }

  @Test
  public void testNonHfileFolder() throws Exception {
    testNonHfileFolder("testNonHfileFolder", false);
  }

  /**
   * Write a random data file and a non-file in a dir with a valid family name but not part of the
   * table families. We should be able to bulk load without getting the unmatched family exception.
   * HBASE-13037/HBASE-13227
   */
  private void testNonHfileFolder(String tableName, boolean preCreateTable) throws Exception {
    Path dir = util.getDataTestDirOnTestFS(tableName);
    FileSystem fs = util.getTestFileSystem();
    dir = dir.makeQualified(fs.getUri(), fs.getWorkingDirectory());

    Path familyDir = new Path(dir, Bytes.toString(FAMILY));
    HFileTestUtil.createHFile(util.getConfiguration(), fs, new Path(familyDir, "hfile_0"), FAMILY,
      QUALIFIER, Bytes.toBytes("begin"), Bytes.toBytes("end"), 500);
    createRandomDataFile(fs, new Path(familyDir, "012356789"), 16 * 1024);

    final String NON_FAMILY_FOLDER = "_logs";
    Path nonFamilyDir = new Path(dir, NON_FAMILY_FOLDER);
    fs.mkdirs(nonFamilyDir);
    fs.mkdirs(new Path(nonFamilyDir, "non-file"));
    createRandomDataFile(fs, new Path(nonFamilyDir, "012356789"), 16 * 1024);

    Table table = null;
    try {
      if (preCreateTable) {
        table = util.createTable(TableName.valueOf(tableName), FAMILY);
      } else {
        table = util.getConnection().getTable(TableName.valueOf(tableName));
      }

      final String[] args = { dir.toString(), tableName };
      new LoadIncrementalHFiles(util.getConfiguration()).run(args);
      assertEquals(500, util.countRows(table));
    } finally {
      if (table != null) {
        table.close();
      }
      fs.delete(dir, true);
    }
  }

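  /**
   * Writes {@code size} bytes of a deterministic 1 KB byte pattern to {@code path}; used above to
   * plant a non-HFile in the load directory.
   */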
  private static void createRandomDataFile(FileSystem fs, Path path, int size) throws IOException {
    FSDataOutputStream stream = fs.create(path);
    try {
      byte[] data = new byte[1024];
      for (int i = 0; i < data.length; ++i) {
        data[i] = (byte) (i & 0xff);
      }
      while (size >= data.length) {
        stream.write(data, 0, data.length);
        size -= data.length;
      }
      if (size > 0) {
        stream.write(data, 0, size);
      }
    } finally {
      stream.close();
    }
  }

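  /**
   * Test that splitStoreFile cuts one HFile into a bottom half and a top half at the given split
   * key without losing any rows.
   */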
  @Test
  public void testSplitStoreFile() throws IOException {
    Path dir = util.getDataTestDirOnTestFS("testSplitHFile");
    FileSystem fs = util.getTestFileSystem();
    Path testIn = new Path(dir, "testhfile");
    ColumnFamilyDescriptor familyDesc = ColumnFamilyDescriptorBuilder.of(FAMILY);
    HFileTestUtil.createHFile(util.getConfiguration(), fs, testIn, FAMILY, QUALIFIER,
      Bytes.toBytes("aaa"), Bytes.toBytes("zzz"), 1000);

    Path bottomOut = new Path(dir, "bottom.out");
    Path topOut = new Path(dir, "top.out");

    LoadIncrementalHFiles.splitStoreFile(util.getConfiguration(), testIn, familyDesc,
      Bytes.toBytes("ggg"), bottomOut, topOut);

    int rowCount = verifyHFile(bottomOut);
    rowCount += verifyHFile(topOut);
    assertEquals(1000, rowCount);
  }

  @Test
  public void testSplitStoreFileWithNoneToNone() throws IOException {
    testSplitStoreFileWithDifferentEncoding(DataBlockEncoding.NONE, DataBlockEncoding.NONE);
  }

  @Test
  public void testSplitStoreFileWithEncodedToEncoded() throws IOException {
    testSplitStoreFileWithDifferentEncoding(DataBlockEncoding.DIFF, DataBlockEncoding.DIFF);
  }

  @Test
  public void testSplitStoreFileWithEncodedToNone() throws IOException {
    testSplitStoreFileWithDifferentEncoding(DataBlockEncoding.DIFF, DataBlockEncoding.NONE);
  }

  @Test
  public void testSplitStoreFileWithNoneToEncoded() throws IOException {
    testSplitStoreFileWithDifferentEncoding(DataBlockEncoding.NONE, DataBlockEncoding.DIFF);
  }

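  /**
   * Splits an HFile written with {@code bulkloadEncoding} against a column family configured with
   * {@code cfEncoding}, and verifies that no rows are lost across the two halves.
   */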
  private void testSplitStoreFileWithDifferentEncoding(DataBlockEncoding bulkloadEncoding,
      DataBlockEncoding cfEncoding) throws IOException {
    Path dir = util.getDataTestDirOnTestFS("testSplitHFileWithDifferentEncoding");
    FileSystem fs = util.getTestFileSystem();
    Path testIn = new Path(dir, "testhfile");
    ColumnFamilyDescriptor familyDesc =
        ColumnFamilyDescriptorBuilder.newBuilder(FAMILY).setDataBlockEncoding(cfEncoding).build();
    HFileTestUtil.createHFileWithDataBlockEncoding(util.getConfiguration(), fs, testIn,
      bulkloadEncoding, FAMILY, QUALIFIER, Bytes.toBytes("aaa"), Bytes.toBytes("zzz"), 1000);

    Path bottomOut = new Path(dir, "bottom.out");
    Path topOut = new Path(dir, "top.out");

    LoadIncrementalHFiles.splitStoreFile(util.getConfiguration(), testIn, familyDesc,
      Bytes.toBytes("ggg"), bottomOut, topOut);

    int rowCount = verifyHFile(bottomOut);
    rowCount += verifyHFile(topOut);
    assertEquals(1000, rowCount);
  }

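  /**
   * Opens the HFile at {@code p}, scans it end to end, and returns the number of cells it
   * contains; the file is expected to be non-empty.
   */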
  private int verifyHFile(Path p) throws IOException {
    Configuration conf = util.getConfiguration();
    HFile.Reader reader =
        HFile.createReader(p.getFileSystem(conf), p, new CacheConfig(conf), true, conf);
    reader.loadFileInfo();
    HFileScanner scanner = reader.getScanner(false, false);
    scanner.seekTo();
    int count = 0;
    do {
      count++;
    } while (scanner.next());
    assertTrue(count > 0);
    reader.close();
    return count;
  }

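  /**
   * Records an HFile's key range as two sweep-line events: +1 at the start key and -1 at the end
   * key. inferBoundaries() then walks the keys in sorted order keeping a running sum; each key
   * where the sum returns to zero closes a merged interval, and the start key of every merged
   * interval after the first is emitted as a split point.
   */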
  private void addStartEndKeysForTest(TreeMap<byte[], Integer> map, byte[] first, byte[] last) {
    Integer value = map.containsKey(first) ? map.get(first) : 0;
    map.put(first, value + 1);

    value = map.containsKey(last) ? map.get(last) : 0;
    map.put(last, value - 1);
  }

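  /**
   * Feeds a set of overlapping key ranges to inferBoundaries() and checks that the inferred split
   * points match the merged intervals drawn in the comment below.
   */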
  @Test
  public void testInferBoundaries() {
    TreeMap<byte[], Integer> map = new TreeMap<>(Bytes.BYTES_COMPARATOR);

    /*
     * Toy example
     *     c---------i        o------p        s---------t  v------x
     * a------e  g-----k  m-------------q  r----s         u----w
     * Should be inferred as:
     * a-----------------k  m-------------q  r--------------t  u---------x
     * The output should be (m,r,u)
     */

    String first;
    String last;

    first = "a";
    last = "e";
    addStartEndKeysForTest(map, first.getBytes(), last.getBytes());

    first = "r";
    last = "s";
    addStartEndKeysForTest(map, first.getBytes(), last.getBytes());

    first = "o";
    last = "p";
    addStartEndKeysForTest(map, first.getBytes(), last.getBytes());

    first = "g";
    last = "k";
    addStartEndKeysForTest(map, first.getBytes(), last.getBytes());

    first = "v";
    last = "x";
    addStartEndKeysForTest(map, first.getBytes(), last.getBytes());

    first = "c";
    last = "i";
    addStartEndKeysForTest(map, first.getBytes(), last.getBytes());

    first = "m";
    last = "q";
    addStartEndKeysForTest(map, first.getBytes(), last.getBytes());

    first = "s";
    last = "t";
    addStartEndKeysForTest(map, first.getBytes(), last.getBytes());

    first = "u";
    last = "w";
    addStartEndKeysForTest(map, first.getBytes(), last.getBytes());

    byte[][] keysArray = LoadIncrementalHFiles.inferBoundaries(map);
    byte[][] compare = new byte[3][];
    compare[0] = "m".getBytes();
    compare[1] = "r".getBytes();
    compare[2] = "u".getBytes();

    assertEquals(3, keysArray.length);

    for (int row = 0; row < keysArray.length; row++) {
      assertArrayEquals(keysArray[row], compare[row]);
    }
  }

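  /**
   * Creates one more HFile than MAX_FILES_PER_REGION_PER_FAMILY allows for a single region and
   * expects the load to be rejected.
   */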
  @Test
  public void testLoadTooManyHFiles() throws Exception {
    Path dir = util.getDataTestDirOnTestFS("testLoadTooManyHFiles");
    FileSystem fs = util.getTestFileSystem();
    dir = dir.makeQualified(fs.getUri(), fs.getWorkingDirectory());
    Path familyDir = new Path(dir, Bytes.toString(FAMILY));

    byte[] from = Bytes.toBytes("begin");
    byte[] to = Bytes.toBytes("end");
    for (int i = 0; i <= MAX_FILES_PER_REGION_PER_FAMILY; i++) {
      HFileTestUtil.createHFile(util.getConfiguration(), fs, new Path(familyDir, "hfile_" + i),
        FAMILY, QUALIFIER, from, to, 1000);
    }

    LoadIncrementalHFiles loader = new LoadIncrementalHFiles(util.getConfiguration());
    String[] args = { dir.toString(), "mytable_testLoadTooManyHFiles" };
    try {
      loader.run(args);
      fail("Bulk loading too many files should fail");
    } catch (IOException ie) {
      assertTrue(ie.getMessage()
          .contains("Trying to load more than " + MAX_FILES_PER_REGION_PER_FAMILY + " hfiles"));
    }
  }

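  /**
   * With create.table set to "no", loading into a table that does not exist must fail with
   * TableNotFoundException instead of creating the table.
   */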
  @Test(expected = TableNotFoundException.class)
  public void testWithoutAnExistingTableAndCreateTableSetToNo() throws Exception {
    Configuration conf = util.getConfiguration();
    conf.set(LoadIncrementalHFiles.CREATE_TABLE_CONF_KEY, "no");
    LoadIncrementalHFiles loader = new LoadIncrementalHFiles(conf);
    String[] args = { "directory", "nonExistingTable" };
    loader.run(args);
  }

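  /**
   * Verifies that a column family whose name starts with an underscore (here "_cf") can still be
   * bulk loaded, even though underscore-prefixed directories such as "_logs" are otherwise
   * treated as non-family folders (see testNonHfileFolder above).
   */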
  @Test
  public void testTableWithCFNameStartWithUnderScore() throws Exception {
    Path dir = util.getDataTestDirOnTestFS("cfNameStartWithUnderScore");
    FileSystem fs = util.getTestFileSystem();
    dir = dir.makeQualified(fs.getUri(), fs.getWorkingDirectory());
    String family = "_cf";
    Path familyDir = new Path(dir, family);

    byte[] from = Bytes.toBytes("begin");
    byte[] to = Bytes.toBytes("end");
    Configuration conf = util.getConfiguration();
    String tableName = tn.getMethodName();
    Table table = util.createTable(TableName.valueOf(tableName), family);
    HFileTestUtil.createHFile(conf, fs, new Path(familyDir, "hfile"), Bytes.toBytes(family),
      QUALIFIER, from, to, 1000);

    LoadIncrementalHFiles loader = new LoadIncrementalHFiles(conf);
    String[] args = { dir.toString(), tableName };
    try {
      loader.run(args);
      assertEquals(1000, util.countRows(table));
    } finally {
      if (table != null) {
        table.close();
      }
    }
  }
}