/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.tool;

import static org.junit.Assert.assertArrayEquals;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertTrue;
import static org.junit.Assert.fail;

import java.io.IOException;
import java.nio.ByteBuffer;
import java.util.ArrayList;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.TreeMap;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseClassTestRule;
import org.apache.hadoop.hbase.HBaseTestingUtility;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.NamespaceDescriptor;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.TableNotFoundException;
import org.apache.hadoop.hbase.client.ColumnFamilyDescriptor;
import org.apache.hadoop.hbase.client.ColumnFamilyDescriptorBuilder;
import org.apache.hadoop.hbase.client.Table;
import org.apache.hadoop.hbase.client.TableDescriptor;
import org.apache.hadoop.hbase.client.TableDescriptorBuilder;
import org.apache.hadoop.hbase.codec.KeyValueCodecWithTags;
import org.apache.hadoop.hbase.coprocessor.CoprocessorHost;
import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding;
import org.apache.hadoop.hbase.io.hfile.CacheConfig;
import org.apache.hadoop.hbase.io.hfile.HFile;
import org.apache.hadoop.hbase.io.hfile.HFileScanner;
import org.apache.hadoop.hbase.regionserver.BloomType;
import org.apache.hadoop.hbase.testclassification.LargeTests;
import org.apache.hadoop.hbase.testclassification.MiscTests;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.FSUtils;
import org.apache.hadoop.hbase.util.HFileTestUtil;
import org.junit.AfterClass;
import org.junit.BeforeClass;
import org.junit.ClassRule;
import org.junit.Rule;
import org.junit.Test;
import org.junit.experimental.categories.Category;
import org.junit.rules.TestName;

import org.apache.hbase.thirdparty.com.google.common.collect.Lists;

/**
 * Test cases for the "load" half of the HFileOutputFormat bulk load functionality. These tests run
 * faster than the full MR cluster tests in TestHFileOutputFormat
 */
@Category({ MiscTests.class, LargeTests.class })
public class TestLoadIncrementalHFiles {

  @ClassRule
  public static final HBaseClassTestRule CLASS_RULE =
    HBaseClassTestRule.forClass(TestLoadIncrementalHFiles.class);

  @Rule
  public TestName tn = new TestName();

  private static final byte[] QUALIFIER = Bytes.toBytes("myqual");
  private static final byte[] FAMILY = Bytes.toBytes("myfam");
  private static final String NAMESPACE = "bulkNS";

  static final String EXPECTED_MSG_FOR_NON_EXISTING_FAMILY = "Unmatched family names found";
  static final int MAX_FILES_PER_REGION_PER_FAMILY = 4;

  private static final byte[][] SPLIT_KEYS =
    new byte[][] { Bytes.toBytes("ddd"), Bytes.toBytes("ppp") };

  static HBaseTestingUtility util = new HBaseTestingUtility();

  /**
   * Configures the shared testing utility, starts the mini cluster and creates the test namespace.
   */
  @BeforeClass
  public static void setUpBeforeClass() throws Exception {
    util.getConfiguration().set(CoprocessorHost.REGION_COPROCESSOR_CONF_KEY, "");
    util.getConfiguration().setInt(LoadIncrementalHFiles.MAX_FILES_PER_REGION_PER_FAMILY,
      MAX_FILES_PER_REGION_PER_FAMILY);
    // change default behavior so that tag values are returned with normal rpcs
    util.getConfiguration().set(HConstants.RPC_CODEC_CONF_KEY,
      KeyValueCodecWithTags.class.getCanonicalName());
    util.startMiniCluster();

    setupNamespace();
  }

  /**
   * Creates the {@link #NAMESPACE} namespace used by the namespaced variant of each load test.
   */
  protected static void setupNamespace() throws Exception {
    util.getAdmin().createNamespace(NamespaceDescriptor.create(NAMESPACE).build());
  }

  @AfterClass
  public static void tearDownAfterClass() throws Exception {
    util.shutdownMiniCluster();
  }

  /**
   * Same layout as {@link #testSimpleLoad()} but hands the files to the loader as a family-to-paths
   * map instead of a directory.
   */
  @Test
  public void testSimpleLoadWithMap() throws Exception {
    runTest("testSimpleLoadWithMap", BloomType.NONE,
      new byte[][][] { new byte[][] { Bytes.toBytes("aaaa"), Bytes.toBytes("cccc") },
        new byte[][] { Bytes.toBytes("ddd"), Bytes.toBytes("ooo") }, },
      true);
  }

  /**
   * Test case that creates some regions and loads HFiles that fit snugly inside those regions
   */
  @Test
  public void testSimpleLoad() throws Exception {
    runTest("testSimpleLoad", BloomType.NONE,
      new byte[][][] { new byte[][] { Bytes.toBytes("aaaa"), Bytes.toBytes("cccc") },
        new byte[][] { Bytes.toBytes("ddd"), Bytes.toBytes("ooo") }, });
  }

  /**
   * Same layout as {@link #testSimpleLoad()} but with the loader told to always copy the source
   * files, so the originals must still exist after the load.
   */
  @Test
  public void testSimpleLoadWithFileCopy() throws Exception {
    String testName = tn.getMethodName();
    final byte[] TABLE_NAME = Bytes.toBytes("mytable_" + testName);
    runTest(testName, buildHTD(TableName.valueOf(TABLE_NAME), BloomType.NONE),
      false, null, new byte[][][] { new byte[][] { Bytes.toBytes("aaaa"), Bytes.toBytes("cccc") },
        new byte[][] { Bytes.toBytes("ddd"), Bytes.toBytes("ooo") }, },
      false, true, 2);
  }

  /**
   * Test case that creates some regions and loads HFiles that cross the boundaries of those regions
   */
  @Test
  public void testRegionCrossingLoad() throws Exception {
    runTest("testRegionCrossingLoad", BloomType.NONE,
      new byte[][][] { new byte[][] { Bytes.toBytes("aaaa"), Bytes.toBytes("eee") },
        new byte[][] { Bytes.toBytes("fff"), Bytes.toBytes("zzz") }, });
  }

  /**
   * Test loading into a column family that has a ROW bloom filter.
   */
  @Test
  public void testRegionCrossingRowBloom() throws Exception {
    runTest("testRegionCrossingLoadRowBloom", BloomType.ROW,
      new byte[][][] { new byte[][] { Bytes.toBytes("aaaa"), Bytes.toBytes("eee") },
        new byte[][] { Bytes.toBytes("fff"), Bytes.toBytes("zzz") }, });
  }

  /**
   * Test loading into a column family that has a ROWCOL bloom filter.
   */
  @Test
  public void testRegionCrossingRowColBloom() throws Exception {
    runTest("testRegionCrossingLoadRowColBloom", BloomType.ROWCOL,
      new byte[][][] { new byte[][] { Bytes.toBytes("aaaa"), Bytes.toBytes("eee") },
        new byte[][] { Bytes.toBytes("fff"), Bytes.toBytes("zzz") }, });
  }

  /**
   * Test case that creates some regions and loads HFiles that have different region boundaries than
   * the table pre-split.
   */
  @Test
  public void testSimpleHFileSplit() throws Exception {
    runTest("testHFileSplit", BloomType.NONE,
      new byte[][] { Bytes.toBytes("aaa"), Bytes.toBytes("fff"), Bytes.toBytes("jjj"),
        Bytes.toBytes("ppp"), Bytes.toBytes("uuu"), Bytes.toBytes("zzz"), },
      new byte[][][] { new byte[][] { Bytes.toBytes("aaaa"), Bytes.toBytes("lll") },
        new byte[][] { Bytes.toBytes("mmm"), Bytes.toBytes("zzz") }, });
  }

  /**
   * Test case that creates some regions and loads HFiles that cross the boundaries and have
   * different region boundaries than the table pre-split.
   */
  @Test
  public void testRegionCrossingHFileSplit() throws Exception {
    testRegionCrossingHFileSplit(BloomType.NONE);
  }

  /**
   * Test case that creates some regions and loads HFiles that cross the boundaries, have a ROW
   * bloom filter and different region boundaries than the table pre-split.
   */
  @Test
  public void testRegionCrossingHFileSplitRowBloom() throws Exception {
    testRegionCrossingHFileSplit(BloomType.ROW);
  }

  /**
   * Test case that creates some regions and loads HFiles that cross the boundaries, have a ROWCOL
   * bloom filter and different region boundaries than the table pre-split.
   */
  @Test
  public void testRegionCrossingHFileSplitRowColBloom() throws Exception {
    testRegionCrossingHFileSplit(BloomType.ROWCOL);
  }

  /**
   * Loads a single HFile spanning the whole key range into a table pre-split at many points.
   */
  @Test
  public void testSplitALot() throws Exception {
    runTest("testSplitALot", BloomType.NONE,
      new byte[][] { Bytes.toBytes("aaaa"), Bytes.toBytes("bbb"), Bytes.toBytes("ccc"),
        Bytes.toBytes("ddd"), Bytes.toBytes("eee"), Bytes.toBytes("fff"), Bytes.toBytes("ggg"),
        Bytes.toBytes("hhh"), Bytes.toBytes("iii"), Bytes.toBytes("lll"), Bytes.toBytes("mmm"),
        Bytes.toBytes("nnn"), Bytes.toBytes("ooo"), Bytes.toBytes("ppp"), Bytes.toBytes("qqq"),
        Bytes.toBytes("rrr"), Bytes.toBytes("sss"), Bytes.toBytes("ttt"), Bytes.toBytes("uuu"),
        Bytes.toBytes("vvv"), Bytes.toBytes("zzz"), },
      new byte[][][] { new byte[][] { Bytes.toBytes("aaaa"), Bytes.toBytes("zzz") }, });
  }

  /** Shared body of the region-crossing/pre-split tests, parameterized by bloom filter type. */
  private void testRegionCrossingHFileSplit(BloomType bloomType) throws Exception {
    runTest("testHFileSplit" + bloomType + "Bloom", bloomType,
      new byte[][] { Bytes.toBytes("aaa"), Bytes.toBytes("fff"), Bytes.toBytes("jjj"),
        Bytes.toBytes("ppp"), Bytes.toBytes("uuu"), Bytes.toBytes("zzz"), },
      new byte[][][] { new byte[][] { Bytes.toBytes("aaaa"), Bytes.toBytes("eee") },
        new byte[][] { Bytes.toBytes("fff"), Bytes.toBytes("zzz") }, });
  }

  /** Builds a descriptor for {@code tableName} with the single test family and given bloom type. */
  private TableDescriptor buildHTD(TableName tableName, BloomType bloomType) {
    return TableDescriptorBuilder.newBuilder(tableName)
      .setColumnFamily(
        ColumnFamilyDescriptorBuilder.newBuilder(FAMILY).setBloomFilterType(bloomType).build())
      .build();
  }

  private void runTest(String testName, BloomType bloomType, byte[][][] hfileRanges)
      throws Exception {
    runTest(testName, bloomType, null, hfileRanges);
  }

  private void runTest(String testName, BloomType bloomType, byte[][][] hfileRanges, boolean useMap)
      throws Exception {
    runTest(testName, bloomType, null, hfileRanges, useMap);
  }

  private void runTest(String testName, BloomType bloomType, byte[][] tableSplitKeys,
      byte[][][] hfileRanges) throws Exception {
    runTest(testName, bloomType, tableSplitKeys, hfileRanges, false);
  }

  /**
   * Runs the load scenario against the default namespace, then (only when the table is
   * pre-created) from a depth-3 directory layout, and finally against {@link #NAMESPACE}.
   */
  private void runTest(String testName, BloomType bloomType, byte[][] tableSplitKeys,
      byte[][][] hfileRanges, boolean useMap) throws Exception {
    final byte[] TABLE_NAME = Bytes.toBytes("mytable_" + testName);
    final boolean preCreateTable = tableSplitKeys != null;

    // Run the test bulkloading the table to the default namespace
    final TableName TABLE_WITHOUT_NS = TableName.valueOf(TABLE_NAME);
    runTest(testName, TABLE_WITHOUT_NS, bloomType, preCreateTable, tableSplitKeys, hfileRanges,
      useMap, 2);

    /*
     * Run the test bulkloading the table from a depth of 3. The directory structure is now
     *   baseDirectory
     *     -- regionDir
     *       -- familyDir
     *         -- storeFileDir
     */
    if (preCreateTable) {
      runTest(testName + 2, TABLE_WITHOUT_NS, bloomType, true, tableSplitKeys, hfileRanges,
        false, 3);
    }

    // Run the test bulkloading the table to the specified namespace
    final TableName TABLE_WITH_NS = TableName.valueOf(Bytes.toBytes(NAMESPACE), TABLE_NAME);
    runTest(testName, TABLE_WITH_NS, bloomType, preCreateTable, tableSplitKeys, hfileRanges,
      useMap, 2);
  }

  private void runTest(String testName, TableName tableName, BloomType bloomType,
      boolean preCreateTable, byte[][] tableSplitKeys, byte[][][] hfileRanges,
      boolean useMap, int depth) throws Exception {
    TableDescriptor htd = buildHTD(tableName, bloomType);
    runTest(testName, htd, preCreateTable, tableSplitKeys, hfileRanges, useMap, false, depth);
  }

  /**
   * Convenience overload of
   * {@link #loadHFiles(String, TableDescriptor, HBaseTestingUtility, byte[], byte[], boolean,
   * byte[][], byte[][][], boolean, boolean, boolean, int, int, int)} that uses a directory depth
   * of 2.
   */
  public static int loadHFiles(String testName, TableDescriptor htd, HBaseTestingUtility util,
      byte[] fam, byte[] qual, boolean preCreateTable, byte[][] tableSplitKeys,
      byte[][][] hfileRanges, boolean useMap, boolean deleteFile, boolean copyFiles,
      int initRowCount, int factor) throws Exception {
    return loadHFiles(testName, htd, util, fam, qual, preCreateTable, tableSplitKeys, hfileRanges,
      useMap, deleteFile, copyFiles, initRowCount, factor, 2);
  }

  /**
   * Writes one HFile per entry of {@code hfileRanges}, bulk loads them into the table described
   * by {@code htd} and asserts the resulting row count.
   * @param testName       name of the test data directory the HFiles are written under
   * @param htd            descriptor of the target table
   * @param util           testing utility that owns the cluster, file system and connection
   * @param fam            column family written into the HFiles (also the family directory name)
   * @param qual           qualifier written into the HFiles
   * @param preCreateTable whether to create the table before loading if it does not exist
   * @param tableSplitKeys split keys used when this method creates the table
   * @param hfileRanges    {@code {from, to}} row ranges, one HFile each
   * @param useMap         load through the family-to-paths map API rather than a directory
   * @param deleteFile     with {@code useMap}, delete the last HFile before loading and expect it
   *                       to be absent from the loaded set
   * @param copyFiles      ask the loader to always copy source files and verify they still exist
   * @param initRowCount   rows expected in the table before this load
   * @param factor         value passed to the HFile writer; each file is counted as this many rows
   * @param depth          directory depth; 3 inserts a region directory above the family directory
   * @return the number of rows this call expects to have loaded
   */
  public static int loadHFiles(String testName, TableDescriptor htd, HBaseTestingUtility util,
      byte[] fam, byte[] qual, boolean preCreateTable, byte[][] tableSplitKeys,
      byte[][][] hfileRanges, boolean useMap, boolean deleteFile, boolean copyFiles,
      int initRowCount, int factor, int depth) throws Exception {
    Path baseDirectory = util.getDataTestDirOnTestFS(testName);
    FileSystem fs = util.getTestFileSystem();
    baseDirectory = baseDirectory.makeQualified(fs.getUri(), fs.getWorkingDirectory());
    Path parentDir = baseDirectory;
    if (depth == 3) {
      assert !useMap;
      parentDir = new Path(baseDirectory, "someRegion");
    }
    Path familyDir = new Path(parentDir, Bytes.toString(fam));

    int hfileIdx = 0;
    Map<byte[], List<Path>> map = null;
    List<Path> list = null;
    if (useMap || copyFiles) {
      list = new ArrayList<>();
    }
    if (useMap) {
      map = new TreeMap<>(Bytes.BYTES_COMPARATOR);
      map.put(fam, list);
    }
    Path last = null;
    for (byte[][] range : hfileRanges) {
      byte[] from = range[0];
      byte[] to = range[1];
      Path path = new Path(familyDir, "hfile_" + hfileIdx++);
      HFileTestUtil.createHFile(util.getConfiguration(), fs, path, fam, qual, from, to, factor);
      // Record the path whenever the list exists. Previously it was only filled for useMap, which
      // left the copyFiles existence check below iterating over an empty list.
      if (list != null) {
        list.add(path);
      }
      if (useMap) {
        last = path;
      }
    }
    int expectedRows = hfileIdx * factor;

    TableName tableName = htd.getTableName();
    if (!util.getAdmin().tableExists(tableName) && (preCreateTable || map != null)) {
      util.getAdmin().createTable(htd, tableSplitKeys);
    }

    Configuration conf = util.getConfiguration();
    if (copyFiles) {
      // Work on a private copy so the flag does not leak into the cluster-wide configuration that
      // every other test in this class shares.
      conf = new Configuration(conf);
      conf.setBoolean(LoadIncrementalHFiles.ALWAYS_COPY_FILES, true);
    }
    BulkLoadHFilesTool loader = new BulkLoadHFilesTool(conf);
    List<String> args = Lists.newArrayList(baseDirectory.toString(), tableName.toString());
    if (depth == 3) {
      args.add("-loadTable");
    }

    if (useMap) {
      if (deleteFile) {
        fs.delete(last, true);
      }
      Map<BulkLoadHFiles.LoadQueueItem, ByteBuffer> loaded = loader.bulkLoad(tableName, map);
      if (deleteFile) {
        // One file was removed before the load; it accounts for 'factor' rows.
        expectedRows -= factor;
        for (BulkLoadHFiles.LoadQueueItem item : loaded.keySet()) {
          if (item.getFilePath().getName().equals(last.getName())) {
            fail(last + " should be missing");
          }
        }
      }
    } else {
      loader.run(args.toArray(new String[] {}));
    }

    if (copyFiles) {
      for (Path p : list) {
        assertTrue(p + " should exist", fs.exists(p));
      }
    }

    try (Table table = util.getConnection().getTable(tableName)) {
      assertEquals(initRowCount + expectedRows, util.countRows(table));
    }

    return expectedRows;
  }

  /**
   * Loads the given ranges via {@link #loadHFiles}, checks the bulk load staging directory and
   * drops the table.
   */
  private void runTest(String testName, TableDescriptor htd,
      boolean preCreateTable, byte[][] tableSplitKeys, byte[][][] hfileRanges, boolean useMap,
      boolean copyFiles, int depth) throws Exception {
    loadHFiles(testName, htd, util, FAMILY, QUALIFIER, preCreateTable, tableSplitKeys, hfileRanges,
      useMap, true, copyFiles, 0, 1000, depth);

    final TableName tableName = htd.getTableName();
    // verify staging folder has been cleaned up
    Path stagingBasePath =
      new Path(FSUtils.getRootDir(util.getConfiguration()), HConstants.BULKLOAD_STAGING_DIR_NAME);
    FileSystem fs = util.getTestFileSystem();
    if (fs.exists(stagingBasePath)) {
      FileStatus[] files = fs.listStatus(stagingBasePath);
      for (FileStatus file : files) {
        // The previous check compared String references with '!=', which is always true for a
        // name produced at runtime, so it could never fail. Per the failure message, any entry
        // left behind other than the "DONOTERASE" marker counts as not cleaned up.
        // NOTE(review): polarity inferred from the message - confirm against the staging dir
        // layout of the server version under test.
        assertTrue("Folder=" + file.getPath() + " is not cleaned up.",
          "DONOTERASE".equals(file.getPath().getName()));
      }
    }

    util.deleteTable(tableName);
  }

  /**
   * Test that tags survive through a bulk load that needs to split hfiles. This test depends on the
   * "hbase.client.rpc.codec" = KeyValueCodecWithTags so that the client can get tags in the
   * responses.
   */
  @Test
  public void testTagsSurviveBulkLoadSplit() throws Exception {
    Path dir = util.getDataTestDirOnTestFS(tn.getMethodName());
    FileSystem fs = util.getTestFileSystem();
    dir = dir.makeQualified(fs.getUri(), fs.getWorkingDirectory());
    Path familyDir = new Path(dir, Bytes.toString(FAMILY));
    // table has these split points
    byte[][] tableSplitKeys = new byte[][] { Bytes.toBytes("aaa"), Bytes.toBytes("fff"),
      Bytes.toBytes("jjj"), Bytes.toBytes("ppp"), Bytes.toBytes("uuu"), Bytes.toBytes("zzz"), };

    // creating an hfile that has values that span the split points.
    byte[] from = Bytes.toBytes("ddd");
    byte[] to = Bytes.toBytes("ooo");
    HFileTestUtil.createHFileWithTags(util.getConfiguration(), fs,
      new Path(familyDir, tn.getMethodName() + "_hfile"), FAMILY, QUALIFIER, from, to, 1000);
    int expectedRows = 1000;

    TableName tableName = TableName.valueOf(tn.getMethodName());
    TableDescriptor htd = buildHTD(tableName, BloomType.NONE);
    util.getAdmin().createTable(htd, tableSplitKeys);

    LoadIncrementalHFiles loader = new LoadIncrementalHFiles(util.getConfiguration());
    String[] args = { dir.toString(), tableName.toString() };
    loader.run(args);

    try (Table table = util.getConnection().getTable(tableName)) {
      assertEquals(expectedRows, util.countRows(table));
      HFileTestUtil.verifyTags(table);
    }

    util.deleteTable(tableName);
  }

  /**
   * Test loading into a column family that does not exist.
   */
  @Test
  public void testNonexistentColumnFamilyLoad() throws Exception {
    String testName = tn.getMethodName();
    byte[][][] hFileRanges =
      new byte[][][] { new byte[][] { Bytes.toBytes("aaa"), Bytes.toBytes("ccc") },
        new byte[][] { Bytes.toBytes("ddd"), Bytes.toBytes("ooo") }, };

    byte[] TABLE = Bytes.toBytes("mytable_" + testName);
    // set real family name to upper case on purpose to simulate the case that
    // family name in HFiles is invalid
    TableDescriptor htd = TableDescriptorBuilder.newBuilder(TableName.valueOf(TABLE))
      .setColumnFamily(ColumnFamilyDescriptorBuilder
        .of(Bytes.toBytes(Bytes.toString(FAMILY).toUpperCase(Locale.ROOT))))
      .build();

    try {
      runTest(testName, htd, true, SPLIT_KEYS, hFileRanges, false, false, 2);
      // fail() throws an AssertionError, which the catch (Exception) below does not intercept.
      fail("Loading into table with non-existent family should have failed");
    } catch (Exception e) {
      assertTrue("IOException expected", e instanceof IOException);
      // further check whether the exception message is correct
      String errMsg = e.getMessage();
      assertTrue(
        "Incorrect exception message, expected message: [" + EXPECTED_MSG_FOR_NON_EXISTING_FAMILY +
          "], current message: [" + errMsg + "]",
        errMsg != null && errMsg.contains(EXPECTED_MSG_FOR_NON_EXISTING_FAMILY));
    }
  }

  @Test
  public void testNonHfileFolderWithUnmatchedFamilyName() throws Exception {
    testNonHfileFolder("testNonHfileFolderWithUnmatchedFamilyName", true);
  }

  @Test
  public void testNonHfileFolder() throws Exception {
    testNonHfileFolder("testNonHfileFolder", false);
  }

  /**
   * Write a random data file and a non-file in a dir with a valid family name but not part of the
   * table families. We should be able to bulkload without getting the unmatched family exception.
   * HBASE-13037/HBASE-13227
   */
  private void testNonHfileFolder(String tableName, boolean preCreateTable) throws Exception {
    Path dir = util.getDataTestDirOnTestFS(tableName);
    FileSystem fs = util.getTestFileSystem();
    dir = dir.makeQualified(fs.getUri(), fs.getWorkingDirectory());

    Path familyDir = new Path(dir, Bytes.toString(FAMILY));
    HFileTestUtil.createHFile(util.getConfiguration(), fs, new Path(familyDir, "hfile_0"), FAMILY,
      QUALIFIER, Bytes.toBytes("begin"), Bytes.toBytes("end"), 500);
    createRandomDataFile(fs, new Path(familyDir, "012356789"), 16 * 1024);

    final String NON_FAMILY_FOLDER = "_logs";
    Path nonFamilyDir = new Path(dir, NON_FAMILY_FOLDER);
    fs.mkdirs(nonFamilyDir);
    fs.mkdirs(new Path(nonFamilyDir, "non-file"));
    createRandomDataFile(fs, new Path(nonFamilyDir, "012356789"), 16 * 1024);

    Table table = null;
    try {
      if (preCreateTable) {
        table = util.createTable(TableName.valueOf(tableName), FAMILY);
      } else {
        table = util.getConnection().getTable(TableName.valueOf(tableName));
      }

      final String[] args = { dir.toString(), tableName };
      new LoadIncrementalHFiles(util.getConfiguration()).run(args);
      assertEquals(500, util.countRows(table));
    } finally {
      if (table != null) {
        table.close();
      }
      fs.delete(dir, true);
    }
  }

  /**
   * Writes {@code size} bytes of a repeating 0..255 pattern to {@code path}; the content is not a
   * valid HFile.
   */
  private static void createRandomDataFile(FileSystem fs, Path path, int size) throws IOException {
    byte[] data = new byte[1024];
    for (int i = 0; i < data.length; ++i) {
      data[i] = (byte) (i & 0xff);
    }
    try (FSDataOutputStream stream = fs.create(path)) {
      int remaining = size;
      while (remaining >= data.length) {
        stream.write(data, 0, data.length);
        remaining -= data.length;
      }
      if (remaining > 0) {
        stream.write(data, 0, remaining);
      }
    }
  }

  @Test
  public void testSplitStoreFile() throws IOException {
    Path dir = util.getDataTestDirOnTestFS("testSplitHFile");
    FileSystem fs = util.getTestFileSystem();
    Path testIn = new Path(dir, "testhfile");
    ColumnFamilyDescriptor familyDesc = ColumnFamilyDescriptorBuilder.of(FAMILY);
    HFileTestUtil.createHFile(util.getConfiguration(), fs, testIn, FAMILY, QUALIFIER,
      Bytes.toBytes("aaa"), Bytes.toBytes("zzz"), 1000);

    Path bottomOut = new Path(dir, "bottom.out");
    Path topOut = new Path(dir, "top.out");

    LoadIncrementalHFiles.splitStoreFile(util.getConfiguration(), testIn, familyDesc,
      Bytes.toBytes("ggg"), bottomOut, topOut);

    int rowCount = verifyHFile(bottomOut);
    rowCount += verifyHFile(topOut);
    assertEquals(1000, rowCount);
  }

  @Test
  public void testSplitStoreFileWithNoneToNone() throws IOException {
    testSplitStoreFileWithDifferentEncoding(DataBlockEncoding.NONE, DataBlockEncoding.NONE);
  }

  @Test
  public void testSplitStoreFileWithEncodedToEncoded() throws IOException {
    testSplitStoreFileWithDifferentEncoding(DataBlockEncoding.DIFF, DataBlockEncoding.DIFF);
  }

  @Test
  public void testSplitStoreFileWithEncodedToNone() throws IOException {
    testSplitStoreFileWithDifferentEncoding(DataBlockEncoding.DIFF, DataBlockEncoding.NONE);
  }

  @Test
  public void testSplitStoreFileWithNoneToEncoded() throws IOException {
    testSplitStoreFileWithDifferentEncoding(DataBlockEncoding.NONE, DataBlockEncoding.DIFF);
  }

  /**
   * Splits an HFile written with {@code bulkloadEncoding} for a family configured with
   * {@code cfEncoding} and checks that the two halves together still hold every row.
   */
  private void testSplitStoreFileWithDifferentEncoding(DataBlockEncoding bulkloadEncoding,
      DataBlockEncoding cfEncoding) throws IOException {
    Path dir = util.getDataTestDirOnTestFS("testSplitHFileWithDifferentEncoding");
    FileSystem fs = util.getTestFileSystem();
    Path testIn = new Path(dir, "testhfile");
    ColumnFamilyDescriptor familyDesc =
      ColumnFamilyDescriptorBuilder.newBuilder(FAMILY).setDataBlockEncoding(cfEncoding).build();
    HFileTestUtil.createHFileWithDataBlockEncoding(util.getConfiguration(), fs, testIn,
      bulkloadEncoding, FAMILY, QUALIFIER, Bytes.toBytes("aaa"), Bytes.toBytes("zzz"), 1000);

    Path bottomOut = new Path(dir, "bottom.out");
    Path topOut = new Path(dir, "top.out");

    LoadIncrementalHFiles.splitStoreFile(util.getConfiguration(), testIn, familyDesc,
      Bytes.toBytes("ggg"), bottomOut, topOut);

    int rowCount = verifyHFile(bottomOut);
    rowCount += verifyHFile(topOut);
    assertEquals(1000, rowCount);
  }

  /**
   * Scans the HFile at {@code p} and returns the number of cells visited, asserting there is at
   * least one.
   */
  private int verifyHFile(Path p) throws IOException {
    Configuration conf = util.getConfiguration();
    HFile.Reader reader =
      HFile.createReader(p.getFileSystem(conf), p, new CacheConfig(conf), true, conf);
    try {
      reader.loadFileInfo();
      HFileScanner scanner = reader.getScanner(false, false);
      int count = 0;
      // Only count when seekTo() actually positioned the scanner; ignoring its result made an
      // empty file report one cell and the assertion below meaningless.
      if (scanner.seekTo()) {
        do {
          count++;
        } while (scanner.next());
      }
      assertTrue(count > 0);
      return count;
    } finally {
      // Close even when an assertion above fails.
      reader.close();
    }
  }

  /**
   * Records one key range in {@code map}: +1 at its first key and -1 at its last key.
   */
  private void addStartEndKeysForTest(TreeMap<byte[], Integer> map, byte[] first, byte[] last) {
    Integer value = map.containsKey(first) ? map.get(first) : 0;
    map.put(first, value + 1);

    value = map.containsKey(last) ? map.get(last) : 0;
    map.put(last, value - 1);
  }

  @Test
  public void testInferBoundaries() {
    TreeMap<byte[], Integer> map = new TreeMap<>(Bytes.BYTES_COMPARATOR);

    /*
     * Toy example
     *     c---------i            o------p          s---------t     v------x
     * a------e    g-----k   m-------------q   r----s            u----w
     *
     * Should be inferred as:
     * a-----------------k   m-------------q   r--------------t  u---------x
     *
     * The output should be (m,r,u)
     */
    String[][] ranges = { { "a", "e" }, { "r", "s" }, { "o", "p" }, { "g", "k" }, { "v", "x" },
      { "c", "i" }, { "m", "q" }, { "s", "t" }, { "u", "w" }, };
    for (String[] range : ranges) {
      // Bytes.toBytes always encodes as UTF-8, unlike String.getBytes() which depends on the
      // platform default charset.
      addStartEndKeysForTest(map, Bytes.toBytes(range[0]), Bytes.toBytes(range[1]));
    }

    byte[][] keysArray = LoadIncrementalHFiles.inferBoundaries(map);
    byte[][] expected = { Bytes.toBytes("m"), Bytes.toBytes("r"), Bytes.toBytes("u") };

    assertEquals(expected.length, keysArray.length);

    for (int row = 0; row < keysArray.length; row++) {
      assertArrayEquals(expected[row], keysArray[row]);
    }
  }

  /**
   * Loading one more HFile than {@link #MAX_FILES_PER_REGION_PER_FAMILY} into a single region and
   * family must be rejected.
   */
  @Test
  public void testLoadTooMayHFiles() throws Exception {
    Path dir = util.getDataTestDirOnTestFS("testLoadTooMayHFiles");
    FileSystem fs = util.getTestFileSystem();
    dir = dir.makeQualified(fs.getUri(), fs.getWorkingDirectory());
    Path familyDir = new Path(dir, Bytes.toString(FAMILY));

    byte[] from = Bytes.toBytes("begin");
    byte[] to = Bytes.toBytes("end");
    for (int i = 0; i <= MAX_FILES_PER_REGION_PER_FAMILY; i++) {
      HFileTestUtil.createHFile(util.getConfiguration(), fs, new Path(familyDir, "hfile_" + i),
        FAMILY, QUALIFIER, from, to, 1000);
    }

    LoadIncrementalHFiles loader = new LoadIncrementalHFiles(util.getConfiguration());
    String[] args = { dir.toString(), "mytable_testLoadTooMayHFiles" };
    try {
      loader.run(args);
      fail("Bulk loading too many files should fail");
    } catch (IOException ie) {
      assertTrue(ie.getMessage()
        .contains("Trying to load more than " + MAX_FILES_PER_REGION_PER_FAMILY + " hfiles"));
    }
  }

  @Test(expected = TableNotFoundException.class)
  public void testWithoutAnExistingTableAndCreateTableSetToNo() throws Exception {
    // Use a private copy: setting this key on the shared configuration would stop later tests in
    // this class from having their tables auto-created by the loader.
    Configuration conf = new Configuration(util.getConfiguration());
    conf.set(LoadIncrementalHFiles.CREATE_TABLE_CONF_KEY, "no");
    LoadIncrementalHFiles loader = new LoadIncrementalHFiles(conf);
    String[] args = { "directory", "nonExistingTable" };
    loader.run(args);
  }

  /**
   * A family whose name starts with an underscore must still be picked up as a family directory.
   */
  @Test
  public void testTableWithCFNameStartWithUnderScore() throws Exception {
    Path dir = util.getDataTestDirOnTestFS("cfNameStartWithUnderScore");
    FileSystem fs = util.getTestFileSystem();
    dir = dir.makeQualified(fs.getUri(), fs.getWorkingDirectory());
    String family = "_cf";
    Path familyDir = new Path(dir, family);

    byte[] from = Bytes.toBytes("begin");
    byte[] to = Bytes.toBytes("end");
    Configuration conf = util.getConfiguration();
    String tableName = tn.getMethodName();
    try (Table table = util.createTable(TableName.valueOf(tableName), family)) {
      HFileTestUtil.createHFile(conf, fs, new Path(familyDir, "hfile"), Bytes.toBytes(family),
        QUALIFIER, from, to, 1000);

      LoadIncrementalHFiles loader = new LoadIncrementalHFiles(conf);
      String[] args = { dir.toString(), tableName };
      loader.run(args);
      assertEquals(1000, util.countRows(table));
    }
  }
}