001/*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018package org.apache.hadoop.hbase.mapreduce;
019
020import static org.junit.Assert.assertEquals;
021import static org.junit.Assert.assertTrue;
022import static org.junit.Assert.fail;
023
024import java.io.ByteArrayOutputStream;
025import java.io.IOException;
026import java.io.PrintStream;
027import java.util.ArrayList;
028import java.util.Arrays;
029import org.apache.hadoop.hbase.HBaseClassTestRule;
030import org.apache.hadoop.hbase.HBaseTestingUtil;
031import org.apache.hadoop.hbase.TableName;
032import org.apache.hadoop.hbase.client.Put;
033import org.apache.hadoop.hbase.client.Table;
034import org.apache.hadoop.hbase.testclassification.LargeTests;
035import org.apache.hadoop.hbase.testclassification.MapReduceTests;
036import org.apache.hadoop.hbase.util.Bytes;
037import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
038import org.apache.hadoop.hbase.util.LauncherSecurityManager;
039import org.apache.hadoop.mapreduce.Counter;
040import org.apache.hadoop.mapreduce.Job;
041import org.junit.AfterClass;
042import org.junit.BeforeClass;
043import org.junit.ClassRule;
044import org.junit.Test;
045import org.junit.experimental.categories.Category;
046import org.slf4j.Logger;
047import org.slf4j.LoggerFactory;
048
049/**
050 * Test the rowcounter map reduce job.
051 */
052@Category({ MapReduceTests.class, LargeTests.class })
053public class TestRowCounter {
054
055  @ClassRule
056  public static final HBaseClassTestRule CLASS_RULE =
057    HBaseClassTestRule.forClass(TestRowCounter.class);
058
059  private static final Logger LOG = LoggerFactory.getLogger(TestRowCounter.class);
060  private final static HBaseTestingUtil TEST_UTIL = new HBaseTestingUtil();
061  private final static String TABLE_NAME = "testRowCounter";
062  private final static String TABLE_NAME_TS_RANGE = "testRowCounter_ts_range";
063  private final static String COL_FAM = "col_fam";
064  private final static String COL1 = "c1";
065  private final static String COL2 = "c2";
066  private final static String COMPOSITE_COLUMN = "C:A:A";
067  private final static int TOTAL_ROWS = 10;
068  private final static int ROWS_WITH_ONE_COL = 2;
069
070  /**
071   * @throws java.lang.Exception
072   */
073  @BeforeClass
074  public static void setUpBeforeClass() throws Exception {
075    TEST_UTIL.startMiniCluster();
076    Table table = TEST_UTIL.createTable(TableName.valueOf(TABLE_NAME), Bytes.toBytes(COL_FAM));
077    writeRows(table, TOTAL_ROWS, ROWS_WITH_ONE_COL);
078    table.close();
079  }
080
081  /**
082   * @throws java.lang.Exception
083   */
084  @AfterClass
085  public static void tearDownAfterClass() throws Exception {
086    TEST_UTIL.shutdownMiniCluster();
087  }
088
089  /**
090   * Test a case when no column was specified in command line arguments.
091   */
092  @Test
093  public void testRowCounterNoColumn() throws Exception {
094    String[] args = new String[] { TABLE_NAME };
095    runRowCount(args, 10);
096  }
097
098  /**
099   * Test a case when the column specified in command line arguments is exclusive for few rows.
100   */
101  @Test
102  public void testRowCounterExclusiveColumn() throws Exception {
103    String[] args = new String[] { TABLE_NAME, COL_FAM + ":" + COL1 };
104    runRowCount(args, 8);
105  }
106
107  /**
108   * Test a case when the column specified in command line arguments is one for which the qualifier
109   * contains colons.
110   */
111  @Test
112  public void testRowCounterColumnWithColonInQualifier() throws Exception {
113    String[] args = new String[] { TABLE_NAME, COL_FAM + ":" + COMPOSITE_COLUMN };
114    runRowCount(args, 8);
115  }
116
117  /**
118   * Test a case when the column specified in command line arguments is not part of first KV for a
119   * row.
120   */
121  @Test
122  public void testRowCounterHiddenColumn() throws Exception {
123    String[] args = new String[] { TABLE_NAME, COL_FAM + ":" + COL2 };
124    runRowCount(args, 10);
125  }
126
127  /**
128   * Test a case when the column specified in command line arguments is exclusive for few rows and
129   * also a row range filter is specified
130   */
131  @Test
132  public void testRowCounterColumnAndRowRange() throws Exception {
133    String[] args = new String[] { TABLE_NAME, "--range=\\x00rov,\\x00rox", COL_FAM + ":" + COL1 };
134    runRowCount(args, 8);
135  }
136
137  /**
138   * Test a case when a range is specified with single range of start-end keys
139   */
140  @Test
141  public void testRowCounterRowSingleRange() throws Exception {
142    String[] args = new String[] { TABLE_NAME, "--range=\\x00row1,\\x00row3" };
143    runRowCount(args, 2);
144  }
145
146  /**
147   * Test a case when a range is specified with single range with end key only
148   */
149  @Test
150  public void testRowCounterRowSingleRangeUpperBound() throws Exception {
151    String[] args = new String[] { TABLE_NAME, "--range=,\\x00row3" };
152    runRowCount(args, 3);
153  }
154
155  /**
156   * Test a case when a range is specified with two ranges where one range is with end key only
157   */
158  @Test
159  public void testRowCounterRowMultiRangeUpperBound() throws Exception {
160    String[] args = new String[] { TABLE_NAME, "--range=,\\x00row3;\\x00row5,\\x00row7" };
161    runRowCount(args, 5);
162  }
163
164  /**
165   * Test a case when a range is specified with multiple ranges of start-end keys
166   */
167  @Test
168  public void testRowCounterRowMultiRange() throws Exception {
169    String[] args = new String[] { TABLE_NAME, "--range=\\x00row1,\\x00row3;\\x00row5,\\x00row8" };
170    runRowCount(args, 5);
171  }
172
173  /**
174   * Test a case when a range is specified with multiple ranges of start-end keys; one range is
175   * filled, another two are not
176   */
177  @Test
178  public void testRowCounterRowMultiEmptyRange() throws Exception {
179    String[] args = new String[] { TABLE_NAME, "--range=\\x00row1,\\x00row3;;" };
180    runRowCount(args, 2);
181  }
182
183  @Test
184  public void testRowCounter10kRowRange() throws Exception {
185    String tableName = TABLE_NAME + "10k";
186
187    try (
188      Table table = TEST_UTIL.createTable(TableName.valueOf(tableName), Bytes.toBytes(COL_FAM))) {
189      writeRows(table, 10000, 0);
190    }
191    String[] args = new String[] { tableName, "--range=\\x00row9872,\\x00row9875" };
192    runRowCount(args, 3);
193  }
194
195  /**
196   * Test a case when the timerange is specified with --starttime and --endtime options
197   */
198  @Test
199  public void testRowCounterTimeRange() throws Exception {
200    final byte[] family = Bytes.toBytes(COL_FAM);
201    final byte[] col1 = Bytes.toBytes(COL1);
202    Put put1 = new Put(Bytes.toBytes("row_timerange_" + 1));
203    Put put2 = new Put(Bytes.toBytes("row_timerange_" + 2));
204    Put put3 = new Put(Bytes.toBytes("row_timerange_" + 3));
205
206    long ts;
207
208    // clean up content of TABLE_NAME
209    Table table =
210      TEST_UTIL.createTable(TableName.valueOf(TABLE_NAME_TS_RANGE), Bytes.toBytes(COL_FAM));
211
212    ts = EnvironmentEdgeManager.currentTime();
213    put1.addColumn(family, col1, ts, Bytes.toBytes("val1"));
214    table.put(put1);
215    Thread.sleep(100);
216
217    ts = EnvironmentEdgeManager.currentTime();
218    put2.addColumn(family, col1, ts, Bytes.toBytes("val2"));
219    put3.addColumn(family, col1, ts, Bytes.toBytes("val3"));
220    table.put(put2);
221    table.put(put3);
222    table.close();
223
224    String[] args = new String[] { TABLE_NAME_TS_RANGE, COL_FAM + ":" + COL1, "--starttime=" + 0,
225      "--endtime=" + ts };
226    runRowCount(args, 1);
227
228    args = new String[] { TABLE_NAME_TS_RANGE, COL_FAM + ":" + COL1, "--starttime=" + 0,
229      "--endtime=" + (ts - 10) };
230    runRowCount(args, 1);
231
232    args = new String[] { TABLE_NAME_TS_RANGE, COL_FAM + ":" + COL1, "--starttime=" + ts,
233      "--endtime=" + (ts + 1000) };
234    runRowCount(args, 2);
235
236    args = new String[] { TABLE_NAME_TS_RANGE, COL_FAM + ":" + COL1,
237      "--starttime=" + (ts - 30 * 1000), "--endtime=" + (ts + 30 * 1000), };
238    runRowCount(args, 3);
239  }
240
241  /**
242   * Run the RowCounter map reduce job and verify the row count.
243   * @param args          the command line arguments to be used for rowcounter job.
244   * @param expectedCount the expected row count (result of map reduce job).
245   */
246  private void runRowCount(String[] args, int expectedCount) throws Exception {
247    RowCounter rowCounter = new RowCounter();
248    rowCounter.setConf(TEST_UTIL.getConfiguration());
249    args = Arrays.copyOf(args, args.length + 1);
250    args[args.length - 1] = "--expectedCount=" + expectedCount;
251    long start = EnvironmentEdgeManager.currentTime();
252    int result = rowCounter.run(args);
253    long duration = EnvironmentEdgeManager.currentTime() - start;
254    LOG.debug("row count duration (ms): " + duration);
255    assertTrue(result == 0);
256  }
257
258  /**
259   * Run the RowCounter map reduce job and verify the row count.
260   * @param args          the command line arguments to be used for rowcounter job.
261   * @param expectedCount the expected row count (result of map reduce job).
262   * @throws Exception in case of any unexpected error.
263   */
264  private void runCreateSubmittableJobWithArgs(String[] args, int expectedCount) throws Exception {
265    Job job = RowCounter.createSubmittableJob(TEST_UTIL.getConfiguration(), args);
266    long start = EnvironmentEdgeManager.currentTime();
267    job.waitForCompletion(true);
268    long duration = EnvironmentEdgeManager.currentTime() - start;
269    LOG.debug("row count duration (ms): " + duration);
270    assertTrue(job.isSuccessful());
271    Counter counter = job.getCounters().findCounter(RowCounter.RowCounterMapper.Counters.ROWS);
272    assertEquals(expectedCount, counter.getValue());
273  }
274
275  @Test
276  public void testCreateSubmittableJobWithArgsNoColumn() throws Exception {
277    String[] args = new String[] { TABLE_NAME };
278    runCreateSubmittableJobWithArgs(args, 10);
279  }
280
281  /**
282   * Test a case when the column specified in command line arguments is exclusive for few rows.
283   * @throws Exception in case of any unexpected error.
284   */
285  @Test
286  public void testCreateSubmittableJobWithArgsExclusiveColumn() throws Exception {
287    String[] args = new String[] { TABLE_NAME, COL_FAM + ":" + COL1 };
288    runCreateSubmittableJobWithArgs(args, 8);
289  }
290
291  /**
292   * Test a case when the column specified in command line arguments is one for which the qualifier
293   * contains colons.
294   * @throws Exception in case of any unexpected error.
295   */
296  @Test
297  public void testCreateSubmittableJobWithArgsColumnWithColonInQualifier() throws Exception {
298    String[] args = new String[] { TABLE_NAME, COL_FAM + ":" + COMPOSITE_COLUMN };
299    runCreateSubmittableJobWithArgs(args, 8);
300  }
301
302  /**
303   * Test a case when the column specified in command line arguments is not part of first KV for a
304   * row.
305   * @throws Exception in case of any unexpected error.
306   */
307  @Test
308  public void testCreateSubmittableJobWithArgsHiddenColumn() throws Exception {
309    String[] args = new String[] { TABLE_NAME, COL_FAM + ":" + COL2 };
310    runCreateSubmittableJobWithArgs(args, 10);
311  }
312
313  /**
314   * Test a case when the column specified in command line arguments is exclusive for few rows and
315   * also a row range filter is specified
316   * @throws Exception in case of any unexpected error.
317   */
318  @Test
319  public void testCreateSubmittableJobWithArgsColumnAndRowRange() throws Exception {
320    String[] args = new String[] { TABLE_NAME, "--range=\\x00rov,\\x00rox", COL_FAM + ":" + COL1 };
321    runCreateSubmittableJobWithArgs(args, 8);
322  }
323
324  /**
325   * Test a case when a range is specified with single range of start-end keys
326   * @throws Exception in case of any unexpected error.
327   */
328  @Test
329  public void testCreateSubmittableJobWithArgsRowSingleRange() throws Exception {
330    String[] args = new String[] { TABLE_NAME, "--range=\\x00row1,\\x00row3" };
331    runCreateSubmittableJobWithArgs(args, 2);
332  }
333
334  /**
335   * Test a case when a range is specified with single range with end key only
336   * @throws Exception in case of any unexpected error.
337   */
338  @Test
339  public void testCreateSubmittableJobWithArgsRowSingleRangeUpperBound() throws Exception {
340    String[] args = new String[] { TABLE_NAME, "--range=,\\x00row3" };
341    runCreateSubmittableJobWithArgs(args, 3);
342  }
343
344  /**
345   * Test a case when a range is specified with two ranges where one range is with end key only
346   * @throws Exception in case of any unexpected error.
347   */
348  @Test
349  public void testCreateSubmittableJobWithArgsRowMultiRangeUpperBound() throws Exception {
350    String[] args = new String[] { TABLE_NAME, "--range=,\\x00row3;\\x00row5,\\x00row7" };
351    runCreateSubmittableJobWithArgs(args, 5);
352  }
353
354  /**
355   * Test a case when a range is specified with multiple ranges of start-end keys
356   * @throws Exception in case of any unexpected error.
357   */
358  @Test
359  public void testCreateSubmittableJobWithArgsRowMultiRange() throws Exception {
360    String[] args = new String[] { TABLE_NAME, "--range=\\x00row1,\\x00row3;\\x00row5,\\x00row8" };
361    runCreateSubmittableJobWithArgs(args, 5);
362  }
363
364  /**
365   * Test a case when a range is specified with multiple ranges of start-end keys; one range is
366   * filled, another two are not
367   * @throws Exception in case of any unexpected error.
368   */
369  @Test
370  public void testCreateSubmittableJobWithArgsRowMultiEmptyRange() throws Exception {
371    String[] args = new String[] { TABLE_NAME, "--range=\\x00row1,\\x00row3;;" };
372    runCreateSubmittableJobWithArgs(args, 2);
373  }
374
375  @Test
376  public void testCreateSubmittableJobWithArgs10kRowRange() throws Exception {
377    String tableName = TABLE_NAME + "CreateSubmittableJobWithArgs10kRowRange";
378
379    try (
380      Table table = TEST_UTIL.createTable(TableName.valueOf(tableName), Bytes.toBytes(COL_FAM))) {
381      writeRows(table, 10000, 0);
382    }
383    String[] args = new String[] { tableName, "--range=\\x00row9872,\\x00row9875" };
384    runCreateSubmittableJobWithArgs(args, 3);
385  }
386
387  /**
388   * Test a case when the timerange is specified with --starttime and --endtime options
389   * @throws Exception in case of any unexpected error.
390   */
391  @Test
392  public void testCreateSubmittableJobWithArgsTimeRange() throws Exception {
393    final byte[] family = Bytes.toBytes(COL_FAM);
394    final byte[] col1 = Bytes.toBytes(COL1);
395    Put put1 = new Put(Bytes.toBytes("row_timerange_" + 1));
396    Put put2 = new Put(Bytes.toBytes("row_timerange_" + 2));
397    Put put3 = new Put(Bytes.toBytes("row_timerange_" + 3));
398
399    long ts;
400
401    String tableName = TABLE_NAME_TS_RANGE + "CreateSubmittableJobWithArgs";
402    // clean up content of TABLE_NAME
403    Table table = TEST_UTIL.createTable(TableName.valueOf(tableName), Bytes.toBytes(COL_FAM));
404
405    ts = EnvironmentEdgeManager.currentTime();
406    put1.addColumn(family, col1, ts, Bytes.toBytes("val1"));
407    table.put(put1);
408    Thread.sleep(100);
409
410    ts = EnvironmentEdgeManager.currentTime();
411    put2.addColumn(family, col1, ts, Bytes.toBytes("val2"));
412    put3.addColumn(family, col1, ts, Bytes.toBytes("val3"));
413    table.put(put2);
414    table.put(put3);
415    table.close();
416
417    String[] args =
418      new String[] { tableName, COL_FAM + ":" + COL1, "--starttime=" + 0, "--endtime=" + ts };
419    runCreateSubmittableJobWithArgs(args, 1);
420
421    args = new String[] { tableName, COL_FAM + ":" + COL1, "--starttime=" + 0,
422      "--endtime=" + (ts - 10) };
423    runCreateSubmittableJobWithArgs(args, 1);
424
425    args = new String[] { tableName, COL_FAM + ":" + COL1, "--starttime=" + ts,
426      "--endtime=" + (ts + 1000) };
427    runCreateSubmittableJobWithArgs(args, 2);
428
429    args = new String[] { tableName, COL_FAM + ":" + COL1, "--starttime=" + (ts - 30 * 1000),
430      "--endtime=" + (ts + 30 * 1000), };
431    runCreateSubmittableJobWithArgs(args, 3);
432  }
433
434  /**
435   * Writes TOTAL_ROWS number of distinct rows in to the table. Few rows have two columns, Few have
436   * one.
437   */
438  private static void writeRows(Table table, int totalRows, int rowsWithOneCol) throws IOException {
439    final byte[] family = Bytes.toBytes(COL_FAM);
440    final byte[] value = Bytes.toBytes("abcd");
441    final byte[] col1 = Bytes.toBytes(COL1);
442    final byte[] col2 = Bytes.toBytes(COL2);
443    final byte[] col3 = Bytes.toBytes(COMPOSITE_COLUMN);
444    ArrayList<Put> rowsUpdate = new ArrayList<>();
445    // write few rows with two columns
446    int i = 0;
447    for (; i < totalRows - rowsWithOneCol; i++) {
448      // Use binary rows values to test for HBASE-15287.
449      byte[] row = Bytes.toBytesBinary("\\x00row" + i);
450      Put put = new Put(row);
451      put.addColumn(family, col1, value);
452      put.addColumn(family, col2, value);
453      put.addColumn(family, col3, value);
454      rowsUpdate.add(put);
455    }
456
457    // write few rows with only one column
458    for (; i < totalRows; i++) {
459      byte[] row = Bytes.toBytes("row" + i);
460      Put put = new Put(row);
461      put.addColumn(family, col2, value);
462      rowsUpdate.add(put);
463    }
464    table.put(rowsUpdate);
465  }
466
467  /**
468   * test main method. Import should print help and call System.exit
469   */
470  @Test
471  public void testImportMain() throws Exception {
472    SecurityManager SECURITY_MANAGER = System.getSecurityManager();
473    LauncherSecurityManager newSecurityManager = new LauncherSecurityManager();
474    System.setSecurityManager(newSecurityManager);
475    String[] args = {};
476    try {
477      try {
478        RowCounter.main(args);
479        fail("should be SecurityException");
480      } catch (SecurityException e) {
481        assertEquals(RowCounter.EXIT_FAILURE, newSecurityManager.getExitCode());
482      }
483      try {
484        args = new String[2];
485        args[0] = "table";
486        args[1] = "--range=1";
487        RowCounter.main(args);
488        fail("should be SecurityException");
489      } catch (SecurityException e) {
490        assertEquals(RowCounter.EXIT_FAILURE, newSecurityManager.getExitCode());
491      }
492
493    } finally {
494      System.setSecurityManager(SECURITY_MANAGER);
495    }
496  }
497
498  @Test
499  public void testHelp() throws Exception {
500    PrintStream oldPrintStream = System.out;
501    try {
502      ByteArrayOutputStream data = new ByteArrayOutputStream();
503      PrintStream stream = new PrintStream(data);
504      System.setOut(stream);
505      String[] args = { "-h" };
506      runRowCount(args, 0);
507      assertUsageContent(data.toString());
508      args = new String[] { "--help" };
509      runRowCount(args, 0);
510      assertUsageContent(data.toString());
511    } finally {
512      System.setOut(oldPrintStream);
513    }
514  }
515
516  @Test
517  public void testInvalidTable() throws Exception {
518    try {
519      String[] args = { "invalid" };
520      runRowCount(args, 0);
521      fail("RowCounter should had failed with invalid table.");
522    } catch (Throwable e) {
523      assertTrue(e instanceof AssertionError);
524    }
525  }
526
527  private void assertUsageContent(String usage) {
528    assertTrue(usage
529      .contains("usage: hbase rowcounter " + "<tablename> [options] [<column1> <column2>...]"));
530    assertTrue(usage.contains("Options:\n"));
531    assertTrue(usage.contains(
532      "--starttime=<arg>       " + "starting time filter to start counting rows from.\n"));
533    assertTrue(usage.contains("--endtime=<arg>         "
534      + "end time filter limit, to only count rows up to this timestamp.\n"));
535    assertTrue(usage
536      .contains("--range=<arg>           " + "[startKey],[endKey][;[startKey],[endKey]...]]\n"));
537    assertTrue(usage.contains("--expectedCount=<arg>   expected number of rows to be count.\n"));
538    assertTrue(
539      usage.contains("For performance, " + "consider the following configuration properties:\n"));
540    assertTrue(usage.contains("-Dhbase.client.scanner.caching=100\n"));
541    assertTrue(usage.contains("-Dmapreduce.map.speculative=false\n"));
542  }
543
544}