001/*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018package org.apache.hadoop.hbase.mapred;
019
020import java.io.IOException;
021import org.apache.hadoop.conf.Configured;
022import org.apache.hadoop.fs.Path;
023import org.apache.hadoop.hbase.HBaseConfiguration;
024import org.apache.hadoop.hbase.client.Result;
025import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
026import org.apache.hadoop.mapred.FileOutputFormat;
027import org.apache.hadoop.mapred.JobClient;
028import org.apache.hadoop.mapred.JobConf;
029import org.apache.hadoop.mapred.OutputCollector;
030import org.apache.hadoop.mapred.Reporter;
031import org.apache.hadoop.util.Tool;
032import org.apache.hadoop.util.ToolRunner;
033import org.apache.yetus.audience.InterfaceAudience;
034
035/**
036 * A job with a map to count rows. Map outputs table rows IF the input row has columns that have
037 * content. Uses a org.apache.hadoop.mapred.lib.IdentityReducer
038 */
039@InterfaceAudience.Public
040public class RowCounter extends Configured implements Tool {
041  // Name of this 'program'
042  static final String NAME = "rowcounter";
043
044  /**
045   * Mapper that runs the count.
046   */
047  static class RowCounterMapper implements TableMap<ImmutableBytesWritable, Result> {
048    private static enum Counters {
049      ROWS
050    }
051
052    public void map(ImmutableBytesWritable row, Result values,
053      OutputCollector<ImmutableBytesWritable, Result> output, Reporter reporter)
054      throws IOException {
055      // Count every row containing data, whether it's in qualifiers or values
056      reporter.incrCounter(Counters.ROWS, 1);
057    }
058
059    public void configure(JobConf jc) {
060      // Nothing to do.
061    }
062
063    public void close() throws IOException {
064      // Nothing to do.
065    }
066  }
067
068  /** Returns the JobConf */
069  public JobConf createSubmittableJob(String[] args) throws IOException {
070    JobConf c = new JobConf(getConf(), getClass());
071    c.setJobName(NAME);
072    // Columns are space delimited
073    StringBuilder sb = new StringBuilder();
074    final int columnoffset = 2;
075    for (int i = columnoffset; i < args.length; i++) {
076      if (i > columnoffset) {
077        sb.append(" ");
078      }
079      sb.append(args[i]);
080    }
081    // Second argument is the table name.
082    TableMapReduceUtil.initTableMapJob(args[1], sb.toString(), RowCounterMapper.class,
083      ImmutableBytesWritable.class, Result.class, c);
084    c.setNumReduceTasks(0);
085    // First arg is the output directory.
086    FileOutputFormat.setOutputPath(c, new Path(args[0]));
087    return c;
088  }
089
090  static int printUsage() {
091    System.out.println(NAME + " <outputdir> <tablename> <column1> [<column2>...]");
092    return -1;
093  }
094
095  public int run(final String[] args) throws Exception {
096    // Make sure there are at least 3 parameters
097    if (args.length < 3) {
098      System.err.println("ERROR: Wrong number of parameters: " + args.length);
099      return printUsage();
100    }
101    JobClient.runJob(createSubmittableJob(args));
102    return 0;
103  }
104
105  public static void main(String[] args) throws Exception {
106    int errCode = ToolRunner.run(HBaseConfiguration.create(), new RowCounter(), args);
107    System.exit(errCode);
108  }
109}