001/*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018package org.apache.hadoop.hbase.mapred;
019
020import java.io.IOException;
021import org.apache.hadoop.conf.Configured;
022import org.apache.hadoop.fs.Path;
023import org.apache.hadoop.hbase.HBaseConfiguration;
024import org.apache.hadoop.hbase.client.Result;
025import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
026import org.apache.hadoop.mapred.FileOutputFormat;
027import org.apache.hadoop.mapred.JobClient;
028import org.apache.hadoop.mapred.JobConf;
029import org.apache.hadoop.mapred.OutputCollector;
030import org.apache.hadoop.mapred.Reporter;
031import org.apache.hadoop.util.Tool;
032import org.apache.hadoop.util.ToolRunner;
033import org.apache.yetus.audience.InterfaceAudience;
034
035/**
036 * A job with a map to count rows. Map outputs table rows IF the input row has columns that have
037 * content. Uses a org.apache.hadoop.mapred.lib.IdentityReducer
038 */
039@InterfaceAudience.Public
040public class RowCounter extends Configured implements Tool {
041  // Name of this 'program'
042  static final String NAME = "rowcounter";
043
044  /**
045   * Mapper that runs the count.
046   */
047  static class RowCounterMapper implements TableMap<ImmutableBytesWritable, Result> {
048    private static enum Counters {
049      ROWS
050    }
051
052    public void map(ImmutableBytesWritable row, Result values,
053      OutputCollector<ImmutableBytesWritable, Result> output, Reporter reporter)
054      throws IOException {
055      // Count every row containing data, whether it's in qualifiers or values
056      reporter.incrCounter(Counters.ROWS, 1);
057    }
058
059    public void configure(JobConf jc) {
060      // Nothing to do.
061    }
062
063    public void close() throws IOException {
064      // Nothing to do.
065    }
066  }
067
068  /**
069   * n * @return the JobConf n
070   */
071  public JobConf createSubmittableJob(String[] args) throws IOException {
072    JobConf c = new JobConf(getConf(), getClass());
073    c.setJobName(NAME);
074    // Columns are space delimited
075    StringBuilder sb = new StringBuilder();
076    final int columnoffset = 2;
077    for (int i = columnoffset; i < args.length; i++) {
078      if (i > columnoffset) {
079        sb.append(" ");
080      }
081      sb.append(args[i]);
082    }
083    // Second argument is the table name.
084    TableMapReduceUtil.initTableMapJob(args[1], sb.toString(), RowCounterMapper.class,
085      ImmutableBytesWritable.class, Result.class, c);
086    c.setNumReduceTasks(0);
087    // First arg is the output directory.
088    FileOutputFormat.setOutputPath(c, new Path(args[0]));
089    return c;
090  }
091
092  static int printUsage() {
093    System.out.println(NAME + " <outputdir> <tablename> <column1> [<column2>...]");
094    return -1;
095  }
096
097  public int run(final String[] args) throws Exception {
098    // Make sure there are at least 3 parameters
099    if (args.length < 3) {
100      System.err.println("ERROR: Wrong number of parameters: " + args.length);
101      return printUsage();
102    }
103    JobClient.runJob(createSubmittableJob(args));
104    return 0;
105  }
106
107  /**
108   * nn
109   */
110  public static void main(String[] args) throws Exception {
111    int errCode = ToolRunner.run(HBaseConfiguration.create(), new RowCounter(), args);
112    System.exit(errCode);
113  }
114}