001/**
002 *
003 * Licensed to the Apache Software Foundation (ASF) under one
004 * or more contributor license agreements.  See the NOTICE file
005 * distributed with this work for additional information
006 * regarding copyright ownership.  The ASF licenses this file
007 * to you under the Apache License, Version 2.0 (the
008 * "License"); you may not use this file except in compliance
009 * with the License.  You may obtain a copy of the License at
010 *
011 *     http://www.apache.org/licenses/LICENSE-2.0
012 *
013 * Unless required by applicable law or agreed to in writing, software
014 * distributed under the License is distributed on an "AS IS" BASIS,
015 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
016 * See the License for the specific language governing permissions and
017 * limitations under the License.
018 */
019package org.apache.hadoop.hbase.mapred;
020
021import java.io.IOException;
022
023import org.apache.yetus.audience.InterfaceAudience;
024import org.apache.hadoop.conf.Configured;
025import org.apache.hadoop.fs.Path;
026import org.apache.hadoop.hbase.HBaseConfiguration;
027import org.apache.hadoop.hbase.client.Result;
028import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
029import org.apache.hadoop.mapred.FileOutputFormat;
030import org.apache.hadoop.mapred.JobClient;
031import org.apache.hadoop.mapred.JobConf;
032import org.apache.hadoop.mapred.OutputCollector;
033import org.apache.hadoop.mapred.Reporter;
034import org.apache.hadoop.util.Tool;
035import org.apache.hadoop.util.ToolRunner;
036
037/**
038 * A job with a map to count rows.
039 * Map outputs table rows IF the input row has columns that have content.
040 * Uses a org.apache.hadoop.mapred.lib.IdentityReducer
041 */
042@InterfaceAudience.Public
043public class RowCounter extends Configured implements Tool {
044  // Name of this 'program'
045  static final String NAME = "rowcounter";
046
047  /**
048   * Mapper that runs the count.
049   */
050  static class RowCounterMapper
051  implements TableMap<ImmutableBytesWritable, Result> {
052    private static enum Counters {ROWS}
053
054    public void map(ImmutableBytesWritable row, Result values,
055        OutputCollector<ImmutableBytesWritable, Result> output,
056        Reporter reporter)
057    throws IOException {
058        // Count every row containing data, whether it's in qualifiers or values
059        reporter.incrCounter(Counters.ROWS, 1);
060    }
061
062    public void configure(JobConf jc) {
063      // Nothing to do.
064    }
065
066    public void close() throws IOException {
067      // Nothing to do.
068    }
069  }
070
071  /**
072   * @param args
073   * @return the JobConf
074   * @throws IOException
075   */
076  public JobConf createSubmittableJob(String[] args) throws IOException {
077    JobConf c = new JobConf(getConf(), getClass());
078    c.setJobName(NAME);
079    // Columns are space delimited
080    StringBuilder sb = new StringBuilder();
081    final int columnoffset = 2;
082    for (int i = columnoffset; i < args.length; i++) {
083      if (i > columnoffset) {
084        sb.append(" ");
085      }
086      sb.append(args[i]);
087    }
088    // Second argument is the table name.
089    TableMapReduceUtil.initTableMapJob(args[1], sb.toString(),
090      RowCounterMapper.class, ImmutableBytesWritable.class, Result.class, c);
091    c.setNumReduceTasks(0);
092    // First arg is the output directory.
093    FileOutputFormat.setOutputPath(c, new Path(args[0]));
094    return c;
095  }
096
097  static int printUsage() {
098    System.out.println(NAME +
099      " <outputdir> <tablename> <column1> [<column2>...]");
100    return -1;
101  }
102
103  public int run(final String[] args) throws Exception {
104    // Make sure there are at least 3 parameters
105    if (args.length < 3) {
106      System.err.println("ERROR: Wrong number of parameters: " + args.length);
107      return printUsage();
108    }
109    JobClient.runJob(createSubmittableJob(args));
110    return 0;
111  }
112
113  /**
114   * @param args
115   * @throws Exception
116   */
117  public static void main(String[] args) throws Exception {
118    int errCode = ToolRunner.run(HBaseConfiguration.create(), new RowCounter(), args);
119    System.exit(errCode);
120  }
121}