View Javadoc

1   /**
2    *
3    * Licensed to the Apache Software Foundation (ASF) under one
4    * or more contributor license agreements.  See the NOTICE file
5    * distributed with this work for additional information
6    * regarding copyright ownership.  The ASF licenses this file
7    * to you under the Apache License, Version 2.0 (the
8    * "License"); you may not use this file except in compliance
9    * with the License.  You may obtain a copy of the License at
10   *
11   *     http://www.apache.org/licenses/LICENSE-2.0
12   *
13   * Unless required by applicable law or agreed to in writing, software
14   * distributed under the License is distributed on an "AS IS" BASIS,
15   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16   * See the License for the specific language governing permissions and
17   * limitations under the License.
18   */
19  package org.apache.hadoop.hbase.mapred;
20  
21  import java.io.IOException;
22  
23  import org.apache.hadoop.hbase.classification.InterfaceAudience;
24  import org.apache.hadoop.hbase.classification.InterfaceStability;
25  import org.apache.hadoop.conf.Configured;
26  import org.apache.hadoop.fs.Path;
27  import org.apache.hadoop.hbase.HBaseConfiguration;
28  import org.apache.hadoop.hbase.client.Result;
29  import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
30  import org.apache.hadoop.mapred.FileOutputFormat;
31  import org.apache.hadoop.mapred.JobClient;
32  import org.apache.hadoop.mapred.JobConf;
33  import org.apache.hadoop.mapred.OutputCollector;
34  import org.apache.hadoop.mapred.Reporter;
35  import org.apache.hadoop.util.Tool;
36  import org.apache.hadoop.util.ToolRunner;
37  
38  /**
39   * A job with a map to count rows.
40   * Map outputs table rows IF the input row has columns that have content.
41   * Uses org.apache.hadoop.mapred.lib.IdentityReducer
42   */
43  @InterfaceAudience.Public
44  @InterfaceStability.Stable
45  public class RowCounter extends Configured implements Tool {
46    // Name of this 'program'
47    static final String NAME = "rowcounter";
48  
49    /**
50     * Mapper that runs the count.
51     */
52    static class RowCounterMapper
53    implements TableMap<ImmutableBytesWritable, Result> {
54      private static enum Counters {ROWS}
55  
56      public void map(ImmutableBytesWritable row, Result values,
57          OutputCollector<ImmutableBytesWritable, Result> output,
58          Reporter reporter)
59      throws IOException {
60          // Count every row containing data, whether it's in qualifiers or values
61          reporter.incrCounter(Counters.ROWS, 1);
62      }
63  
64      public void configure(JobConf jc) {
65        // Nothing to do.
66      }
67  
68      public void close() throws IOException {
69        // Nothing to do.
70      }
71    }
72  
73    /**
74     * @param args
75     * @return the JobConf
76     * @throws IOException
77     */
78    public JobConf createSubmittableJob(String[] args) throws IOException {
79      JobConf c = new JobConf(getConf(), getClass());
80      c.setJobName(NAME);
81      // Columns are space delimited
82      StringBuilder sb = new StringBuilder();
83      final int columnoffset = 2;
84      for (int i = columnoffset; i < args.length; i++) {
85        if (i > columnoffset) {
86          sb.append(" ");
87        }
88        sb.append(args[i]);
89      }
90      // Second argument is the table name.
91      TableMapReduceUtil.initTableMapJob(args[1], sb.toString(),
92        RowCounterMapper.class, ImmutableBytesWritable.class, Result.class, c);
93      c.setNumReduceTasks(0);
94      // First arg is the output directory.
95      FileOutputFormat.setOutputPath(c, new Path(args[0]));
96      return c;
97    }
98  
99    static int printUsage() {
100     System.out.println(NAME +
101       " <outputdir> <tablename> <column1> [<column2>...]");
102     return -1;
103   }
104 
105   public int run(final String[] args) throws Exception {
106     // Make sure there are at least 3 parameters
107     if (args.length < 3) {
108       System.err.println("ERROR: Wrong number of parameters: " + args.length);
109       return printUsage();
110     }
111     JobClient.runJob(createSubmittableJob(args));
112     return 0;
113   }
114 
115   /**
116    * @param args
117    * @throws Exception
118    */
119   public static void main(String[] args) throws Exception {
120     int errCode = ToolRunner.run(HBaseConfiguration.create(), new RowCounter(), args);
121     System.exit(errCode);
122   }
123 }