View Javadoc

1   /**
2    *
3    * Licensed to the Apache Software Foundation (ASF) under one
4    * or more contributor license agreements.  See the NOTICE file
5    * distributed with this work for additional information
6    * regarding copyright ownership.  The ASF licenses this file
7    * to you under the Apache License, Version 2.0 (the
8    * "License"); you may not use this file except in compliance
9    * with the License.  You may obtain a copy of the License at
10   *
11   *     http://www.apache.org/licenses/LICENSE-2.0
12   *
13   * Unless required by applicable law or agreed to in writing, software
14   * distributed under the License is distributed on an "AS IS" BASIS,
15   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16   * See the License for the specific language governing permissions and
17   * limitations under the License.
18   */
19  package org.apache.hadoop.hbase.mapred;
20  
21  import java.io.IOException;
22  
23  import org.apache.hadoop.hbase.classification.InterfaceAudience;
24  import org.apache.hadoop.hbase.classification.InterfaceStability;
25  import org.apache.hadoop.conf.Configured;
26  import org.apache.hadoop.fs.Path;
27  import org.apache.hadoop.hbase.HBaseConfiguration;
28  import org.apache.hadoop.hbase.client.Result;
29  import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
30  import org.apache.hadoop.mapred.FileOutputFormat;
31  import org.apache.hadoop.mapred.JobClient;
32  import org.apache.hadoop.mapred.JobConf;
33  import org.apache.hadoop.mapred.OutputCollector;
34  import org.apache.hadoop.mapred.Reporter;
35  import org.apache.hadoop.mapred.lib.IdentityReducer;
36  import org.apache.hadoop.util.Tool;
37  import org.apache.hadoop.util.ToolRunner;
38  
39  /**
40   * A job with a map to count rows.
41   * Map outputs table rows IF the input row has columns that have content.
42   * Uses an {@link IdentityReducer}
43   */
44  @InterfaceAudience.Public
45  @InterfaceStability.Stable
46  public class RowCounter extends Configured implements Tool {
47    // Name of this 'program'
48    static final String NAME = "rowcounter";
49  
50    /**
51     * Mapper that runs the count.
52     */
53    static class RowCounterMapper
54    implements TableMap<ImmutableBytesWritable, Result> {
55      private static enum Counters {ROWS}
56  
57      public void map(ImmutableBytesWritable row, Result values,
58          OutputCollector<ImmutableBytesWritable, Result> output,
59          Reporter reporter)
60      throws IOException {
61          // Count every row containing data, whether it's in qualifiers or values
62          reporter.incrCounter(Counters.ROWS, 1);
63      }
64  
65      public void configure(JobConf jc) {
66        // Nothing to do.
67      }
68  
69      public void close() throws IOException {
70        // Nothing to do.
71      }
72    }
73  
74    /**
75     * @param args
76     * @return the JobConf
77     * @throws IOException
78     */
79    public JobConf createSubmittableJob(String[] args) throws IOException {
80      JobConf c = new JobConf(getConf(), getClass());
81      c.setJobName(NAME);
82      // Columns are space delimited
83      StringBuilder sb = new StringBuilder();
84      final int columnoffset = 2;
85      for (int i = columnoffset; i < args.length; i++) {
86        if (i > columnoffset) {
87          sb.append(" ");
88        }
89        sb.append(args[i]);
90      }
91      // Second argument is the table name.
92      TableMapReduceUtil.initTableMapJob(args[1], sb.toString(),
93        RowCounterMapper.class, ImmutableBytesWritable.class, Result.class, c);
94      c.setNumReduceTasks(0);
95      // First arg is the output directory.
96      FileOutputFormat.setOutputPath(c, new Path(args[0]));
97      return c;
98    }
99  
100   static int printUsage() {
101     System.out.println(NAME +
102       " <outputdir> <tablename> <column1> [<column2>...]");
103     return -1;
104   }
105 
106   public int run(final String[] args) throws Exception {
107     // Make sure there are at least 3 parameters
108     if (args.length < 3) {
109       System.err.println("ERROR: Wrong number of parameters: " + args.length);
110       return printUsage();
111     }
112     JobClient.runJob(createSubmittableJob(args));
113     return 0;
114   }
115 
116   /**
117    * @param args
118    * @throws Exception
119    */
120   public static void main(String[] args) throws Exception {
121     int errCode = ToolRunner.run(HBaseConfiguration.create(), new RowCounter(), args);
122     System.exit(errCode);
123   }
124 }