/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.mapred;

import java.io.IOException;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reporter;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
import org.apache.yetus.audience.InterfaceAudience;

/**
 * A map-only job that counts the rows of a table. The mapper increments a job counter once per
 * row it is handed and writes nothing to its output collector; the number of reduce tasks is set
 * to zero, so no reducer runs.
 * <p>
 * Command line: {@code rowcounter <outputdir> <tablename> <column1> [<column2>...]}
 */
@InterfaceAudience.Public
public class RowCounter extends Configured implements Tool {
  // Name of this 'program'; also used as the job name.
  static final String NAME = "rowcounter";

  /**
   * Mapper that runs the count. It never emits a record: the result of the job is the value of
   * the {@code ROWS} counter.
   */
  static class RowCounterMapper implements TableMap<ImmutableBytesWritable, Result> {
    // Nested enums are implicitly static, so no explicit modifier is needed.
    private enum Counters {
      ROWS
    }

    /**
     * Increments the {@code ROWS} counter by one for the row passed in.
     * @param row      key of the current row (not inspected)
     * @param values   contents of the current row (not inspected)
     * @param output   collector supplied by the framework; nothing is written to it
     * @param reporter used to increment the row counter
     * @throws IOException declared by the mapper contract; not thrown by this code directly
     */
    @Override
    public void map(ImmutableBytesWritable row, Result values,
      OutputCollector<ImmutableBytesWritable, Result> output, Reporter reporter)
      throws IOException {
      // Count every row containing data, whether it's in qualifiers or values
      reporter.incrCounter(Counters.ROWS, 1);
    }

    @Override
    public void configure(JobConf jc) {
      // Nothing to do.
    }

    @Override
    public void close() throws IOException {
      // Nothing to do.
    }
  }

  /**
   * Builds the job configuration for a row count.
   * <p>
   * Argument layout: {@code args[0]} is the output directory, {@code args[1]} is the table name
   * and every element from {@code args[2]} onwards is a column. The columns are handed to
   * {@link TableMapReduceUtil#initTableMapJob} as a single space-delimited string.
   * <p>
   * This method does not validate the argument count itself and indexes {@code args[0]} and
   * {@code args[1]} unconditionally; {@link #run(String[])} checks the count before calling it.
   * @param args command line arguments as described above
   * @return the JobConf, ready to be submitted
   * @throws IOException if setting up the table map job fails
   */
  public JobConf createSubmittableJob(String[] args) throws IOException {
    JobConf job = new JobConf(getConf(), getClass());
    job.setJobName(NAME);

    // Columns are space delimited
    final int firstColumnIndex = 2;
    StringBuilder columns = new StringBuilder();
    for (int i = firstColumnIndex; i < args.length; i++) {
      if (i > firstColumnIndex) {
        columns.append(" ");
      }
      columns.append(args[i]);
    }

    // Second argument is the table name.
    TableMapReduceUtil.initTableMapJob(args[1], columns.toString(), RowCounterMapper.class,
      ImmutableBytesWritable.class, Result.class, job);
    // Map-only job: the count is carried by the ROWS counter, not by reducer output.
    job.setNumReduceTasks(0);
    // First arg is the output directory.
    FileOutputFormat.setOutputPath(job, new Path(args[0]));
    return job;
  }

  /**
   * Prints the command line synopsis to standard output.
   * @return always {@code -1}, so callers can return it directly as an error code
   */
  static int printUsage() {
    System.out.println(NAME + " <outputdir> <tablename> <column1> [<column2>...]");
    return -1;
  }

  /**
   * Validates the argument count, then builds the job and runs it through
   * {@link JobClient#runJob(JobConf)}.
   * @param args command line arguments; at least three are required
   * @return {@code -1} if fewer than three arguments were given, {@code 0} otherwise
   * @throws Exception anything thrown while building or running the job is propagated unchanged
   */
  @Override
  public int run(final String[] args) throws Exception {
    // Make sure there are at least 3 parameters
    if (args.length < 3) {
      System.err.println("ERROR: Wrong number of parameters: " + args.length);
      return printUsage();
    }
    JobClient.runJob(createSubmittableJob(args));
    return 0;
  }

  /**
   * Command line entry point. Runs this tool through {@link ToolRunner} with a configuration
   * obtained from {@link HBaseConfiguration#create()} and exits the JVM with the tool's return
   * code.
   * @param args command line arguments, see {@link #printUsage()}
   * @throws Exception if the tool fails
   */
  public static void main(String[] args) throws Exception {
    int errCode = ToolRunner.run(HBaseConfiguration.create(), new RowCounter(), args);
    System.exit(errCode);
  }
}