001/* 002 * Licensed to the Apache Software Foundation (ASF) under one 003 * or more contributor license agreements. See the NOTICE file 004 * distributed with this work for additional information 005 * regarding copyright ownership. The ASF licenses this file 006 * to you under the Apache License, Version 2.0 (the 007 * "License"); you may not use this file except in compliance 008 * with the License. You may obtain a copy of the License at 009 * 010 * http://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, software 013 * distributed under the License is distributed on an "AS IS" BASIS, 014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 015 * See the License for the specific language governing permissions and 016 * limitations under the License. 017 */ 018package org.apache.hadoop.hbase.mapred; 019 020import java.io.IOException; 021import org.apache.hadoop.conf.Configured; 022import org.apache.hadoop.fs.Path; 023import org.apache.hadoop.hbase.HBaseConfiguration; 024import org.apache.hadoop.hbase.client.Result; 025import org.apache.hadoop.hbase.io.ImmutableBytesWritable; 026import org.apache.hadoop.mapred.FileOutputFormat; 027import org.apache.hadoop.mapred.JobClient; 028import org.apache.hadoop.mapred.JobConf; 029import org.apache.hadoop.mapred.OutputCollector; 030import org.apache.hadoop.mapred.Reporter; 031import org.apache.hadoop.util.Tool; 032import org.apache.hadoop.util.ToolRunner; 033import org.apache.yetus.audience.InterfaceAudience; 034 035/** 036 * A job with a map to count rows. Map outputs table rows IF the input row has columns that have 037 * content. Uses a org.apache.hadoop.mapred.lib.IdentityReducer 038 */ 039@InterfaceAudience.Public 040public class RowCounter extends Configured implements Tool { 041 // Name of this 'program' 042 static final String NAME = "rowcounter"; 043 044 /** 045 * Mapper that runs the count. 046 */ 047 static class RowCounterMapper implements TableMap<ImmutableBytesWritable, Result> { 048 private static enum Counters { 049 ROWS 050 } 051 052 public void map(ImmutableBytesWritable row, Result values, 053 OutputCollector<ImmutableBytesWritable, Result> output, Reporter reporter) 054 throws IOException { 055 // Count every row containing data, whether it's in qualifiers or values 056 reporter.incrCounter(Counters.ROWS, 1); 057 } 058 059 public void configure(JobConf jc) { 060 // Nothing to do. 061 } 062 063 public void close() throws IOException { 064 // Nothing to do. 065 } 066 } 067 068 /** 069 * n * @return the JobConf n 070 */ 071 public JobConf createSubmittableJob(String[] args) throws IOException { 072 JobConf c = new JobConf(getConf(), getClass()); 073 c.setJobName(NAME); 074 // Columns are space delimited 075 StringBuilder sb = new StringBuilder(); 076 final int columnoffset = 2; 077 for (int i = columnoffset; i < args.length; i++) { 078 if (i > columnoffset) { 079 sb.append(" "); 080 } 081 sb.append(args[i]); 082 } 083 // Second argument is the table name. 084 TableMapReduceUtil.initTableMapJob(args[1], sb.toString(), RowCounterMapper.class, 085 ImmutableBytesWritable.class, Result.class, c); 086 c.setNumReduceTasks(0); 087 // First arg is the output directory. 088 FileOutputFormat.setOutputPath(c, new Path(args[0])); 089 return c; 090 } 091 092 static int printUsage() { 093 System.out.println(NAME + " <outputdir> <tablename> <column1> [<column2>...]"); 094 return -1; 095 } 096 097 public int run(final String[] args) throws Exception { 098 // Make sure there are at least 3 parameters 099 if (args.length < 3) { 100 System.err.println("ERROR: Wrong number of parameters: " + args.length); 101 return printUsage(); 102 } 103 JobClient.runJob(createSubmittableJob(args)); 104 return 0; 105 } 106 107 /** 108 * nn 109 */ 110 public static void main(String[] args) throws Exception { 111 int errCode = ToolRunner.run(HBaseConfiguration.create(), new RowCounter(), args); 112 System.exit(errCode); 113 } 114}