/**
 *
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.mapred;

import java.io.IOException;

import org.apache.yetus.audience.InterfaceAudience;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reporter;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;

/**
 * A job with a map to count rows.
 * The mapper increments the {@code ROWS} counter once for every row it is
 * handed and emits nothing to its output collector. The job is configured
 * with zero reduce tasks, so no reducer runs.
 */
@InterfaceAudience.Public
public class RowCounter extends Configured implements Tool {
  // Name of this 'program'
  static final String NAME = "rowcounter";

  /**
   * Mapper that runs the count.
   */
  static class RowCounterMapper
  implements TableMap<ImmutableBytesWritable, Result> {
    // Nested enums are implicitly static, so no explicit modifier is needed.
    private enum Counters {ROWS}

    /**
     * Counts one row. Nothing is written to {@code output}; the count is
     * reported only through the {@code ROWS} counter.
     *
     * @param row      key of the current row (not inspected)
     * @param values   content of the current row (not inspected)
     * @param output   collector for map output (unused)
     * @param reporter receives the counter increment
     * @throws IOException declared by the mapper contract; not thrown here
     */
    @Override
    public void map(ImmutableBytesWritable row, Result values,
        OutputCollector<ImmutableBytesWritable, Result> output,
        Reporter reporter)
    throws IOException {
      // Count every row containing data, whether it's in qualifiers or values
      reporter.incrCounter(Counters.ROWS, 1);
    }

    @Override
    public void configure(JobConf jc) {
      // Nothing to do.
    }

    @Override
    public void close() throws IOException {
      // Nothing to do.
    }
  }

  /**
   * Builds the job configuration from positional command-line arguments.
   *
   * @param args {@code args[0]} is the output directory, {@code args[1]} is
   *   the table name, and every element from {@code args[2]} onwards is a
   *   column; the columns are passed on as one space-delimited string.
   *   At least two elements are indexed unconditionally.
   * @return the JobConf
   * @throws IOException declared for the job setup calls made here
   */
  public JobConf createSubmittableJob(String[] args) throws IOException {
    JobConf c = new JobConf(getConf(), getClass());
    c.setJobName(NAME);
    // Columns are space delimited
    StringBuilder sb = new StringBuilder();
    final int columnoffset = 2;
    for (int i = columnoffset; i < args.length; i++) {
      if (i > columnoffset) {
        sb.append(" ");
      }
      sb.append(args[i]);
    }
    // Second argument is the table name.
    TableMapReduceUtil.initTableMapJob(args[1], sb.toString(),
      RowCounterMapper.class, ImmutableBytesWritable.class, Result.class, c);
    // Map-only job: the result is carried by the counter, not by reducers.
    c.setNumReduceTasks(0);
    // First arg is the output directory.
    FileOutputFormat.setOutputPath(c, new Path(args[0]));
    return c;
  }

  /**
   * Prints the command-line synopsis to standard output.
   *
   * @return always {@code -1}, so callers can return it as an error code
   */
  static int printUsage() {
    System.out.println(NAME +
      " <outputdir> <tablename> <column1> [<column2>...]");
    return -1;
  }

  /**
   * Validates the argument count, then builds and runs the job.
   *
   * @param args see {@link #createSubmittableJob(String[])}; at least three
   *   elements are required here
   * @return {@code -1} if fewer than three arguments were given, otherwise
   *   {@code 0} once {@code JobClient.runJob} has returned
   * @throws Exception anything raised while building or running the job
   */
  @Override
  public int run(final String[] args) throws Exception {
    // Make sure there are at least 3 parameters
    if (args.length < 3) {
      System.err.println("ERROR: Wrong number of parameters: " + args.length);
      return printUsage();
    }
    JobClient.runJob(createSubmittableJob(args));
    return 0;
  }

  /**
   * Command-line entry point. Runs this tool through {@link ToolRunner} with
   * a configuration from {@link HBaseConfiguration#create()} and exits the
   * JVM with the code the tool returned.
   *
   * @param args command-line arguments, passed to {@link ToolRunner}
   * @throws Exception anything raised by the tool run
   */
  public static void main(String[] args) throws Exception {
    int errCode = ToolRunner.run(HBaseConfiguration.create(), new RowCounter(), args);
    System.exit(errCode);
  }
}