@InterfaceAudience.Public
public class TableMapReduceUtil extends Object

Utility for TableMap and TableReduce jobs.

| Constructor and Description |
|---|
| TableMapReduceUtil() | 

| Modifier and Type | Method and Description |
|---|---|
| static void | addDependencyJars(org.apache.hadoop.mapred.JobConf job) |
| private static int | getRegionCount(org.apache.hadoop.conf.Configuration conf, TableName tableName) |
| static void | initCredentials(org.apache.hadoop.mapred.JobConf job) |
| static void | initMultiTableSnapshotMapperJob(Map<String,Collection<Scan>> snapshotScans, Class<? extends TableMap> mapper, Class<?> outputKeyClass, Class<?> outputValueClass, org.apache.hadoop.mapred.JobConf job, boolean addDependencyJars, org.apache.hadoop.fs.Path tmpRestoreDir) Sets up the job for reading from one or more table snapshots, with one or more scans per snapshot. |
| static void | initTableMapJob(String table, String columns, Class<? extends TableMap> mapper, Class<?> outputKeyClass, Class<?> outputValueClass, org.apache.hadoop.mapred.JobConf job) Use this before submitting a TableMap job. |
| static void | initTableMapJob(String table, String columns, Class<? extends TableMap> mapper, Class<?> outputKeyClass, Class<?> outputValueClass, org.apache.hadoop.mapred.JobConf job, boolean addDependencyJars) |
| static void | initTableMapJob(String table, String columns, Class<? extends TableMap> mapper, Class<?> outputKeyClass, Class<?> outputValueClass, org.apache.hadoop.mapred.JobConf job, boolean addDependencyJars, Class<? extends org.apache.hadoop.mapred.InputFormat> inputFormat) Use this before submitting a TableMap job. |
| static void | initTableReduceJob(String table, Class<? extends TableReduce> reducer, org.apache.hadoop.mapred.JobConf job) Use this before submitting a TableReduce job. |
| static void | initTableReduceJob(String table, Class<? extends TableReduce> reducer, org.apache.hadoop.mapred.JobConf job, Class partitioner) Use this before submitting a TableReduce job. |
| static void | initTableReduceJob(String table, Class<? extends TableReduce> reducer, org.apache.hadoop.mapred.JobConf job, Class partitioner, boolean addDependencyJars) Use this before submitting a TableReduce job. |
| static void | initTableSnapshotMapJob(String snapshotName, String columns, Class<? extends TableMap> mapper, Class<?> outputKeyClass, Class<?> outputValueClass, org.apache.hadoop.mapred.JobConf job, boolean addDependencyJars, org.apache.hadoop.fs.Path tmpRestoreDir) Sets up the job for reading from a table snapshot. |
| static void | initTableSnapshotMapJob(String snapshotName, String columns, Class<? extends TableMap> mapper, Class<?> outputKeyClass, Class<?> outputValueClass, org.apache.hadoop.mapred.JobConf jobConf, boolean addDependencyJars, org.apache.hadoop.fs.Path tmpRestoreDir, RegionSplitter.SplitAlgorithm splitAlgo, int numSplitsPerRegion) Sets up the job for reading from a table snapshot. |
| static void | limitNumMapTasks(String table, org.apache.hadoop.mapred.JobConf job) Ensures that the number of map tasks for the given job configuration does not exceed the number of regions for the given table. |
| static void | limitNumReduceTasks(String table, org.apache.hadoop.mapred.JobConf job) Ensures that the number of reduce tasks for the given job configuration does not exceed the number of regions for the given table. |
| static void | setNumMapTasks(String table, org.apache.hadoop.mapred.JobConf job) Sets the number of map tasks for the given job configuration to the number of regions the given table has. |
| static void | setNumReduceTasks(String table, org.apache.hadoop.mapred.JobConf job) Sets the number of reduce tasks for the given job configuration to the number of regions the given table has. |
| static void | setScannerCaching(org.apache.hadoop.mapred.JobConf job, int batchSize) Sets the number of rows to return and cache with each scanner iteration. |
public TableMapReduceUtil()
public static void initTableMapJob(String table, String columns, Class<? extends TableMap> mapper, Class<?> outputKeyClass, Class<?> outputValueClass, org.apache.hadoop.mapred.JobConf job)

Use this before submitting a TableMap job.

Parameters:
- table - The table name to read from.
- columns - The columns to scan.
- mapper - The mapper class to use.
- outputKeyClass - The class of the output key.
- outputValueClass - The class of the output value.
- job - The current job configuration to adjust.

public static void initTableMapJob(String table, String columns, Class<? extends TableMap> mapper, Class<?> outputKeyClass, Class<?> outputValueClass, org.apache.hadoop.mapred.JobConf job, boolean addDependencyJars)
public static void initTableMapJob(String table, String columns, Class<? extends TableMap> mapper, Class<?> outputKeyClass, Class<?> outputValueClass, org.apache.hadoop.mapred.JobConf job, boolean addDependencyJars, Class<? extends org.apache.hadoop.mapred.InputFormat> inputFormat)

Use this before submitting a TableMap job.

Parameters:
- table - The table name to read from.
- columns - The columns to scan.
- mapper - The mapper class to use.
- outputKeyClass - The class of the output key.
- outputValueClass - The class of the output value.
- job - The current job configuration to adjust.
- addDependencyJars - upload HBase jars and jars for any of the configured job classes via the distributed cache (tmpjars).
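The usual flow is to build a JobConf from an HBase-aware Configuration, let initTableMapJob wire in the table input format and mapper, then submit. Below is a minimal sketch: the table name "access_logs", the family spec "f1:", and the output path are illustrative, and the bundled IdentityTableMap (which passes each row through unchanged) stands in for a real mapper.

```java
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapred.IdentityTableMap;
import org.apache.hadoop.hbase.mapred.TableMapReduceUtil;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;

public class ScanDriver {
  public static void main(String[] args) throws Exception {
    // Start from an HBase-aware configuration so the job carries the
    // ZooKeeper quorum and other cluster settings.
    JobConf job = new JobConf(HBaseConfiguration.create(), ScanDriver.class);
    job.setJobName("scan-access-logs");

    // Wires in the table input format, the mapper, and the map output types.
    TableMapReduceUtil.initTableMapJob(
        "access_logs",                 // table to read (illustrative)
        "f1:",                         // space-delimited old-style column spec
        IdentityTableMap.class,        // emits each (row, Result) unchanged
        ImmutableBytesWritable.class,  // map output key class
        Result.class,                  // map output value class
        job);

    job.setNumReduceTasks(0);          // map-only scan
    FileOutputFormat.setOutputPath(job, new Path("/tmp/scan-out"));
    JobClient.runJob(job);
  }
}
```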
public static void initMultiTableSnapshotMapperJob(Map<String,Collection<Scan>> snapshotScans, Class<? extends TableMap> mapper, Class<?> outputKeyClass, Class<?> outputValueClass, org.apache.hadoop.mapred.JobConf job, boolean addDependencyJars, org.apache.hadoop.fs.Path tmpRestoreDir) throws IOException

Sets up the job for reading from one or more table snapshots, with one or more scans per snapshot.

Parameters:
- snapshotScans - map of snapshot name to scans on that snapshot.
- mapper - The mapper class to use.
- outputKeyClass - The class of the output key.
- outputValueClass - The class of the output value.
- job - The current job to adjust. Make sure the passed job is carrying all necessary HBase configuration.
- addDependencyJars - upload HBase jars and jars for any of the configured job classes via the distributed cache (tmpjars).
- tmpRestoreDir - a temporary directory to copy the snapshot files into.

Throws:
- IOException
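For example, a job that reads two snapshots with one Scan each might be configured as sketched below, continuing the driver above. The snapshot names and restore path are illustrative; this also assumes imports for java.util.{Map, HashMap, Collection, Collections}, org.apache.hadoop.hbase.client.Scan, and org.apache.hadoop.hbase.util.Bytes.

```java
// Map each snapshot name to the collection of scans to run against it.
Map<String, Collection<Scan>> snapshotScans = new HashMap<>();
snapshotScans.put("users_snap",
    Collections.singletonList(new Scan().addFamily(Bytes.toBytes("f1"))));
snapshotScans.put("orders_snap",
    Collections.singletonList(new Scan().addFamily(Bytes.toBytes("f1"))));

TableMapReduceUtil.initMultiTableSnapshotMapperJob(
    snapshotScans,
    IdentityTableMap.class,
    ImmutableBytesWritable.class,
    Result.class,
    job,
    true,                                 // ship dependency jars
    new Path("/tmp/snapshot-restore"));   // writable dir, outside rootdir
```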
public static void initTableSnapshotMapJob(String snapshotName, String columns, Class<? extends TableMap> mapper, Class<?> outputKeyClass, Class<?> outputValueClass, org.apache.hadoop.mapred.JobConf job, boolean addDependencyJars, org.apache.hadoop.fs.Path tmpRestoreDir) throws IOException

Sets up the job for reading from a table snapshot.

Parameters:
- snapshotName - The name of the snapshot (of a table) to read from.
- columns - The columns to scan.
- mapper - The mapper class to use.
- outputKeyClass - The class of the output key.
- outputValueClass - The class of the output value.
- job - The current job to adjust. Make sure the passed job is carrying all necessary HBase configuration.
- addDependencyJars - upload HBase jars and jars for any of the configured job classes via the distributed cache (tmpjars).
- tmpRestoreDir - a temporary directory to copy the snapshot files into. The current user should have write permissions to this directory, and it should not be a subdirectory of rootdir. The restore directory can be deleted after the job is finished.

Throws:
- IOException - When setting up the details fails.

See Also:
- TableSnapshotInputFormat
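A sketch of wiring a job to a snapshot rather than the live table; the snapshot name and restore path are illustrative, and the same constraints on tmpRestoreDir apply as documented above.

```java
JobConf job = new JobConf(HBaseConfiguration.create());
TableMapReduceUtil.initTableSnapshotMapJob(
    "users_snap",                        // read the snapshot, not the table
    "f1:",                               // columns to scan
    IdentityTableMap.class,
    ImmutableBytesWritable.class,
    Result.class,
    job,
    true,                                // ship dependency jars
    new Path("/tmp/snapshot-restore"));  // tmpRestoreDir
// The job now reads snapshot files directly from the filesystem,
// bypassing the region servers.
```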
public static void initTableSnapshotMapJob(String snapshotName, String columns, Class<? extends TableMap> mapper, Class<?> outputKeyClass, Class<?> outputValueClass, org.apache.hadoop.mapred.JobConf jobConf, boolean addDependencyJars, org.apache.hadoop.fs.Path tmpRestoreDir, RegionSplitter.SplitAlgorithm splitAlgo, int numSplitsPerRegion) throws IOException

Sets up the job for reading from a table snapshot.

Parameters:
- snapshotName - The name of the snapshot (of a table) to read from.
- columns - The columns to scan.
- mapper - The mapper class to use.
- outputKeyClass - The class of the output key.
- outputValueClass - The class of the output value.
- jobConf - The current job to adjust. Make sure the passed job is carrying all necessary HBase configuration.
- addDependencyJars - upload HBase jars and jars for any of the configured job classes via the distributed cache (tmpjars).
- tmpRestoreDir - a temporary directory to copy the snapshot files into. The current user should have write permissions to this directory, and it should not be a subdirectory of rootdir. The restore directory can be deleted after the job is finished.
- splitAlgo - algorithm to split
- numSplitsPerRegion - how many input splits to generate per one region

Throws:
- IOException - When setting up the details fails.

See Also:
- TableSnapshotInputFormat

public static void initTableReduceJob(String table, Class<? extends TableReduce> reducer, org.apache.hadoop.mapred.JobConf job) throws IOException

Use this before submitting a TableReduce job.
Parameters:
- table - The output table.
- reducer - The reducer class to use.
- job - The current job configuration to adjust.

Throws:
- IOException - When determining the region count fails.

public static void initTableReduceJob(String table, Class<? extends TableReduce> reducer, org.apache.hadoop.mapred.JobConf job, Class partitioner) throws IOException

Use this before submitting a TableReduce job.
Parameters:
- table - The output table.
- reducer - The reducer class to use.
- job - The current job configuration to adjust.
- partitioner - Partitioner to use. Pass null to use the default partitioner.

Throws:
- IOException - When determining the region count fails.

public static void initTableReduceJob(String table, Class<? extends TableReduce> reducer, org.apache.hadoop.mapred.JobConf job, Class partitioner, boolean addDependencyJars) throws IOException

Use this before submitting a TableReduce job.
Parameters:
- table - The output table.
- reducer - The reducer class to use.
- job - The current job configuration to adjust.
- partitioner - Partitioner to use. Pass null to use the default partitioner.
- addDependencyJars - upload HBase jars and jars for any of the configured job classes via the distributed cache (tmpjars).

Throws:
- IOException - When determining the region count fails.
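A sketch of the output side, assuming an existing table named "summary" (illustrative) and the bundled IdentityTableReduce, which writes each incoming Put through to the table unchanged:

```java
// Wires in the table output format and the reducer; a null partitioner
// selects the default one.
TableMapReduceUtil.initTableReduceJob(
    "summary",                   // output table (illustrative)
    IdentityTableReduce.class,   // writes incoming Puts as-is
    job,
    null,                        // use the default partitioner
    true);                       // ship dependency jars
```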
public static void initCredentials(org.apache.hadoop.mapred.JobConf job) throws IOException

Throws:
- IOException
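On a Kerberos-secured cluster the job needs HBase delegation tokens before submission; a sketch of the usual pattern follows, and the call should be harmless when security is disabled.

```java
// Obtain HBase delegation tokens for the job before handing it to JobClient.
TableMapReduceUtil.initCredentials(job);
JobClient.runJob(job);
```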
public static void limitNumReduceTasks(String table, org.apache.hadoop.mapred.JobConf job) throws IOException

Ensures that the number of reduce tasks for the given job configuration does not exceed the number of regions for the given table.

Parameters:
- table - The table to get the region count for.
- job - The current job configuration to adjust.

Throws:
- IOException - When retrieving the table details fails.
public static void limitNumMapTasks(String table, org.apache.hadoop.mapred.JobConf job) throws IOException

Ensures that the number of map tasks for the given job configuration does not exceed the number of regions for the given table.

Parameters:
- table - The table to get the region count for.
- job - The current job configuration to adjust.

Throws:
- IOException - When retrieving the table details fails.
public static void setNumReduceTasks(String table, org.apache.hadoop.mapred.JobConf job) throws IOException

Sets the number of reduce tasks for the given job configuration to the number of regions the given table has.

Parameters:
- table - The table to get the region count for.
- job - The current job configuration to adjust.

Throws:
- IOException - When retrieving the table details fails.
public static void setNumMapTasks(String table, org.apache.hadoop.mapred.JobConf job) throws IOException

Sets the number of map tasks for the given job configuration to the number of regions the given table has.

Parameters:
- table - The table to get the region count for.
- job - The current job configuration to adjust.

Throws:
- IOException - When retrieving the table details fails.
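These four helpers size the job from the table's region count. A sketch, reusing the illustrative table names from the examples above:

```java
// One reduce task per region of the output table, so each reducer
// writes to roughly one region...
TableMapReduceUtil.setNumReduceTasks("summary", job);
// ...and never more map tasks than the input table has regions.
TableMapReduceUtil.limitNumMapTasks("access_logs", job);
```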
public static void setScannerCaching(org.apache.hadoop.mapred.JobConf job, int batchSize)

Sets the number of rows to return and cache with each scanner iteration.

Parameters:
- job - The current job configuration to adjust.
- batchSize - The number of rows to return in batch with each scanner iteration.
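Higher caching values mean fewer round-trips to the region servers at the cost of more memory on both client and server. For instance:

```java
// Fetch 500 rows per scanner iteration instead of the configured default.
TableMapReduceUtil.setScannerCaching(job, 500);
```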
public static void addDependencyJars(org.apache.hadoop.mapred.JobConf job) throws IOException

private static int getRegionCount(org.apache.hadoop.conf.Configuration conf, TableName tableName) throws IOException
Throws:
- IOException

Copyright © 2007–2021 The Apache Software Foundation. All rights reserved.