@InterfaceAudience.Public @InterfaceStability.Evolving public class HFileOutputFormat2 extends org.apache.hadoop.mapreduce.lib.output.FileOutputFormat<ImmutableBytesWritable,Cell>
Writes HFiles. Calling write(null,null) will forcibly roll all HFiles being written. Using this class as part of a MapReduce job is best done using configureIncrementalLoad(Job, Table, RegionLocator).
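For orientation, the following driver sketch shows the recommended incremental-load setup. It is not part of the Javadoc: the table name "mytable", the tab-separated input, the column family "f", and the TsvToKeyValueMapper class are illustrative assumptions.

```java
import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.ConnectionFactory;
import org.apache.hadoop.hbase.client.RegionLocator;
import org.apache.hadoop.hbase.client.Table;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.HFileOutputFormat2;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class BulkLoadDriver {

  // Illustrative mapper: turns "row<TAB>qualifier<TAB>value" lines into KeyValues.
  static class TsvToKeyValueMapper
      extends Mapper<LongWritable, Text, ImmutableBytesWritable, KeyValue> {
    @Override
    protected void map(LongWritable offset, Text line, Context ctx)
        throws IOException, InterruptedException {
      String[] parts = line.toString().split("\t", 3);
      if (parts.length < 3) return; // skip malformed lines
      byte[] row = Bytes.toBytes(parts[0]);
      KeyValue kv = new KeyValue(row, Bytes.toBytes("f"),
          Bytes.toBytes(parts[1]), Bytes.toBytes(parts[2]));
      ctx.write(new ImmutableBytesWritable(row), kv);
    }
  }

  public static void main(String[] args) throws Exception {
    Configuration conf = HBaseConfiguration.create();
    Job job = Job.getInstance(conf, "hfile-bulk-load");
    job.setJarByClass(BulkLoadDriver.class);
    job.setMapperClass(TsvToKeyValueMapper.class);
    // configureIncrementalLoad picks its sort reducer from the map output
    // value class, so set the map output classes before calling it.
    job.setMapOutputKeyClass(ImmutableBytesWritable.class);
    job.setMapOutputValueClass(KeyValue.class);
    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    TableName name = TableName.valueOf("mytable");
    try (Connection conn = ConnectionFactory.createConnection(conf);
         Table table = conn.getTable(name);
         RegionLocator locator = conn.getRegionLocator(name)) {
      // Sets the TotalOrderPartitioner, reduce task count, output format,
      // and per-family compression/bloom/block-size/encoding settings.
      HFileOutputFormat2.configureIncrementalLoad(job, table, locator);
    }
    System.exit(job.waitForCompletion(true) ? 0 : 1);
  }
}
```

Once the job completes, the HFiles under the output directory can be moved into the table's regions with LoadIncrementalHFiles (the completebulkload tool).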
Modifier and Type | Class and Description
---|---
(package private) static class | HFileOutputFormat2.WriterLength
Modifier and Type | Field and Description
---|---
private static String | BLOCK_SIZE_FAMILIES_CONF_KEY
private static String | BLOOM_TYPE_FAMILIES_CONF_KEY
private static String | COMPRESSION_FAMILIES_CONF_KEY
private static String | DATABLOCK_ENCODING_FAMILIES_CONF_KEY
static String | DATABLOCK_ENCODING_OVERRIDE_CONF_KEY
private static org.apache.commons.logging.Log | LOG
Constructor and Description
---
HFileOutputFormat2()
Modifier and Type | Method and Description
---|---
(package private) static void | configureBlockSize(HTableDescriptor tableDescriptor, org.apache.hadoop.conf.Configuration conf) Serialize the column family to block size map to the configuration.
(package private) static void | configureBloomType(HTableDescriptor tableDescriptor, org.apache.hadoop.conf.Configuration conf) Serialize the column family to bloom type map to the configuration.
(package private) static void | configureCompression(org.apache.hadoop.conf.Configuration conf, HTableDescriptor tableDescriptor) Serialize the column family to compression algorithm map to the configuration.
(package private) static void | configureDataBlockEncoding(HTableDescriptor tableDescriptor, org.apache.hadoop.conf.Configuration conf) Serialize the column family to data block encoding map to the configuration.
static void | configureIncrementalLoad(org.apache.hadoop.mapreduce.Job job, HTable table) Deprecated. Use configureIncrementalLoad(Job, Table, RegionLocator) instead.
static void | configureIncrementalLoad(org.apache.hadoop.mapreduce.Job job, HTableDescriptor tableDescriptor, RegionLocator regionLocator) Configure a MapReduce Job to perform an incremental load into the given table.
(package private) static void | configureIncrementalLoad(org.apache.hadoop.mapreduce.Job job, HTableDescriptor tableDescriptor, RegionLocator regionLocator, Class<? extends org.apache.hadoop.mapreduce.OutputFormat<?,?>> cls)
static void | configureIncrementalLoad(org.apache.hadoop.mapreduce.Job job, Table table, RegionLocator regionLocator) Configure a MapReduce Job to perform an incremental load into the given table.
static void | configureIncrementalLoadMap(org.apache.hadoop.mapreduce.Job job, Table table)
(package private) static void | configurePartitioner(org.apache.hadoop.mapreduce.Job job, List<ImmutableBytesWritable> splitPoints) Configure job with a TotalOrderPartitioner, partitioning against splitPoints.
(package private) static Map<byte[],Integer> | createFamilyBlockSizeMap(org.apache.hadoop.conf.Configuration conf) Runs inside the task to deserialize the column family to block size map from the configuration.
(package private) static Map<byte[],BloomType> | createFamilyBloomTypeMap(org.apache.hadoop.conf.Configuration conf) Runs inside the task to deserialize the column family to bloom filter type map from the configuration.
(package private) static Map<byte[],Compression.Algorithm> | createFamilyCompressionMap(org.apache.hadoop.conf.Configuration conf) Runs inside the task to deserialize the column family to compression algorithm map from the configuration.
private static Map<byte[],String> | createFamilyConfValueMap(org.apache.hadoop.conf.Configuration conf, String confName) Runs inside the task to deserialize the column family to conf value map from the configuration.
(package private) static Map<byte[],DataBlockEncoding> | createFamilyDataBlockEncodingMap(org.apache.hadoop.conf.Configuration conf) Runs inside the task to deserialize the column family to data block encoding type map from the configuration.
(package private) static <V extends Cell> org.apache.hadoop.mapreduce.RecordWriter<ImmutableBytesWritable,V> | createRecordWriter(org.apache.hadoop.mapreduce.TaskAttemptContext context, org.apache.hadoop.mapreduce.OutputCommitter committer)
org.apache.hadoop.mapreduce.RecordWriter<ImmutableBytesWritable,Cell> | getRecordWriter(org.apache.hadoop.mapreduce.TaskAttemptContext context)
private static List<ImmutableBytesWritable> | getRegionStartKeys(RegionLocator table) Return the start keys of all of the regions in this table, as a list of ImmutableBytesWritable.
private static void | writePartitions(org.apache.hadoop.conf.Configuration conf, org.apache.hadoop.fs.Path partitionsPath, List<ImmutableBytesWritable> startKeys) Write out a SequenceFile that can be read by TotalOrderPartitioner that contains the split points in startKeys.
Methods inherited from class org.apache.hadoop.mapreduce.lib.output.FileOutputFormat:
checkOutputSpecs, getCompressOutput, getDefaultWorkFile, getOutputCommitter, getOutputCompressorClass, getOutputName, getOutputPath, getPathForWorkFile, getUniqueFile, getWorkOutputPath, setCompressOutput, setOutputCompressorClass, setOutputName, setOutputPath
private static final org.apache.commons.logging.Log LOG
private static final String COMPRESSION_FAMILIES_CONF_KEY
private static final String BLOOM_TYPE_FAMILIES_CONF_KEY
private static final String BLOCK_SIZE_FAMILIES_CONF_KEY
private static final String DATABLOCK_ENCODING_FAMILIES_CONF_KEY
public static final String DATABLOCK_ENCODING_OVERRIDE_CONF_KEY
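DATABLOCK_ENCODING_OVERRIDE_CONF_KEY is the only public field; it lets a job force a single data block encoding for every HFile it writes, overriding the per-family table settings. A minimal sketch; the choice of FAST_DIFF is an illustrative assumption:

```java
import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding;
import org.apache.hadoop.hbase.mapreduce.HFileOutputFormat2;
import org.apache.hadoop.mapreduce.Job;

public final class EncodingOverride {
  // Force one encoding for every HFile this job writes; FAST_DIFF is
  // illustrative, any DataBlockEncoding enum name should work here.
  public static void apply(Job job) {
    job.getConfiguration().set(
        HFileOutputFormat2.DATABLOCK_ENCODING_OVERRIDE_CONF_KEY,
        DataBlockEncoding.FAST_DIFF.name());
  }
}
```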
public org.apache.hadoop.mapreduce.RecordWriter<ImmutableBytesWritable,Cell> getRecordWriter(org.apache.hadoop.mapreduce.TaskAttemptContext context) throws IOException, InterruptedException
Overrides:
getRecordWriter in class org.apache.hadoop.mapreduce.lib.output.FileOutputFormat<ImmutableBytesWritable,Cell>
Throws:
IOException
InterruptedException
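Per the class description, writing a null key and value through the returned RecordWriter forcibly rolls all open HFiles. A hedged sketch of a reducer that uses this; the 1 GiB threshold and the reducer itself are illustrative assumptions, not HBase's own sort reducers:

```java
import java.io.IOException;

import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.mapreduce.Reducer;

// Illustrative reducer that rolls the open HFiles after roughly 1 GiB
// of cells by emitting a null key/value, as the class description allows.
public class RollingCellReducer
    extends Reducer<ImmutableBytesWritable, KeyValue, ImmutableBytesWritable, KeyValue> {

  private static final long ROLL_THRESHOLD = 1L << 30; // illustrative 1 GiB
  private long bytesWritten = 0;

  @Override
  protected void reduce(ImmutableBytesWritable row, Iterable<KeyValue> cells, Context ctx)
      throws IOException, InterruptedException {
    for (KeyValue kv : cells) {
      ctx.write(row, kv);
      bytesWritten += kv.getLength();
    }
    // Roll at a row boundary so a single row is not split across HFiles.
    if (bytesWritten > ROLL_THRESHOLD) {
      ctx.write(null, null); // forcibly rolls all HFiles being written
      bytesWritten = 0;
    }
  }
}
```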
static <V extends Cell> org.apache.hadoop.mapreduce.RecordWriter<ImmutableBytesWritable,V> createRecordWriter(org.apache.hadoop.mapreduce.TaskAttemptContext context, org.apache.hadoop.mapreduce.OutputCommitter committer) throws IOException
Throws:
IOException
private static List<ImmutableBytesWritable> getRegionStartKeys(RegionLocator table) throws IOException
Throws:
IOException
private static void writePartitions(org.apache.hadoop.conf.Configuration conf, org.apache.hadoop.fs.Path partitionsPath, List<ImmutableBytesWritable> startKeys) throws IOException
Write out a SequenceFile that can be read by TotalOrderPartitioner that contains the split points in startKeys.
Throws:
IOException
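For intuition, here is a simplified sketch of this step using standard Hadoop APIs. It is an assumption-laden approximation: the real method also sorts the start keys and handles the first (empty) region start key, details omitted here.

```java
import java.util.List;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.mapreduce.lib.partition.TotalOrderPartitioner;

public final class SplitPointsSketch {
  // Write split points as a SequenceFile of (key, NullWritable) pairs,
  // then point TotalOrderPartitioner at it. Simplified: sorting and
  // handling of the first (empty) region start key are omitted.
  static void writeSplitPoints(Configuration conf, Path partitionsPath,
                               List<ImmutableBytesWritable> splitPoints) throws Exception {
    try (SequenceFile.Writer writer = SequenceFile.createWriter(conf,
        SequenceFile.Writer.file(partitionsPath),
        SequenceFile.Writer.keyClass(ImmutableBytesWritable.class),
        SequenceFile.Writer.valueClass(NullWritable.class))) {
      for (ImmutableBytesWritable splitPoint : splitPoints) {
        writer.append(splitPoint, NullWritable.get());
      }
    }
    TotalOrderPartitioner.setPartitionFile(conf, partitionsPath);
  }
}
```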
@Deprecated public static void configureIncrementalLoad(org.apache.hadoop.mapreduce.Job job, HTable table) throws IOException
Deprecated. Use configureIncrementalLoad(Job, Table, RegionLocator) instead.
Throws:
IOException
public static void configureIncrementalLoad(org.apache.hadoop.mapreduce.Job job, Table table, RegionLocator regionLocator) throws IOException
Configure a MapReduce Job to perform an incremental load into the given table.
Throws:
IOException
public static void configureIncrementalLoad(org.apache.hadoop.mapreduce.Job job, HTableDescriptor tableDescriptor, RegionLocator regionLocator) throws IOException
Configure a MapReduce Job to perform an incremental load into the given table.
Throws:
IOException
static void configureIncrementalLoad(org.apache.hadoop.mapreduce.Job job, HTableDescriptor tableDescriptor, RegionLocator regionLocator, Class<? extends org.apache.hadoop.mapreduce.OutputFormat<?,?>> cls) throws IOException, UnsupportedEncodingException
public static void configureIncrementalLoadMap(org.apache.hadoop.mapreduce.Job job, Table table) throws IOException
Throws:
IOException
static Map<byte[],Compression.Algorithm> createFamilyCompressionMap(org.apache.hadoop.conf.Configuration conf)
Parameters:
conf - to read the serialized values from

static Map<byte[],BloomType> createFamilyBloomTypeMap(org.apache.hadoop.conf.Configuration conf)
Parameters:
conf - to read the serialized values from

static Map<byte[],Integer> createFamilyBlockSizeMap(org.apache.hadoop.conf.Configuration conf)
Parameters:
conf - to read the serialized values from

static Map<byte[],DataBlockEncoding> createFamilyDataBlockEncodingMap(org.apache.hadoop.conf.Configuration conf)
Parameters:
conf - to read the serialized values from

private static Map<byte[],String> createFamilyConfValueMap(org.apache.hadoop.conf.Configuration conf, String confName)
Parameters:
conf - to read the serialized values from
confName - conf key to read from the configuration

static void configurePartitioner(org.apache.hadoop.mapreduce.Job job, List<ImmutableBytesWritable> splitPoints) throws IOException
Configure job with a TotalOrderPartitioner, partitioning against splitPoints. Cleans up the partitions file after the job exits.
Throws:
IOException
static void configureCompression(org.apache.hadoop.conf.Configuration conf, HTableDescriptor tableDescriptor) throws UnsupportedEncodingException
Parameters:
tableDescriptor - to read the properties from
conf - to persist serialized values into
Throws:
IOException - on failure to read column family descriptors
UnsupportedEncodingException
static void configureBlockSize(HTableDescriptor tableDescriptor, org.apache.hadoop.conf.Configuration conf) throws UnsupportedEncodingException
Parameters:
tableDescriptor - to read the properties from
conf - to persist serialized values into
Throws:
IOException - on failure to read column family descriptors
UnsupportedEncodingException
static void configureBloomType(HTableDescriptor tableDescriptor, org.apache.hadoop.conf.Configuration conf) throws UnsupportedEncodingException
Parameters:
tableDescriptor - to read the properties from
conf - to persist serialized values into
Throws:
IOException - on failure to read column family descriptors
UnsupportedEncodingException
static void configureDataBlockEncoding(HTableDescriptor tableDescriptor, org.apache.hadoop.conf.Configuration conf) throws UnsupportedEncodingException
Parameters:
tableDescriptor - to read the properties from
conf - to persist serialized values into
Throws:
IOException - on failure to read column family descriptors
UnsupportedEncodingException