@InterfaceAudience.Public @InterfaceStability.Evolving public class HFileOutputFormat2 extends org.apache.hadoop.mapreduce.lib.output.FileOutputFormat<ImmutableBytesWritable,Cell>
Writes HFiles. Calling write(null,null) will forcibly roll all HFiles being written.

Using this class as part of a MapReduce job is best done using configureIncrementalLoad(Job, Table, RegionLocator).
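A minimal end-to-end sketch of that usage follows. It assumes an existing table named "mytable" with a column family f and tab-separated text input; the class names BulkLoadDriver and TsvMapper, the table name, and the paths are illustrative, not part of this API. configureIncrementalLoad(Job, Table, RegionLocator) installs this output format together with a TotalOrderPartitioner over the region boundaries and a matching sort reducer, so the driver only has to wire up the mapper and set the map output classes beforehand.

```java
import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.ConnectionFactory;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.RegionLocator;
import org.apache.hadoop.hbase.client.Table;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.HFileOutputFormat2;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class BulkLoadDriver {

  // Mapper emitting one Put per "row<TAB>value" input line. The map output
  // value class must be set (to Put or KeyValue) before calling
  // configureIncrementalLoad, which inspects it to pick the sort reducer.
  public static class TsvMapper
      extends Mapper<LongWritable, Text, ImmutableBytesWritable, Put> {
    @Override
    protected void map(LongWritable key, Text value, Context context)
        throws IOException, InterruptedException {
      String[] fields = value.toString().split("\t");
      if (fields.length < 2) {
        return; // skip malformed lines
      }
      byte[] row = Bytes.toBytes(fields[0]);
      Put put = new Put(row);
      put.addColumn(Bytes.toBytes("f"), Bytes.toBytes("q"), Bytes.toBytes(fields[1]));
      context.write(new ImmutableBytesWritable(row), put);
    }
  }

  public static void main(String[] args) throws Exception {
    Configuration conf = HBaseConfiguration.create();
    Job job = Job.getInstance(conf, "hfile-bulk-load");
    job.setJarByClass(BulkLoadDriver.class);
    job.setMapperClass(TsvMapper.class);
    job.setMapOutputKeyClass(ImmutableBytesWritable.class);
    job.setMapOutputValueClass(Put.class);
    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    TableName name = TableName.valueOf("mytable");
    try (Connection connection = ConnectionFactory.createConnection(conf);
         Table table = connection.getTable(name);
         RegionLocator locator = connection.getRegionLocator(name)) {
      // Wires in HFileOutputFormat2, partitions against the region start
      // keys, and sets one reducer per region.
      HFileOutputFormat2.configureIncrementalLoad(job, table, locator);
      System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
  }
}
```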
| Modifier and Type | Class and Description |
|---|---|
| (package private) static class | HFileOutputFormat2.WriterLength |
| Modifier and Type | Field and Description |
|---|---|
| private static String | BLOCK_SIZE_FAMILIES_CONF_KEY |
| private static String | BLOOM_TYPE_FAMILIES_CONF_KEY |
| private static String | COMPRESSION_FAMILIES_CONF_KEY |
| private static String | DATABLOCK_ENCODING_FAMILIES_CONF_KEY |
| static String | DATABLOCK_ENCODING_OVERRIDE_CONF_KEY |
| private static org.apache.commons.logging.Log | LOG |
| Constructor and Description |
|---|
| HFileOutputFormat2() |
| Modifier and Type | Method and Description |
|---|---|
| (package private) static void | configureBlockSize(HTableDescriptor tableDescriptor, org.apache.hadoop.conf.Configuration conf)<br>Serialize column family to block size map to configuration. |
| (package private) static void | configureBloomType(HTableDescriptor tableDescriptor, org.apache.hadoop.conf.Configuration conf)<br>Serialize column family to bloom type map to configuration. |
| (package private) static void | configureCompression(org.apache.hadoop.conf.Configuration conf, HTableDescriptor tableDescriptor)<br>Serialize column family to compression algorithm map to configuration. |
| (package private) static void | configureDataBlockEncoding(HTableDescriptor tableDescriptor, org.apache.hadoop.conf.Configuration conf)<br>Serialize column family to data block encoding map to configuration. |
| static void | configureIncrementalLoad(org.apache.hadoop.mapreduce.Job job, HTable table)<br>Deprecated. Use configureIncrementalLoad(Job, Table, RegionLocator) instead. |
| static void | configureIncrementalLoad(org.apache.hadoop.mapreduce.Job job, HTableDescriptor tableDescriptor, RegionLocator regionLocator)<br>Configure a MapReduce Job to perform an incremental load into the given table. |
| (package private) static void | configureIncrementalLoad(org.apache.hadoop.mapreduce.Job job, HTableDescriptor tableDescriptor, RegionLocator regionLocator, Class<? extends org.apache.hadoop.mapreduce.OutputFormat<?,?>> cls) |
| static void | configureIncrementalLoad(org.apache.hadoop.mapreduce.Job job, Table table, RegionLocator regionLocator)<br>Configure a MapReduce Job to perform an incremental load into the given table. |
| static void | configureIncrementalLoadMap(org.apache.hadoop.mapreduce.Job job, Table table) |
| (package private) static void | configurePartitioner(org.apache.hadoop.mapreduce.Job job, List<ImmutableBytesWritable> splitPoints)<br>Configure job with a TotalOrderPartitioner, partitioning against splitPoints. |
| (package private) static Map<byte[],Integer> | createFamilyBlockSizeMap(org.apache.hadoop.conf.Configuration conf)<br>Runs inside the task to deserialize column family to block size map from the configuration. |
| (package private) static Map<byte[],BloomType> | createFamilyBloomTypeMap(org.apache.hadoop.conf.Configuration conf)<br>Runs inside the task to deserialize column family to bloom filter type map from the configuration. |
| (package private) static Map<byte[],Compression.Algorithm> | createFamilyCompressionMap(org.apache.hadoop.conf.Configuration conf)<br>Runs inside the task to deserialize column family to compression algorithm map from the configuration. |
| private static Map<byte[],String> | createFamilyConfValueMap(org.apache.hadoop.conf.Configuration conf, String confName)<br>Run inside the task to deserialize column family to given conf value map. |
| (package private) static Map<byte[],DataBlockEncoding> | createFamilyDataBlockEncodingMap(org.apache.hadoop.conf.Configuration conf)<br>Runs inside the task to deserialize column family to data block encoding type map from the configuration. |
| (package private) static <V extends Cell> org.apache.hadoop.mapreduce.RecordWriter<ImmutableBytesWritable,V> | createRecordWriter(org.apache.hadoop.mapreduce.TaskAttemptContext context, org.apache.hadoop.mapreduce.OutputCommitter committer) |
| org.apache.hadoop.mapreduce.RecordWriter<ImmutableBytesWritable,Cell> | getRecordWriter(org.apache.hadoop.mapreduce.TaskAttemptContext context) |
| private static List<ImmutableBytesWritable> | getRegionStartKeys(RegionLocator table)<br>Return the start keys of all of the regions in this table, as a list of ImmutableBytesWritable. |
| private static void | writePartitions(org.apache.hadoop.conf.Configuration conf, org.apache.hadoop.fs.Path partitionsPath, List<ImmutableBytesWritable> startKeys)<br>Write out a SequenceFile that can be read by TotalOrderPartitioner that contains the split points in startKeys. |
Methods inherited from class org.apache.hadoop.mapreduce.lib.output.FileOutputFormat: checkOutputSpecs, getCompressOutput, getDefaultWorkFile, getOutputCommitter, getOutputCompressorClass, getOutputName, getOutputPath, getPathForWorkFile, getUniqueFile, getWorkOutputPath, setCompressOutput, setOutputCompressorClass, setOutputName, setOutputPath

private static final org.apache.commons.logging.Log LOG
private static final String COMPRESSION_FAMILIES_CONF_KEY
private static final String BLOOM_TYPE_FAMILIES_CONF_KEY
private static final String BLOCK_SIZE_FAMILIES_CONF_KEY
private static final String DATABLOCK_ENCODING_FAMILIES_CONF_KEY
public static final String DATABLOCK_ENCODING_OVERRIDE_CONF_KEY
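DATABLOCK_ENCODING_OVERRIDE_CONF_KEY is the one public field, so client code can force a single data block encoding for every HFile the job writes, regardless of per-family settings. A hedged sketch follows; the assumption that the key's value is a DataBlockEncoding enum name (e.g. FAST_DIFF) should be verified against your HBase version.

```java
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding;
import org.apache.hadoop.hbase.mapreduce.HFileOutputFormat2;
import org.apache.hadoop.mapreduce.Job;

public final class EncodingOverride {
  // Force FAST_DIFF encoding for all HFiles written by this job, overriding
  // whatever each column family declares. (Assumption: the key takes a
  // DataBlockEncoding enum name; set it before job submission.)
  static void forceFastDiff(Job job) {
    Configuration conf = job.getConfiguration();
    conf.set(HFileOutputFormat2.DATABLOCK_ENCODING_OVERRIDE_CONF_KEY,
        DataBlockEncoding.FAST_DIFF.name());
  }
}
```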
public org.apache.hadoop.mapreduce.RecordWriter<ImmutableBytesWritable,Cell> getRecordWriter(org.apache.hadoop.mapreduce.TaskAttemptContext context) throws IOException, InterruptedException
Specified by: getRecordWriter in class org.apache.hadoop.mapreduce.lib.output.FileOutputFormat<ImmutableBytesWritable,Cell>
Throws: IOException, InterruptedException

static <V extends Cell> org.apache.hadoop.mapreduce.RecordWriter<ImmutableBytesWritable,V> createRecordWriter(org.apache.hadoop.mapreduce.TaskAttemptContext context, org.apache.hadoop.mapreduce.OutputCommitter committer) throws IOException
Throws: IOException

private static List<ImmutableBytesWritable> getRegionStartKeys(RegionLocator table) throws IOException
Return the start keys of all of the regions in this table, as a list of ImmutableBytesWritable.
Throws: IOException

private static void writePartitions(org.apache.hadoop.conf.Configuration conf, org.apache.hadoop.fs.Path partitionsPath, List<ImmutableBytesWritable> startKeys) throws IOException
Write out a SequenceFile that can be read by TotalOrderPartitioner that contains the split points in startKeys.
Throws: IOException

@Deprecated
public static void configureIncrementalLoad(org.apache.hadoop.mapreduce.Job job, HTable table) throws IOException
Deprecated. Use configureIncrementalLoad(Job, Table, RegionLocator) instead.
Throws: IOException

public static void configureIncrementalLoad(org.apache.hadoop.mapreduce.Job job, Table table, RegionLocator regionLocator) throws IOException
Configure a MapReduce Job to perform an incremental load into the given table.
Throws: IOException
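The HFiles this job produces still have to be handed to the region servers after it completes; in this HBase generation that final step goes through LoadIncrementalHFiles (org.apache.hadoop.hbase.mapreduce). A minimal sketch, assuming the 1.x doBulkLoad(Path, Admin, Table, RegionLocator) overload and the same illustrative table name as above:

```java
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Admin;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.ConnectionFactory;
import org.apache.hadoop.hbase.client.RegionLocator;
import org.apache.hadoop.hbase.client.Table;
import org.apache.hadoop.hbase.mapreduce.LoadIncrementalHFiles;

public final class CompleteBulkLoad {
  public static void main(String[] args) throws Exception {
    Configuration conf = HBaseConfiguration.create();
    TableName name = TableName.valueOf("mytable"); // illustrative table name
    try (Connection connection = ConnectionFactory.createConnection(conf);
         Table table = connection.getTable(name);
         RegionLocator locator = connection.getRegionLocator(name);
         Admin admin = connection.getAdmin()) {
      // Moves the job's HFiles (args[0] = the job's output directory) into
      // the table's region directories.
      new LoadIncrementalHFiles(conf).doBulkLoad(new Path(args[0]), admin, table, locator);
    }
  }
}
```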
public static void configureIncrementalLoad(org.apache.hadoop.mapreduce.Job job, HTableDescriptor tableDescriptor, RegionLocator regionLocator) throws IOException
Configure a MapReduce Job to perform an incremental load into the given table.
Throws: IOException
static void configureIncrementalLoad(org.apache.hadoop.mapreduce.Job job, HTableDescriptor tableDescriptor, RegionLocator regionLocator, Class<? extends org.apache.hadoop.mapreduce.OutputFormat<?,?>> cls) throws IOException, UnsupportedEncodingException
Throws: IOException, UnsupportedEncodingException
public static void configureIncrementalLoadMap(org.apache.hadoop.mapreduce.Job job, Table table) throws IOException
Throws: IOException

static Map<byte[],Compression.Algorithm> createFamilyCompressionMap(org.apache.hadoop.conf.Configuration conf)
Runs inside the task to deserialize column family to compression algorithm map from the configuration.
Parameters: conf - to read the serialized values from
static Map<byte[],BloomType> createFamilyBloomTypeMap(org.apache.hadoop.conf.Configuration conf)
Runs inside the task to deserialize column family to bloom filter type map from the configuration.
Parameters: conf - to read the serialized values from

static Map<byte[],Integer> createFamilyBlockSizeMap(org.apache.hadoop.conf.Configuration conf)
Runs inside the task to deserialize column family to block size map from the configuration.
Parameters: conf - to read the serialized values from

static Map<byte[],DataBlockEncoding> createFamilyDataBlockEncodingMap(org.apache.hadoop.conf.Configuration conf)
Runs inside the task to deserialize column family to data block encoding type map from the configuration.
Parameters: conf - to read the serialized values from

private static Map<byte[],String> createFamilyConfValueMap(org.apache.hadoop.conf.Configuration conf, String confName)
Run inside the task to deserialize column family to given conf value map.
Parameters: conf - to read the serialized values from; confName - conf key to read from the configuration

static void configurePartitioner(org.apache.hadoop.mapreduce.Job job, List<ImmutableBytesWritable> splitPoints) throws IOException
Configure job with a TotalOrderPartitioner, partitioning against splitPoints. Cleans up the partitions file after the job exits.
Throws: IOException

static void configureCompression(org.apache.hadoop.conf.Configuration conf, HTableDescriptor tableDescriptor) throws UnsupportedEncodingException
Serialize column family to compression algorithm map to configuration.
Parameters: tableDescriptor - to read the properties from; conf - to persist serialized values into
Throws: IOException - on failure to read column family descriptors; UnsupportedEncodingException

static void configureBlockSize(HTableDescriptor tableDescriptor, org.apache.hadoop.conf.Configuration conf) throws UnsupportedEncodingException
Serialize column family to block size map to configuration.
Parameters: tableDescriptor - to read the properties from; conf - to persist serialized values into
Throws: IOException - on failure to read column family descriptors; UnsupportedEncodingException

static void configureBloomType(HTableDescriptor tableDescriptor, org.apache.hadoop.conf.Configuration conf) throws UnsupportedEncodingException
Serialize column family to bloom type map to configuration.
Parameters: tableDescriptor - to read the properties from; conf - to persist serialized values into
Throws: IOException - on failure to read column family descriptors; UnsupportedEncodingException

static void configureDataBlockEncoding(HTableDescriptor tableDescriptor, org.apache.hadoop.conf.Configuration conf) throws UnsupportedEncodingException
Serialize column family to data block encoding map to configuration.
Parameters: tableDescriptor - to read the properties from; conf - to persist serialized values into
Throws: IOException - on failure to read column family descriptors; UnsupportedEncodingException
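The class description's note that write(null,null) forcibly rolls all open HFiles can be exercised when driving the RecordWriter by hand, outside the normal MapReduce flow. In the sketch below, only getRecordWriter(context) and the null-write contract come from this page; obtaining a TaskAttemptContext outside a running task is environment-specific and left illustrative.

```java
import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.HFileOutputFormat2;
import org.apache.hadoop.mapreduce.RecordWriter;
import org.apache.hadoop.mapreduce.TaskAttemptContext;

public final class ManualRoll {
  // The caller supplies a TaskAttemptContext; constructing one by hand is
  // environment-specific and not shown here.
  static void writeAndRoll(TaskAttemptContext context,
      ImmutableBytesWritable row, Cell cell) throws Exception {
    RecordWriter<ImmutableBytesWritable, Cell> writer =
        new HFileOutputFormat2().getRecordWriter(context);
    writer.write(row, cell);   // normal append; Cells must arrive in order
    writer.write(null, null);  // per the class description: forcibly rolls all open HFiles
    writer.close(context);
  }
}
```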