@InterfaceAudience.Public public class HFileOutputFormat2 extends org.apache.hadoop.mapreduce.lib.output.FileOutputFormat<ImmutableBytesWritable,Cell>
Writes HFiles. Calling write(null,null) will forcibly roll all HFiles being written.

Using this class as part of a MapReduce job is best done using configureIncrementalLoad(Job, TableDescriptor, RegionLocator).
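For orientation, here is a minimal, hedged sketch of a bulk-load driver built around this class. BulkLoadDriver, MyCellMapper, the table name, and the paths are hypothetical placeholders; the mapper is assumed to emit ImmutableBytesWritable row keys with Put values, which configureIncrementalLoad supports.

```java
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Admin;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.ConnectionFactory;
import org.apache.hadoop.hbase.client.RegionLocator;
import org.apache.hadoop.hbase.mapreduce.HFileOutputFormat2;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class BulkLoadDriver {                     // hypothetical driver class
  public static void main(String[] args) throws Exception {
    Configuration conf = HBaseConfiguration.create();
    Job job = Job.getInstance(conf, "hfile-bulk-load");
    job.setJarByClass(BulkLoadDriver.class);
    job.setMapperClass(MyCellMapper.class);       // hypothetical mapper emitting
                                                  // (ImmutableBytesWritable, Put)
    FileInputFormat.addInputPath(job, new Path(args[0]));    // placeholder input
    FileOutputFormat.setOutputPath(job, new Path(args[1]));  // HFile staging dir

    TableName table = TableName.valueOf("my_table");         // hypothetical table
    try (Connection conn = ConnectionFactory.createConnection(conf);
         RegionLocator locator = conn.getRegionLocator(table);
         Admin admin = conn.getAdmin()) {
      // Wires in the TotalOrderPartitioner, sort reducer, and output classes:
      HFileOutputFormat2.configureIncrementalLoad(job, admin.getDescriptor(table), locator);
    }
    System.exit(job.waitForCompletion(true) ? 0 : 1);
  }
}
```

After the job completes, the staged HFiles can be handed to HBase's bulk-load tooling (for example, the completebulkload tool) to be moved into the table's regions.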
Modifier and Type | Class and Description |
---|---|
(package private) static class | HFileOutputFormat2.TableInfo |
(package private) static class | HFileOutputFormat2.WriterLength |
Constructor and Description |
---|
HFileOutputFormat2() |
Modifier and Type | Method and Description |
---|---|
protected static byte[] | combineTableNameSuffix(byte[] tableName, byte[] suffix) |
(package private) static void | configureIncrementalLoad(org.apache.hadoop.mapreduce.Job job, List<HFileOutputFormat2.TableInfo> multiTableInfo, Class<? extends org.apache.hadoop.mapreduce.OutputFormat<?,?>> cls) |
static void | configureIncrementalLoad(org.apache.hadoop.mapreduce.Job job, TableDescriptor tableDescriptor, RegionLocator regionLocator): Configure a MapReduce Job to perform an incremental load into the given table. |
static void | configureIncrementalLoad(org.apache.hadoop.mapreduce.Job job, Table table, RegionLocator regionLocator): Configure a MapReduce Job to perform an incremental load into the given table. |
static void | configureIncrementalLoadMap(org.apache.hadoop.mapreduce.Job job, TableDescriptor tableDescriptor) |
(package private) static void | configurePartitioner(org.apache.hadoop.mapreduce.Job job, List<ImmutableBytesWritable> splitPoints, boolean writeMultipleTables): Configure job with a TotalOrderPartitioner, partitioning against splitPoints. |
static void | configureRemoteCluster(org.apache.hadoop.mapreduce.Job job, org.apache.hadoop.conf.Configuration clusterConf): Configure the HBase cluster key for a remote cluster so region locations can be loaded when locality-sensitive writing is enabled. |
(package private) static void | configureStoragePolicy(org.apache.hadoop.conf.Configuration conf, org.apache.hadoop.fs.FileSystem fs, byte[] tableAndFamily, org.apache.hadoop.fs.Path cfPath): Configure the block storage policy for a column family after its directory is created. |
(package private) static Map<byte[],Integer> | createFamilyBlockSizeMap(org.apache.hadoop.conf.Configuration conf): Runs inside the task to deserialize the column family to block size map from the configuration. |
(package private) static Map<byte[],String> | createFamilyBloomParamMap(org.apache.hadoop.conf.Configuration conf): Runs inside the task to deserialize the column family to bloom filter param map from the configuration. |
(package private) static Map<byte[],BloomType> | createFamilyBloomTypeMap(org.apache.hadoop.conf.Configuration conf): Runs inside the task to deserialize the column family to bloom filter type map from the configuration. |
(package private) static Map<byte[],Compression.Algorithm> | createFamilyCompressionMap(org.apache.hadoop.conf.Configuration conf): Runs inside the task to deserialize the column family to compression algorithm map from the configuration. |
private static Map<byte[],String> | createFamilyConfValueMap(org.apache.hadoop.conf.Configuration conf, String confName): Runs inside the task to deserialize a column family to conf value map for the given conf key. |
(package private) static Map<byte[],DataBlockEncoding> | createFamilyDataBlockEncodingMap(org.apache.hadoop.conf.Configuration conf): Runs inside the task to deserialize the column family to data block encoding type map from the configuration. |
(package private) static <V extends Cell> org.apache.hadoop.mapreduce.RecordWriter<ImmutableBytesWritable,V> | createRecordWriter(org.apache.hadoop.mapreduce.TaskAttemptContext context, org.apache.hadoop.mapreduce.OutputCommitter committer) |
org.apache.hadoop.mapreduce.RecordWriter<ImmutableBytesWritable,Cell> | getRecordWriter(org.apache.hadoop.mapreduce.TaskAttemptContext context) |
private static List<ImmutableBytesWritable> | getRegionStartKeys(List<RegionLocator> regionLocators, boolean writeMultipleTables): Return the start keys of all of the regions in this table, as a list of ImmutableBytesWritable. |
protected static byte[] | getTableNameSuffixedWithFamily(byte[] tableName, byte[] family) |
(package private) static String | serializeColumnFamilyAttribute(Function<ColumnFamilyDescriptor,String> fn, List<TableDescriptor> allTables) |
private static void | writePartitions(org.apache.hadoop.conf.Configuration conf, org.apache.hadoop.fs.Path partitionsPath, List<ImmutableBytesWritable> startKeys, boolean writeMultipleTables): Write out a SequenceFile that can be read by TotalOrderPartitioner that contains the split points in startKeys. |
Methods inherited from class org.apache.hadoop.mapreduce.lib.output.FileOutputFormat: checkOutputSpecs, getCompressOutput, getDefaultWorkFile, getOutputCommitter, getOutputCompressorClass, getOutputName, getOutputPath, getPathForWorkFile, getUniqueFile, getWorkOutputPath, setCompressOutput, setOutputCompressorClass, setOutputName, setOutputPath
private static final org.slf4j.Logger LOG
protected static final byte[] tableSeparator
static final String COMPRESSION_FAMILIES_CONF_KEY
static final String BLOOM_TYPE_FAMILIES_CONF_KEY
static final String BLOOM_PARAM_FAMILIES_CONF_KEY
static final String BLOCK_SIZE_FAMILIES_CONF_KEY
static final String DATABLOCK_ENCODING_FAMILIES_CONF_KEY
public static final String DATABLOCK_ENCODING_OVERRIDE_CONF_KEY
public static final String COMPRESSION_OVERRIDE_CONF_KEY
public static final String LOCALITY_SENSITIVE_CONF_KEY
private static final boolean DEFAULT_LOCALITY_SENSITIVE
static final String OUTPUT_TABLE_NAME_CONF_KEY
static final String MULTI_TABLE_HFILEOUTPUTFORMAT_CONF_KEY
public static final String REMOTE_CLUSTER_CONF_PREFIX
public static final String REMOTE_CLUSTER_ZOOKEEPER_QUORUM_CONF_KEY
public static final String REMOTE_CLUSTER_ZOOKEEPER_CLIENT_PORT_CONF_KEY
public static final String REMOTE_CLUSTER_ZOOKEEPER_ZNODE_PARENT_CONF_KEY
public static final String STORAGE_POLICY_PROPERTY
public static final String STORAGE_POLICY_PROPERTY_CF_PREFIX
@InterfaceAudience.Private static Function<ColumnFamilyDescriptor,String> compressionDetails
@InterfaceAudience.Private static Function<ColumnFamilyDescriptor,String> blockSizeDetails
@InterfaceAudience.Private static Function<ColumnFamilyDescriptor,String> bloomTypeDetails
@InterfaceAudience.Private static Function<ColumnFamilyDescriptor,String> bloomParamDetails
@InterfaceAudience.Private static Function<ColumnFamilyDescriptor,String> dataBlockEncodingDetails
public HFileOutputFormat2()
protected static byte[] combineTableNameSuffix(byte[] tableName, byte[] suffix)
public org.apache.hadoop.mapreduce.RecordWriter<ImmutableBytesWritable,Cell> getRecordWriter(org.apache.hadoop.mapreduce.TaskAttemptContext context) throws IOException, InterruptedException
Specified by: getRecordWriter in class org.apache.hadoop.mapreduce.lib.output.FileOutputFormat<ImmutableBytesWritable,Cell>
Throws: IOException, InterruptedException
protected static byte[] getTableNameSuffixedWithFamily(byte[] tableName, byte[] family)
static <V extends Cell> org.apache.hadoop.mapreduce.RecordWriter<ImmutableBytesWritable,V> createRecordWriter(org.apache.hadoop.mapreduce.TaskAttemptContext context, org.apache.hadoop.mapreduce.OutputCommitter committer) throws IOException
Throws: IOException
static void configureStoragePolicy(org.apache.hadoop.conf.Configuration conf, org.apache.hadoop.fs.FileSystem fs, byte[] tableAndFamily, org.apache.hadoop.fs.Path cfPath)
Configure the block storage policy for a column family after its directory is created.
private static List<ImmutableBytesWritable> getRegionStartKeys(List<RegionLocator> regionLocators, boolean writeMultipleTables) throws IOException
Return the start keys of all of the regions in this table, as a list of ImmutableBytesWritable.
Throws: IOException
private static void writePartitions(org.apache.hadoop.conf.Configuration conf, org.apache.hadoop.fs.Path partitionsPath, List<ImmutableBytesWritable> startKeys, boolean writeMultipleTables) throws IOException
Write out a SequenceFile that can be read by TotalOrderPartitioner that contains the split points in startKeys.
Throws: IOException
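As a rough illustration of the file this private method produces, here is a minimal sketch that writes sorted split points into a SequenceFile. Using NullWritable values is an assumption for illustration, since TotalOrderPartitioner only consults the keys; the real implementation's sorting and first-key handling are internal details.

```java
import java.io.IOException;
import java.util.List;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.SequenceFile;

// Writes sorted split points into a SequenceFile that TotalOrderPartitioner
// can load. Values are irrelevant to the partitioner, so NullWritable is used.
static void writeSplitPoints(Configuration conf, Path partitionsPath,
    List<ImmutableBytesWritable> sortedSplitPoints) throws IOException {
  try (SequenceFile.Writer writer = SequenceFile.createWriter(conf,
      SequenceFile.Writer.file(partitionsPath),
      SequenceFile.Writer.keyClass(ImmutableBytesWritable.class),
      SequenceFile.Writer.valueClass(NullWritable.class))) {
    for (ImmutableBytesWritable splitPoint : sortedSplitPoints) {
      writer.append(splitPoint, NullWritable.get());
    }
  }
}
```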
public static void configureIncrementalLoad(org.apache.hadoop.mapreduce.Job job, Table table, RegionLocator regionLocator) throws IOException
Configure a MapReduce Job to perform an incremental load into the given table.
Throws: IOException
public static void configureIncrementalLoad(org.apache.hadoop.mapreduce.Job job, TableDescriptor tableDescriptor, RegionLocator regionLocator) throws IOException
Configure a MapReduce Job to perform an incremental load into the given table.
Throws: IOException
static void configureIncrementalLoad(org.apache.hadoop.mapreduce.Job job, List<HFileOutputFormat2.TableInfo> multiTableInfo, Class<? extends org.apache.hadoop.mapreduce.OutputFormat<?,?>> cls) throws IOException
Throws: IOException
public static void configureIncrementalLoadMap(org.apache.hadoop.mapreduce.Job job, TableDescriptor tableDescriptor) throws IOException
Throws: IOException
public static void configureRemoteCluster(org.apache.hadoop.mapreduce.Job job, org.apache.hadoop.conf.Configuration clusterConf)
Configure the HBase cluster key for a remote cluster so that region locations can be loaded when locality-sensitive writing is enabled. Call this method when, for example, you load data from HBase cluster A using TableInputFormat and generate HFiles for HBase cluster B; otherwise HFileOutputFormat2 fetches region locations from cluster A, and locality-sensitive writing won't work correctly.
configureIncrementalLoad(Job, Table, RegionLocator) calls this method using Table.getConfiguration() as clusterConf. See HBASE-25608.
Parameters:
job - which has the configuration to be updated
clusterConf - which contains the cluster key of the HBase cluster to be locality-sensitive
See Also: configureIncrementalLoad(Job, Table, RegionLocator), LOCALITY_SENSITIVE_CONF_KEY, REMOTE_CLUSTER_ZOOKEEPER_QUORUM_CONF_KEY, REMOTE_CLUSTER_ZOOKEEPER_CLIENT_PORT_CONF_KEY, REMOTE_CLUSTER_ZOOKEEPER_ZNODE_PARENT_CONF_KEY
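A hedged sketch of the cluster A to cluster B scenario described above. The ZooKeeper quorum, port, and znode values are hypothetical placeholders; the standard HBase client configuration keys are used for illustration.

```java
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.mapreduce.HFileOutputFormat2;
import org.apache.hadoop.mapreduce.Job;

// The job reads from cluster A (e.g. via TableInputFormat) but the HFiles are
// destined for cluster B, so locality lookups must point at cluster B.
static void pointLocalityAtClusterB(Job job) {
  Configuration clusterBConf = HBaseConfiguration.create();
  // Hypothetical placeholder values for cluster B's ZooKeeper ensemble:
  clusterBConf.set("hbase.zookeeper.quorum", "zk1.cluster-b.example,zk2.cluster-b.example");
  clusterBConf.set("hbase.zookeeper.property.clientPort", "2181");
  clusterBConf.set("zookeeper.znode.parent", "/hbase");
  HFileOutputFormat2.configureRemoteCluster(job, clusterBConf);
}
```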
@InterfaceAudience.Private static Map<byte[],Compression.Algorithm> createFamilyCompressionMap(org.apache.hadoop.conf.Configuration conf)
Runs inside the task to deserialize the column family to compression algorithm map from the configuration.
Parameters: conf - to read the serialized values from

@InterfaceAudience.Private static Map<byte[],BloomType> createFamilyBloomTypeMap(org.apache.hadoop.conf.Configuration conf)
Runs inside the task to deserialize the column family to bloom filter type map from the configuration.
Parameters: conf - to read the serialized values from

@InterfaceAudience.Private static Map<byte[],String> createFamilyBloomParamMap(org.apache.hadoop.conf.Configuration conf)
Runs inside the task to deserialize the column family to bloom filter param map from the configuration.
Parameters: conf - to read the serialized values from

@InterfaceAudience.Private static Map<byte[],Integer> createFamilyBlockSizeMap(org.apache.hadoop.conf.Configuration conf)
Runs inside the task to deserialize the column family to block size map from the configuration.
Parameters: conf - to read the serialized values from

@InterfaceAudience.Private static Map<byte[],DataBlockEncoding> createFamilyDataBlockEncodingMap(org.apache.hadoop.conf.Configuration conf)
Runs inside the task to deserialize the column family to data block encoding type map from the configuration.
Parameters: conf - to read the serialized values from

private static Map<byte[],String> createFamilyConfValueMap(org.apache.hadoop.conf.Configuration conf, String confName)
Runs inside the task to deserialize a column family to conf value map for the given conf key.
Parameters: conf - to read the serialized values from; confName - conf key to read from the configuration
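To make the shape of these helpers concrete, here is an illustrative round trip for a column family to value map. The '&'-separated, URL-encoded key=value scheme is an assumption for illustration only, not a statement of the actual internal wire format; what is grounded in this page is only that a Function over ColumnFamilyDescriptor is serialized into a conf string under a *_FAMILIES_CONF_KEY and deserialized inside the task.

```java
import java.io.UnsupportedEncodingException;
import java.net.URLDecoder;
import java.net.URLEncoder;
import java.util.Map;
import java.util.TreeMap;
import org.apache.hadoop.hbase.util.Bytes;

// ASSUMED encoding, for illustration: pairs joined with '&', key/value with '='.
static String serialize(Map<byte[], String> familyToValue) throws UnsupportedEncodingException {
  StringBuilder sb = new StringBuilder();
  for (Map.Entry<byte[], String> e : familyToValue.entrySet()) {
    if (sb.length() > 0) sb.append('&');
    sb.append(URLEncoder.encode(Bytes.toString(e.getKey()), "UTF-8"))
      .append('=')
      .append(URLEncoder.encode(e.getValue(), "UTF-8"));
  }
  return sb.toString(); // would be stored in the job configuration
}

static Map<byte[], String> deserialize(String serialized) throws UnsupportedEncodingException {
  // byte[] keys need an explicit comparator; Bytes.BYTES_COMPARATOR fits.
  Map<byte[], String> map = new TreeMap<>(Bytes.BYTES_COMPARATOR);
  for (String pair : serialized.split("&")) {
    if (pair.isEmpty()) continue;
    String[] kv = pair.split("=", 2);
    map.put(Bytes.toBytes(URLDecoder.decode(kv[0], "UTF-8")),
            URLDecoder.decode(kv[1], "UTF-8"));
  }
  return map;
}
```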
static void configurePartitioner(org.apache.hadoop.mapreduce.Job job, List<ImmutableBytesWritable> splitPoints, boolean writeMultipleTables) throws IOException
Configure job with a TotalOrderPartitioner, partitioning against splitPoints. Cleans up the partitions file after the job exits.
Throws: IOException
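For context, this is roughly what wiring a TotalOrderPartitioner into a job looks like with the public Hadoop API; the partitions path is a hypothetical placeholder, and the real method additionally arranges cleanup of the partitions file.

```java
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.partition.TotalOrderPartitioner;

static void useTotalOrderPartitioner(Job job, Path partitionsPath) {
  // partitionsPath must already contain the sorted split points
  // (see the writePartitions sketch above).
  job.setPartitionerClass(TotalOrderPartitioner.class);
  TotalOrderPartitioner.setPartitionFile(job.getConfiguration(), partitionsPath);
}
```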
@InterfaceAudience.Private static String serializeColumnFamilyAttribute(Function<ColumnFamilyDescriptor,String> fn, List<TableDescriptor> allTables) throws UnsupportedEncodingException
Throws: UnsupportedEncodingException