@InterfaceAudience.Public public class LoadIncrementalHFiles extends org.apache.hadoop.conf.Configured implements org.apache.hadoop.util.Tool
Modifier and Type | Class and Description |
---|---|
private static interface |
LoadIncrementalHFiles.BulkHFileVisitor<TFamily> |
static class |
LoadIncrementalHFiles.LoadQueueItem
Represents an HFile waiting to be loaded.
|
Modifier and Type | Field and Description |
---|---|
static String |
ALWAYS_COPY_FILES |
private static String |
ASSIGN_SEQ_IDS |
private boolean |
assignSeqIds |
private String |
bulkToken |
static String |
CREATE_TABLE_CONF_KEY |
private FsDelegationToken |
fsDelegationToken |
static String |
IGNORE_UNMATCHED_CF_CONF_KEY |
private static org.slf4j.Logger |
LOG |
static String |
MAX_FILES_PER_REGION_PER_FAMILY |
private int |
maxFilesPerRegionPerFamily |
static String |
NAME |
private int |
nrThreads |
private AtomicInteger |
numRetries |
(package private) static String |
RETRY_ON_IO_EXCEPTION |
private RpcControllerFactory |
rpcControllerFactory |
(package private) static String |
TMP_DIR |
private UserProvider |
userProvider |
Constructor and Description |
---|
LoadIncrementalHFiles(org.apache.hadoop.conf.Configuration conf) |
Modifier and Type | Method and Description |
---|---|
protected ClientServiceCallable<byte[]> |
buildClientServiceCallable(Connection conn,
TableName tableName,
byte[] first,
Collection<LoadIncrementalHFiles.LoadQueueItem> lqis,
boolean copyFile) |
protected void |
bulkLoadPhase(Table table,
Connection conn,
ExecutorService pool,
Deque<LoadIncrementalHFiles.LoadQueueItem> queue,
org.apache.hbase.thirdparty.com.google.common.collect.Multimap<ByteBuffer,LoadIncrementalHFiles.LoadQueueItem> regionGroups,
boolean copyFile,
Map<LoadIncrementalHFiles.LoadQueueItem,ByteBuffer> item2RegionMap)
This takes the LQI's grouped by likely regions and attempts to bulk load them.
|
private boolean |
checkHFilesCountPerRegionPerFamily(org.apache.hbase.thirdparty.com.google.common.collect.Multimap<ByteBuffer,LoadIncrementalHFiles.LoadQueueItem> regionGroups) |
private void |
cleanup(Admin admin,
Deque<LoadIncrementalHFiles.LoadQueueItem> queue,
ExecutorService pool,
SecureBulkLoadClient secureClient) |
private static void |
copyHFileHalf(org.apache.hadoop.conf.Configuration conf,
org.apache.hadoop.fs.Path inFile,
org.apache.hadoop.fs.Path outFile,
Reference reference,
ColumnFamilyDescriptor familyDescriptor)
Copy half of an HFile into a new HFile.
|
private ExecutorService |
createExecutorService() |
private void |
createTable(TableName tableName,
String dirPath,
Admin admin)
If the table is created for the first time, then "completebulkload" reads the files twice.
|
private void |
discoverLoadQueue(Deque<LoadIncrementalHFiles.LoadQueueItem> ret,
org.apache.hadoop.fs.Path hfofDir,
boolean validateHFile)
Walk the given directory for all HFiles, and return a Queue containing all such files.
|
Map<LoadIncrementalHFiles.LoadQueueItem,ByteBuffer> |
doBulkLoad(Map<byte[],List<org.apache.hadoop.fs.Path>> map,
Admin admin,
Table table,
RegionLocator regionLocator,
boolean silence,
boolean copyFile)
Perform a bulk load of the given directory into the given pre-existing table.
|
Map<LoadIncrementalHFiles.LoadQueueItem,ByteBuffer> |
doBulkLoad(org.apache.hadoop.fs.Path hfofDir,
Admin admin,
Table table,
RegionLocator regionLocator)
Perform a bulk load of the given directory into the given pre-existing table.
|
Map<LoadIncrementalHFiles.LoadQueueItem,ByteBuffer> |
doBulkLoad(org.apache.hadoop.fs.Path hfofDir,
Admin admin,
Table table,
RegionLocator regionLocator,
boolean silence,
boolean copyFile)
Perform a bulk load of the given directory into the given pre-existing table.
|
private String |
getUniqueName() |
protected Pair<List<LoadIncrementalHFiles.LoadQueueItem>,String> |
groupOrSplit(org.apache.hbase.thirdparty.com.google.common.collect.Multimap<ByteBuffer,LoadIncrementalHFiles.LoadQueueItem> regionGroups,
LoadIncrementalHFiles.LoadQueueItem item,
Table table,
Pair<byte[][],byte[][]> startEndKeys)
Attempt to assign the given load queue item into its target region group.
|
private Pair<org.apache.hbase.thirdparty.com.google.common.collect.Multimap<ByteBuffer,LoadIncrementalHFiles.LoadQueueItem>,Set<String>> |
groupOrSplitPhase(Table table,
ExecutorService pool,
Deque<LoadIncrementalHFiles.LoadQueueItem> queue,
Pair<byte[][],byte[][]> startEndKeys) |
static byte[][] |
inferBoundaries(SortedMap<byte[],Integer> bdryMap)
Infers region boundaries for a new table.
|
private boolean |
isAlwaysCopyFiles() |
private boolean |
isCreateTable() |
private boolean |
isSecureBulkLoadEndpointAvailable() |
private boolean |
isSilence() |
void |
loadHFileQueue(Table table,
Connection conn,
Deque<LoadIncrementalHFiles.LoadQueueItem> queue,
Pair<byte[][],byte[][]> startEndKeys)
Used by the replication sink to load the hfiles from the source cluster.
|
void |
loadHFileQueue(Table table,
Connection conn,
Deque<LoadIncrementalHFiles.LoadQueueItem> queue,
Pair<byte[][],byte[][]> startEndKeys,
boolean copyFile)
Used by the replication sink to load the hfiles from the source cluster.
|
static void |
main(String[] args) |
private Map<LoadIncrementalHFiles.LoadQueueItem,ByteBuffer> |
performBulkLoad(Admin admin,
Table table,
RegionLocator regionLocator,
Deque<LoadIncrementalHFiles.LoadQueueItem> queue,
ExecutorService pool,
SecureBulkLoadClient secureClient,
boolean copyFile) |
private void |
populateLoadQueue(Deque<LoadIncrementalHFiles.LoadQueueItem> ret,
Map<byte[],List<org.apache.hadoop.fs.Path>> map)
Populate the Queue with given HFiles
|
void |
prepareHFileQueue(Map<byte[],List<org.apache.hadoop.fs.Path>> map,
Table table,
Deque<LoadIncrementalHFiles.LoadQueueItem> queue,
boolean silence)
Prepare a collection of
LoadIncrementalHFiles.LoadQueueItem from list of source hfiles contained in the
passed directory and validates whether the prepared queue has all the valid table column
families in it. |
void |
prepareHFileQueue(org.apache.hadoop.fs.Path hfilesDir,
Table table,
Deque<LoadIncrementalHFiles.LoadQueueItem> queue,
boolean validateHFile)
Prepare a collection of
LoadIncrementalHFiles.LoadQueueItem from list of source hfiles contained in the
passed directory and validates whether the prepared queue has all the valid table column
families in it. |
void |
prepareHFileQueue(org.apache.hadoop.fs.Path hfilesDir,
Table table,
Deque<LoadIncrementalHFiles.LoadQueueItem> queue,
boolean validateHFile,
boolean silence)
Prepare a collection of
LoadIncrementalHFiles.LoadQueueItem from list of source hfiles contained in the
passed directory and validates whether the prepared queue has all the valid table column
families in it. |
Map<LoadIncrementalHFiles.LoadQueueItem,ByteBuffer> |
run(Map<byte[],List<org.apache.hadoop.fs.Path>> family2Files,
TableName tableName)
Perform bulk load on the given table.
|
int |
run(String[] args) |
Map<LoadIncrementalHFiles.LoadQueueItem,ByteBuffer> |
run(String hfofDir,
TableName tableName)
Perform bulk load on the given table.
|
void |
setBulkToken(String stagingDir)
Called from replication sink, where it manages bulkToken(staging directory) by itself.
|
private static boolean |
shouldCopyHFileMetaKey(byte[] key) |
(package private) static void |
splitStoreFile(org.apache.hadoop.conf.Configuration conf,
org.apache.hadoop.fs.Path inFile,
ColumnFamilyDescriptor familyDesc,
byte[] splitKey,
org.apache.hadoop.fs.Path bottomOut,
org.apache.hadoop.fs.Path topOut)
Split a storefile into a top and bottom half, maintaining the metadata, recreating bloom
filters, etc.
|
private List<LoadIncrementalHFiles.LoadQueueItem> |
splitStoreFile(LoadIncrementalHFiles.LoadQueueItem item,
Table table,
byte[] startKey,
byte[] splitKey) |
private String |
toString(List<Pair<byte[],String>> list) |
protected List<LoadIncrementalHFiles.LoadQueueItem> |
tryAtomicRegionLoad(ClientServiceCallable<byte[]> serviceCallable,
TableName tableName,
byte[] first,
Collection<LoadIncrementalHFiles.LoadQueueItem> lqis)
Attempts to do an atomic load of many hfiles into a region.
|
private void |
usage() |
private void |
validateFamiliesInHFiles(Table table,
Deque<LoadIncrementalHFiles.LoadQueueItem> queue,
boolean silence)
Checks whether there is any invalid family name in HFiles to be bulk loaded.
|
private static <TFamily> void |
visitBulkHFiles(org.apache.hadoop.fs.FileSystem fs,
org.apache.hadoop.fs.Path bulkDir,
LoadIncrementalHFiles.BulkHFileVisitor<TFamily> visitor)
Iterate over the bulkDir hfiles.
|
private static <TFamily> void |
visitBulkHFiles(org.apache.hadoop.fs.FileSystem fs,
org.apache.hadoop.fs.Path bulkDir,
LoadIncrementalHFiles.BulkHFileVisitor<TFamily> visitor,
boolean validateHFile)
Iterate over the bulkDir hfiles.
|
private static final org.slf4j.Logger LOG
public static final String NAME
static final String RETRY_ON_IO_EXCEPTION
public static final String MAX_FILES_PER_REGION_PER_FAMILY
private static final String ASSIGN_SEQ_IDS
public static final String CREATE_TABLE_CONF_KEY
public static final String IGNORE_UNMATCHED_CF_CONF_KEY
public static final String ALWAYS_COPY_FILES
static final String TMP_DIR
private final int maxFilesPerRegionPerFamily
private final boolean assignSeqIds
private final FsDelegationToken fsDelegationToken
private final UserProvider userProvider
private final int nrThreads
private AtomicInteger numRetries
private final RpcControllerFactory rpcControllerFactory
public LoadIncrementalHFiles(org.apache.hadoop.conf.Configuration conf)
private void usage()
public void prepareHFileQueue(org.apache.hadoop.fs.Path hfilesDir, Table table, Deque<LoadIncrementalHFiles.LoadQueueItem> queue, boolean validateHFile) throws IOException
Prepare a collection of LoadIncrementalHFiles.LoadQueueItem from the list of source hfiles contained in the
passed directory, and validate whether the prepared queue has all the valid table column
families in it.
Parameters:
hfilesDir - directory containing list of hfiles to be loaded into the table
table - table to which hfiles should be loaded
queue - queue which needs to be loaded into the table
validateHFile - if true, hfiles will be validated for their format
Throws:
IOException - if any I/O or network error occurred
public void prepareHFileQueue(org.apache.hadoop.fs.Path hfilesDir, Table table, Deque<LoadIncrementalHFiles.LoadQueueItem> queue, boolean validateHFile, boolean silence) throws IOException
Prepare a collection of LoadIncrementalHFiles.LoadQueueItem from the list of source hfiles contained in the
passed directory, and validate whether the prepared queue has all the valid table column
families in it.
Parameters:
hfilesDir - directory containing list of hfiles to be loaded into the table
table - table to which hfiles should be loaded
queue - queue which needs to be loaded into the table
validateHFile - if true, hfiles will be validated for their format
silence - true to ignore unmatched column families
Throws:
IOException - if any I/O or network error occurred
public void prepareHFileQueue(Map<byte[],List<org.apache.hadoop.fs.Path>> map, Table table, Deque<LoadIncrementalHFiles.LoadQueueItem> queue, boolean silence) throws IOException
Prepare a collection of LoadIncrementalHFiles.LoadQueueItem from the list of source hfiles contained in the
passed directory, and validate whether the prepared queue has all the valid table column
families in it.
Parameters:
map - map of family to List of hfiles
table - table to which hfiles should be loaded
queue - queue which needs to be loaded into the table
silence - true to ignore unmatched column families
Throws:
IOException - if any I/O or network error occurred
public Map<LoadIncrementalHFiles.LoadQueueItem,ByteBuffer> doBulkLoad(org.apache.hadoop.fs.Path hfofDir, Admin admin, Table table, RegionLocator regionLocator) throws TableNotFoundException, IOException
Perform a bulk load of the given directory into the given pre-existing table.
Parameters:
hfofDir - the directory that was provided as the output path of a job using HFileOutputFormat
admin - the Admin
table - the table to load into
regionLocator - region locator
Throws:
TableNotFoundException - if table does not yet exist
IOException
public Map<LoadIncrementalHFiles.LoadQueueItem,ByteBuffer> doBulkLoad(Map<byte[],List<org.apache.hadoop.fs.Path>> map, Admin admin, Table table, RegionLocator regionLocator, boolean silence, boolean copyFile) throws TableNotFoundException, IOException
Perform a bulk load of the given directory into the given pre-existing table.
Parameters:
map - map of family to List of hfiles
admin - the Admin
table - the table to load into
regionLocator - region locator
silence - true to ignore unmatched column families
copyFile - always copy hfiles if true
Throws:
TableNotFoundException - if table does not yet exist
IOException
public Map<LoadIncrementalHFiles.LoadQueueItem,ByteBuffer> doBulkLoad(org.apache.hadoop.fs.Path hfofDir, Admin admin, Table table, RegionLocator regionLocator, boolean silence, boolean copyFile) throws TableNotFoundException, IOException
Perform a bulk load of the given directory into the given pre-existing table.
Parameters:
hfofDir - the directory that was provided as the output path of a job using HFileOutputFormat
admin - the Admin
table - the table to load into
regionLocator - region locator
silence - true to ignore unmatched column families
copyFile - always copy hfiles if true
Throws:
TableNotFoundException - if table does not yet exist
IOException
public void loadHFileQueue(Table table, Connection conn, Deque<LoadIncrementalHFiles.LoadQueueItem> queue, Pair<byte[][],byte[][]> startEndKeys) throws IOException
Used by the replication sink to load the hfiles from the source cluster.
Parameters:
table - Table to which these hfiles should be loaded
conn - Connection to use
queue - LoadIncrementalHFiles.LoadQueueItem has hfiles yet to be loaded
startEndKeys - starting and ending row keys of the region
Throws:
IOException
public void loadHFileQueue(Table table, Connection conn, Deque<LoadIncrementalHFiles.LoadQueueItem> queue, Pair<byte[][],byte[][]> startEndKeys, boolean copyFile) throws IOException
Used by the replication sink to load the hfiles from the source cluster.
Parameters:
table - Table to which these hfiles should be loaded
conn - Connection to use
queue - LoadIncrementalHFiles.LoadQueueItem has hfiles yet to be loaded
startEndKeys - starting and ending row keys of the region
Throws:
IOException
private Map<LoadIncrementalHFiles.LoadQueueItem,ByteBuffer> performBulkLoad(Admin admin, Table table, RegionLocator regionLocator, Deque<LoadIncrementalHFiles.LoadQueueItem> queue, ExecutorService pool, SecureBulkLoadClient secureClient, boolean copyFile) throws IOException
IOException
protected void bulkLoadPhase(Table table, Connection conn, ExecutorService pool, Deque<LoadIncrementalHFiles.LoadQueueItem> queue, org.apache.hbase.thirdparty.com.google.common.collect.Multimap<ByteBuffer,LoadIncrementalHFiles.LoadQueueItem> regionGroups, boolean copyFile, Map<LoadIncrementalHFiles.LoadQueueItem,ByteBuffer> item2RegionMap) throws IOException
protected for testing.
IOException
protected ClientServiceCallable<byte[]> buildClientServiceCallable(Connection conn, TableName tableName, byte[] first, Collection<LoadIncrementalHFiles.LoadQueueItem> lqis, boolean copyFile)
private boolean checkHFilesCountPerRegionPerFamily(org.apache.hbase.thirdparty.com.google.common.collect.Multimap<ByteBuffer,LoadIncrementalHFiles.LoadQueueItem> regionGroups)
private Pair<org.apache.hbase.thirdparty.com.google.common.collect.Multimap<ByteBuffer,LoadIncrementalHFiles.LoadQueueItem>,Set<String>> groupOrSplitPhase(Table table, ExecutorService pool, Deque<LoadIncrementalHFiles.LoadQueueItem> queue, Pair<byte[][],byte[][]> startEndKeys) throws IOException
Parameters:
conn - the HBase cluster connection
tableName - the table name of the table to load into
pool - the ExecutorService
queue - the queue for LoadQueueItem
startEndKeys - start and end keys
Throws:
IOException
private List<LoadIncrementalHFiles.LoadQueueItem> splitStoreFile(LoadIncrementalHFiles.LoadQueueItem item, Table table, byte[] startKey, byte[] splitKey) throws IOException
IOException
protected Pair<List<LoadIncrementalHFiles.LoadQueueItem>,String> groupOrSplit(org.apache.hbase.thirdparty.com.google.common.collect.Multimap<ByteBuffer,LoadIncrementalHFiles.LoadQueueItem> regionGroups, LoadIncrementalHFiles.LoadQueueItem item, Table table, Pair<byte[][],byte[][]> startEndKeys) throws IOException
Attempt to assign the given load queue item into its target region group.
Protected for testing.
Throws:
IOException - if an IO failure is encountered
protected List<LoadIncrementalHFiles.LoadQueueItem> tryAtomicRegionLoad(ClientServiceCallable<byte[]> serviceCallable, TableName tableName, byte[] first, Collection<LoadIncrementalHFiles.LoadQueueItem> lqis) throws IOException
Attempts to do an atomic load of many hfiles into a region.
NOTE: To maintain row atomicity guarantees, the region server callable should succeed atomically and fail atomically.
Protected for testing.
Throws:
IOException
private void createTable(TableName tableName, String dirPath, Admin admin) throws IOException
IOException
private void cleanup(Admin admin, Deque<LoadIncrementalHFiles.LoadQueueItem> queue, ExecutorService pool, SecureBulkLoadClient secureClient) throws IOException
IOException
private String getUniqueName()
private void validateFamiliesInHFiles(Table table, Deque<LoadIncrementalHFiles.LoadQueueItem> queue, boolean silence) throws IOException
IOException
private void populateLoadQueue(Deque<LoadIncrementalHFiles.LoadQueueItem> ret, Map<byte[],List<org.apache.hadoop.fs.Path>> map)
private void discoverLoadQueue(Deque<LoadIncrementalHFiles.LoadQueueItem> ret, org.apache.hadoop.fs.Path hfofDir, boolean validateHFile) throws IOException
IOException
private static <TFamily> void visitBulkHFiles(org.apache.hadoop.fs.FileSystem fs, org.apache.hadoop.fs.Path bulkDir, LoadIncrementalHFiles.BulkHFileVisitor<TFamily> visitor) throws IOException
IOException
private static <TFamily> void visitBulkHFiles(org.apache.hadoop.fs.FileSystem fs, org.apache.hadoop.fs.Path bulkDir, LoadIncrementalHFiles.BulkHFileVisitor<TFamily> visitor, boolean validateHFile) throws IOException
IOException
private ExecutorService createExecutorService()
private boolean isSecureBulkLoadEndpointAvailable()
static void splitStoreFile(org.apache.hadoop.conf.Configuration conf, org.apache.hadoop.fs.Path inFile, ColumnFamilyDescriptor familyDesc, byte[] splitKey, org.apache.hadoop.fs.Path bottomOut, org.apache.hadoop.fs.Path topOut) throws IOException
IOException
private static void copyHFileHalf(org.apache.hadoop.conf.Configuration conf, org.apache.hadoop.fs.Path inFile, org.apache.hadoop.fs.Path outFile, Reference reference, ColumnFamilyDescriptor familyDescriptor) throws IOException
IOException
private static boolean shouldCopyHFileMetaKey(byte[] key)
private boolean isCreateTable()
private boolean isSilence()
private boolean isAlwaysCopyFiles()
public Map<LoadIncrementalHFiles.LoadQueueItem,ByteBuffer> run(String hfofDir, TableName tableName) throws IOException
Perform bulk load on the given table.
Parameters:
hfofDir - the directory that was provided as the output path of a job using HFileOutputFormat
tableName - the table to load into
Throws:
IOException
public Map<LoadIncrementalHFiles.LoadQueueItem,ByteBuffer> run(Map<byte[],List<org.apache.hadoop.fs.Path>> family2Files, TableName tableName) throws IOException
Perform bulk load on the given table.
Parameters:
family2Files - map of family to List of hfiles
tableName - the table to load into
Throws:
IOException
public int run(String[] args) throws Exception
Specified by:
run in interface org.apache.hadoop.util.Tool
Throws:
Exception
public void setBulkToken(String stagingDir)
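Called from replication sink, where it manages bulkToken(staging directory) by itself.
Parameters:
stagingDir - staging directory path
public static byte[][] inferBoundaries(SortedMap<byte[],Integer> bdryMap)
Infers region boundaries for a new table.
Parameter:
bdryMap is a map from keys to an integer belonging to {+1, -1}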
Algo:
Copyright © 2007–2019 The Apache Software Foundation. All rights reserved.