@InterfaceAudience.Private public class WALSplitter extends Object
Modifier and Type | Class and Description |
---|---|
(package private) static class |
WALSplitter.CorruptedLogFileException |
static class |
WALSplitter.EntryBuffers
Class which accumulates edits and separates them into a buffer per region
while simultaneously accounting RAM usage.
|
(package private) class |
WALSplitter.LogRecoveredEditsOutputSink
Class that manages the output streams from the log splitting process.
|
(package private) class |
WALSplitter.LogReplayOutputSink
Class that manages replaying edits from WAL files directly to the assigned failover region servers.
|
static class |
WALSplitter.MutationReplay
A struct used by getMutationsFromWALEntry
|
static class |
WALSplitter.OutputSink
An abstraction that provides a common interface supporting both the
existing recovered edits file sink and the region server WAL edits replay sink.
|
static class |
WALSplitter.PipelineController
Contains some methods to control WAL-entries producer / consumer interactions
|
static class |
WALSplitter.RegionEntryBuffer
A buffer of some number of edits for a given region.
|
private static class |
WALSplitter.RegionServerWriter
Private data structure that wraps a receiving RS and collects statistics about the data
written to this newly assigned RS.
|
static class |
WALSplitter.SinkWriter
Class that wraps the actual writer that writes data out, together with related statistics.
|
private static class |
WALSplitter.WriterAndPath
Private data structure that wraps a Writer and its Path, also collecting statistics about the
data written to this output.
|
static class |
WALSplitter.WriterThread |
Modifier and Type | Field and Description |
---|---|
protected org.apache.hadoop.conf.Configuration |
conf |
(package private) WALSplitter.PipelineController |
controller |
private BaseCoordinatedStateManager |
csm |
private Set<TableName> |
disablingOrDisabledTables |
protected boolean |
distributedLogReplay |
private static Pattern |
EDITFILES_NAME_PATTERN |
(package private) WALSplitter.EntryBuffers |
entryBuffers |
protected String |
failedServerName |
protected org.apache.hadoop.fs.FileSystem |
fs |
protected Map<String,Long> |
lastFlushedSequenceIds |
private static org.apache.commons.logging.Log |
LOG |
private int |
minBatchSize |
private int |
numWriterThreads |
private static String |
OLD_SEQUENCE_ID_FILE_SUFFIX |
(package private) WALSplitter.OutputSink |
outputSink |
private static String |
RECOVERED_LOG_TMPFILE_SUFFIX |
protected Map<String,Map<byte[],Long>> |
regionMaxSeqIdInStores |
protected org.apache.hadoop.fs.Path |
rootDir |
private static String |
SEQUENCE_ID_FILE_SUFFIX |
private static int |
SEQUENCE_ID_FILE_SUFFIX_LENGTH |
protected LastSequenceId |
sequenceIdChecker |
static boolean |
SPLIT_SKIP_ERRORS_DEFAULT
By default we retry errors in splitting, rather than skipping.
|
private MonitoredTask |
status |
private WALFactory |
walFactory |
Constructor and Description |
---|
WALSplitter(WALFactory factory,
org.apache.hadoop.conf.Configuration conf,
org.apache.hadoop.fs.Path rootDir,
org.apache.hadoop.fs.FileSystem fs,
LastSequenceId idChecker,
CoordinatedStateManager csm,
org.apache.hadoop.hbase.protobuf.generated.ZooKeeperProtos.SplitLogTask.RecoveryMode mode) |
Modifier and Type | Method and Description |
---|---|
private static void |
archiveLogs(List<org.apache.hadoop.fs.Path> corruptedLogs,
List<org.apache.hadoop.fs.Path> processedLogs,
org.apache.hadoop.fs.Path oldLogDir,
org.apache.hadoop.fs.FileSystem fs,
org.apache.hadoop.conf.Configuration conf)
Moves processed logs to oldLogDir after successful processing. Moves
corrupted logs (any log that couldn't be successfully parsed) to corruptDir
(.corrupt) for later investigation.
|
protected WALProvider.Writer |
createWriter(org.apache.hadoop.fs.Path logfile)
Create a new
WALProvider.Writer for writing log splits. |
(package private) static void |
finishSplitLogFile(org.apache.hadoop.fs.Path rootdir,
org.apache.hadoop.fs.Path oldLogDir,
org.apache.hadoop.fs.Path logPath,
org.apache.hadoop.conf.Configuration conf) |
static void |
finishSplitLogFile(String logfile,
org.apache.hadoop.conf.Configuration conf)
Completes the work done by splitLogFile by archiving logs
|
(package private) static String |
formatRecoveredEditsFileName(long seqid) |
(package private) static org.apache.hadoop.fs.Path |
getCompletedRecoveredEditsFilePath(org.apache.hadoop.fs.Path srcPath,
Long maximumEditLogSeqNum)
Get the completed recovered edits file path, renaming the file so that it is named
for the last edit in the file rather than for its first edit.
|
static List<WALSplitter.MutationReplay> |
getMutationsFromWALEntry(org.apache.hadoop.hbase.protobuf.generated.AdminProtos.WALEntry entry,
CellScanner cells,
Pair<WALKey,WALEdit> logEntry,
Durability durability)
This function is used to construct mutations from a WALEntry.
|
private static WAL.Entry |
getNextLogLine(WAL.Reader in,
org.apache.hadoop.fs.Path path,
boolean skipErrors) |
private int |
getNumOpenWriters()
Get the number of currently open writers.
|
protected WAL.Reader |
getReader(org.apache.hadoop.fs.FileStatus file,
boolean skipErrors,
CancelableProgressable reporter)
Create a new
WAL.Reader for reading logs to split. |
protected WAL.Reader |
getReader(org.apache.hadoop.fs.Path curLogFile,
CancelableProgressable reporter)
Create a new
WAL.Reader for reading logs to split. |
static org.apache.hadoop.fs.Path |
getRegionDirRecoveredEditsDir(org.apache.hadoop.fs.Path regiondir) |
(package private) static org.apache.hadoop.fs.Path |
getRegionSplitEditsPath(org.apache.hadoop.fs.FileSystem fs,
WAL.Entry logEntry,
org.apache.hadoop.fs.Path rootDir,
boolean isCreate)
Path to a file under RECOVERED_EDITS_DIR directory of the region found in
logEntry named for the sequenceid in the passed
logEntry : e.g. |
static NavigableSet<org.apache.hadoop.fs.Path> |
getSplitEditFilesSorted(org.apache.hadoop.fs.FileSystem fs,
org.apache.hadoop.fs.Path regiondir)
Returns sorted set of edit files made by splitter, excluding files
with '.temp' suffix.
|
(package private) static String |
getTmpRecoveredEditsFileName(String fileName) |
static boolean |
isSequenceIdFile(org.apache.hadoop.fs.Path file)
Is the given file a region open sequence id file.
|
static org.apache.hadoop.fs.Path |
moveAsideBadEditsFile(org.apache.hadoop.fs.FileSystem fs,
org.apache.hadoop.fs.Path edits)
Move aside a bad edits file.
|
static List<org.apache.hadoop.fs.Path> |
split(org.apache.hadoop.fs.Path rootDir,
org.apache.hadoop.fs.Path logDir,
org.apache.hadoop.fs.Path oldLogDir,
org.apache.hadoop.fs.FileSystem fs,
org.apache.hadoop.conf.Configuration conf,
WALFactory factory) |
(package private) boolean |
splitLogFile(org.apache.hadoop.fs.FileStatus logfile,
CancelableProgressable reporter)
log splitting implementation, splits one log file.
|
static boolean |
splitLogFile(org.apache.hadoop.fs.Path rootDir,
org.apache.hadoop.fs.FileStatus logfile,
org.apache.hadoop.fs.FileSystem fs,
org.apache.hadoop.conf.Configuration conf,
CancelableProgressable reporter,
LastSequenceId idChecker,
CoordinatedStateManager cp,
org.apache.hadoop.hbase.protobuf.generated.ZooKeeperProtos.SplitLogTask.RecoveryMode mode,
WALFactory factory)
Splits a WAL file into region's recovered-edits directory.
|
static long |
writeRegionSequenceIdFile(org.apache.hadoop.fs.FileSystem fs,
org.apache.hadoop.fs.Path regiondir,
long newSeqId,
long saftyBumper)
Create a file whose name is the region open sequence id.
|
private static final org.apache.commons.logging.Log LOG
public static final boolean SPLIT_SKIP_ERRORS_DEFAULT
protected final org.apache.hadoop.fs.Path rootDir
protected final org.apache.hadoop.fs.FileSystem fs
protected final org.apache.hadoop.conf.Configuration conf
WALSplitter.PipelineController controller
WALSplitter.OutputSink outputSink
WALSplitter.EntryBuffers entryBuffers
private BaseCoordinatedStateManager csm
private final WALFactory walFactory
private MonitoredTask status
protected final LastSequenceId sequenceIdChecker
protected boolean distributedLogReplay
protected String failedServerName
private final int numWriterThreads
private final int minBatchSize
private static final Pattern EDITFILES_NAME_PATTERN
private static final String RECOVERED_LOG_TMPFILE_SUFFIX
private static final String SEQUENCE_ID_FILE_SUFFIX
private static final String OLD_SEQUENCE_ID_FILE_SUFFIX
private static final int SEQUENCE_ID_FILE_SUFFIX_LENGTH
WALSplitter(WALFactory factory, org.apache.hadoop.conf.Configuration conf, org.apache.hadoop.fs.Path rootDir, org.apache.hadoop.fs.FileSystem fs, LastSequenceId idChecker, CoordinatedStateManager csm, org.apache.hadoop.hbase.protobuf.generated.ZooKeeperProtos.SplitLogTask.RecoveryMode mode)
public static boolean splitLogFile(org.apache.hadoop.fs.Path rootDir, org.apache.hadoop.fs.FileStatus logfile, org.apache.hadoop.fs.FileSystem fs, org.apache.hadoop.conf.Configuration conf, CancelableProgressable reporter, LastSequenceId idChecker, CoordinatedStateManager cp, org.apache.hadoop.hbase.protobuf.generated.ZooKeeperProtos.SplitLogTask.RecoveryMode mode, WALFactory factory) throws IOException
If the log file has N regions then N recovered.edits files will be produced.
rootDir
- logfile
- fs
- conf
- reporter
- idChecker
- cp
- coordination state manager
IOException
public static List<org.apache.hadoop.fs.Path> split(org.apache.hadoop.fs.Path rootDir, org.apache.hadoop.fs.Path logDir, org.apache.hadoop.fs.Path oldLogDir, org.apache.hadoop.fs.FileSystem fs, org.apache.hadoop.conf.Configuration conf, WALFactory factory) throws IOException
IOException
boolean splitLogFile(org.apache.hadoop.fs.FileStatus logfile, CancelableProgressable reporter) throws IOException
logfile
- should be an actual log file.
IOException
public static void finishSplitLogFile(String logfile, org.apache.hadoop.conf.Configuration conf) throws IOException
It is invoked by SplitLogManager once it knows that one of the SplitLogWorkers has completed the splitLogFile() part. If the master crashes then this function might get called multiple times.
logfile
- conf
- IOException
static void finishSplitLogFile(org.apache.hadoop.fs.Path rootdir, org.apache.hadoop.fs.Path oldLogDir, org.apache.hadoop.fs.Path logPath, org.apache.hadoop.conf.Configuration conf) throws IOException
IOException
private static void archiveLogs(List<org.apache.hadoop.fs.Path> corruptedLogs, List<org.apache.hadoop.fs.Path> processedLogs, org.apache.hadoop.fs.Path oldLogDir, org.apache.hadoop.fs.FileSystem fs, org.apache.hadoop.conf.Configuration conf) throws IOException
corruptedLogs
- processedLogs
- oldLogDir
- fs
- conf
- IOException
static org.apache.hadoop.fs.Path getRegionSplitEditsPath(org.apache.hadoop.fs.FileSystem fs, WAL.Entry logEntry, org.apache.hadoop.fs.Path rootDir, boolean isCreate) throws IOException
logEntry
named for the sequenceid in the passed
logEntry
: e.g. /hbase/some_table/2323432434/recovered.edits/2332.
This method also ensures existence of RECOVERED_EDITS_DIR under the region,
creating it if necessary.
fs
- logEntry
- rootDir
- HBase root dir.
IOException
static org.apache.hadoop.fs.Path getCompletedRecoveredEditsFilePath(org.apache.hadoop.fs.Path srcPath, Long maximumEditLogSeqNum)
HRegion.replayRecoveredEditsIfAny(org.apache.hadoop.fs.Path, java.util.Map<byte[], java.lang.Long>, org.apache.hadoop.hbase.util.CancelableProgressable, org.apache.hadoop.hbase.monitoring.MonitoredTask)
.
srcPath
- maximumEditLogSeqNum
-
static String formatRecoveredEditsFileName(long seqid)
public static org.apache.hadoop.fs.Path getRegionDirRecoveredEditsDir(org.apache.hadoop.fs.Path regiondir)
regiondir
- This region's directory in the filesystem.
regiondir
public static NavigableSet<org.apache.hadoop.fs.Path> getSplitEditFilesSorted(org.apache.hadoop.fs.FileSystem fs, org.apache.hadoop.fs.Path regiondir) throws IOException
fs
- regiondir
- regiondir
as a sorted set.
IOException
public static org.apache.hadoop.fs.Path moveAsideBadEditsFile(org.apache.hadoop.fs.FileSystem fs, org.apache.hadoop.fs.Path edits) throws IOException
fs
- edits
- Edits file to move aside.
IOException
public static boolean isSequenceIdFile(org.apache.hadoop.fs.Path file)
public static long writeRegionSequenceIdFile(org.apache.hadoop.fs.FileSystem fs, org.apache.hadoop.fs.Path regiondir, long newSeqId, long saftyBumper) throws IOException
fs
- regiondir
- newSeqId
- saftyBumper
- IOException
protected WAL.Reader getReader(org.apache.hadoop.fs.FileStatus file, boolean skipErrors, CancelableProgressable reporter) throws IOException, WALSplitter.CorruptedLogFileException
WAL.Reader
for reading logs to split.
file
- IOException
WALSplitter.CorruptedLogFileException
private static WAL.Entry getNextLogLine(WAL.Reader in, org.apache.hadoop.fs.Path path, boolean skipErrors) throws WALSplitter.CorruptedLogFileException, IOException
protected WALProvider.Writer createWriter(org.apache.hadoop.fs.Path logfile) throws IOException
WALProvider.Writer
for writing log splits.
IOException
protected WAL.Reader getReader(org.apache.hadoop.fs.Path curLogFile, CancelableProgressable reporter) throws IOException
WAL.Reader
for reading logs to split.
IOException
private int getNumOpenWriters()
public static List<WALSplitter.MutationReplay> getMutationsFromWALEntry(org.apache.hadoop.hbase.protobuf.generated.AdminProtos.WALEntry entry, CellScanner cells, Pair<WALKey,WALEdit> logEntry, Durability durability) throws IOException
entry
- cells
- logEntry
- pair of WALKey and WALEdit that stores the WALKey and WALEdit instances
extracted from the passed-in WALEntry.
durability
- IOException
Copyright © 2007–2019 The Apache Software Foundation. All rights reserved.