@InterfaceAudience.Private public class WALSplitter extends Object
Modifier and Type | Class and Description
---|---
(package private) class | WALSplitter.BoundedLogWriterCreationOutputSink
(package private) static class | WALSplitter.CorruptedLogFileException
static class | WALSplitter.EntryBuffers - Class which accumulates edits and separates them into a buffer per region while simultaneously accounting RAM usage.
(package private) class | WALSplitter.LogRecoveredEditsOutputSink - Class that manages the output streams from the log splitting process.
static class | WALSplitter.MutationReplay - A struct used by getMutationsFromWALEntry.
static class | WALSplitter.OutputSink - An abstraction that provides a common interface for the different ways of consuming recovered edits.
static class | WALSplitter.PipelineController - Contains some methods to control WAL-entries producer / consumer interactions.
static class | WALSplitter.RegionEntryBuffer - A buffer of some number of edits for a given region.
static class | WALSplitter.SinkWriter - Class that wraps the actual writer which writes data out, plus related statistics.
private static class | WALSplitter.WriterAndPath - Private data structure that wraps a Writer and its Path, also collecting statistics about the data written to this output.
static class | WALSplitter.WriterThread
Modifier and Type | Field and Description
---|---
protected org.apache.hadoop.conf.Configuration | conf
private static Pattern | EDITFILES_NAME_PATTERN
private WALSplitter.EntryBuffers | entryBuffers
private org.apache.hadoop.fs.FileStatus | fileBeingSplit
protected Map<String,Long> | lastFlushedSequenceIds
private static org.slf4j.Logger | LOG
private static String | OLD_SEQUENCE_ID_FILE_SUFFIX
(package private) WALSplitter.OutputSink | outputSink
private static String | RECOVERED_LOG_TMPFILE_SUFFIX
protected Map<String,Map<byte[],Long>> | regionMaxSeqIdInStores
private static String | SEQUENCE_ID_FILE_SUFFIX
private static int | SEQUENCE_ID_FILE_SUFFIX_LENGTH
protected LastSequenceId | sequenceIdChecker
static boolean | SPLIT_SKIP_ERRORS_DEFAULT - By default we retry errors in splitting, rather than skipping.
static String | SPLIT_WRITER_CREATION_BOUNDED
private SplitLogWorkerCoordination | splitLogWorkerCoordination
private boolean | splitWriterCreationBounded
private MonitoredTask | status
protected org.apache.hadoop.fs.Path | walDir
private WALFactory | walFactory
protected org.apache.hadoop.fs.FileSystem | walFS
Constructor and Description
---
WALSplitter(WALFactory factory, org.apache.hadoop.conf.Configuration conf, org.apache.hadoop.fs.Path walDir, org.apache.hadoop.fs.FileSystem walFS, LastSequenceId idChecker, SplitLogWorkerCoordination splitLogWorkerCoordination)
Modifier and Type | Method and Description
---|---
private static void | archiveLogs(List<org.apache.hadoop.fs.Path> corruptedLogs, List<org.apache.hadoop.fs.Path> processedLogs, org.apache.hadoop.fs.Path oldLogDir, org.apache.hadoop.fs.FileSystem walFS, org.apache.hadoop.conf.Configuration conf) - Moves processed logs to oldLogDir after successful processing; moves corrupted logs (any log that could not be successfully parsed) to corruptDir (.corrupt) for later investigation.
protected WALProvider.Writer | createWriter(org.apache.hadoop.fs.Path logfile) - Create a new WALProvider.Writer for writing log splits.
private static void | finishSplitLogFile(org.apache.hadoop.fs.Path walDir, org.apache.hadoop.fs.Path oldLogDir, org.apache.hadoop.fs.Path logPath, org.apache.hadoop.conf.Configuration conf)
static void | finishSplitLogFile(String logfile, org.apache.hadoop.conf.Configuration conf) - Completes the work done by splitLogFile by archiving logs.
(package private) static String | formatRecoveredEditsFileName(long seqid)
private static org.apache.hadoop.fs.Path | getCompletedRecoveredEditsFilePath(org.apache.hadoop.fs.Path srcPath, long maximumEditLogSeqNum) - Get the completed recovered edits file path, renaming it so it is named by the last edit in the file rather than by its first edit.
static long | getMaxRegionSequenceId(org.apache.hadoop.conf.Configuration conf, RegionInfo region, CollectionUtils.IOExceptionSupplier<org.apache.hadoop.fs.FileSystem> rootFsSupplier, CollectionUtils.IOExceptionSupplier<org.apache.hadoop.fs.FileSystem> walFsSupplier) - Deprecated. Only for compatibility; will be removed in 4.0.0.
static long | getMaxRegionSequenceId(org.apache.hadoop.fs.FileSystem walFS, org.apache.hadoop.fs.Path regionDir) - Get the max sequence id which is stored in the region directory.
private static long | getMaxSequenceId(org.apache.hadoop.fs.FileStatus[] files)
static List<WALSplitter.MutationReplay> | getMutationsFromWALEntry(org.apache.hadoop.hbase.shaded.protobuf.generated.AdminProtos.WALEntry entry, CellScanner cells, Pair<WALKey,WALEdit> logEntry, Durability durability) - This function is used to construct mutations from a WALEntry.
private static WAL.Entry | getNextLogLine(WAL.Reader in, org.apache.hadoop.fs.Path path, boolean skipErrors)
private int | getNumOpenWriters() - Get the number of currently open writers.
protected WAL.Reader | getReader(org.apache.hadoop.fs.FileStatus file, boolean skipErrors, CancelableProgressable reporter) - Create a new WAL.Reader for reading logs to split.
protected WAL.Reader | getReader(org.apache.hadoop.fs.Path curLogFile, CancelableProgressable reporter) - Create a new WAL.Reader for reading logs to split.
static org.apache.hadoop.fs.Path | getRegionDirRecoveredEditsDir(org.apache.hadoop.fs.Path regionDir)
(package private) static org.apache.hadoop.fs.Path | getRegionSplitEditsPath(WAL.Entry logEntry, String fileNameBeingSplit, org.apache.hadoop.conf.Configuration conf) - Path to a file under the RECOVERED_EDITS_DIR directory of the region found in logEntry, named for the sequence id in the passed logEntry: e.g. /hbase/some_table/2323432434/recovered.edits/2332.
private static org.apache.hadoop.fs.FileStatus[] | getSequenceIdFiles(org.apache.hadoop.fs.FileSystem walFS, org.apache.hadoop.fs.Path regionDir)
static NavigableSet<org.apache.hadoop.fs.Path> | getSplitEditFilesSorted(org.apache.hadoop.fs.FileSystem walFS, org.apache.hadoop.fs.Path regionDir) - Returns the sorted set of edit files made by the splitter, excluding files with a '.temp' suffix.
private static String | getTmpRecoveredEditsFileName(String fileName)
static boolean | hasRecoveredEdits(org.apache.hadoop.conf.Configuration conf, RegionInfo regionInfo) - Check whether there is recovered.edits in the region dir.
static boolean | isSequenceIdFile(org.apache.hadoop.fs.Path file) - Is the given file a region open sequence id file.
static org.apache.hadoop.fs.Path | moveAsideBadEditsFile(org.apache.hadoop.fs.FileSystem walFS, org.apache.hadoop.fs.Path edits) - Move aside a bad edits file.
static List<org.apache.hadoop.fs.Path> | split(org.apache.hadoop.fs.Path rootDir, org.apache.hadoop.fs.Path logDir, org.apache.hadoop.fs.Path oldLogDir, org.apache.hadoop.fs.FileSystem walFS, org.apache.hadoop.conf.Configuration conf, WALFactory factory)
(package private) boolean | splitLogFile(org.apache.hadoop.fs.FileStatus logfile, CancelableProgressable reporter) - Log splitting implementation; splits one log file.
static boolean | splitLogFile(org.apache.hadoop.fs.Path walDir, org.apache.hadoop.fs.FileStatus logfile, org.apache.hadoop.fs.FileSystem walFS, org.apache.hadoop.conf.Configuration conf, CancelableProgressable reporter, LastSequenceId idChecker, SplitLogWorkerCoordination splitLogWorkerCoordination, WALFactory factory) - Splits a WAL file into a region's recovered-edits directory.
static void | writeRegionSequenceIdFile(org.apache.hadoop.fs.FileSystem walFS, org.apache.hadoop.fs.Path regionDir, long newMaxSeqId) - Create a file whose name is the region's max sequence id.
private static final org.slf4j.Logger LOG
public static final boolean SPLIT_SKIP_ERRORS_DEFAULT
protected final org.apache.hadoop.fs.Path walDir
protected final org.apache.hadoop.fs.FileSystem walFS
protected final org.apache.hadoop.conf.Configuration conf
WALSplitter.OutputSink outputSink
private WALSplitter.EntryBuffers entryBuffers
private SplitLogWorkerCoordination splitLogWorkerCoordination
private final WALFactory walFactory
private MonitoredTask status
protected final LastSequenceId sequenceIdChecker
protected Map<String,Long> lastFlushedSequenceIds
protected Map<String,Map<byte[],Long>> regionMaxSeqIdInStores
private org.apache.hadoop.fs.FileStatus fileBeingSplit
private final boolean splitWriterCreationBounded
public static final String SPLIT_WRITER_CREATION_BOUNDED
private static final Pattern EDITFILES_NAME_PATTERN
private static final String RECOVERED_LOG_TMPFILE_SUFFIX
private static final String SEQUENCE_ID_FILE_SUFFIX
private static final String OLD_SEQUENCE_ID_FILE_SUFFIX
private static final int SEQUENCE_ID_FILE_SUFFIX_LENGTH
WALSplitter(WALFactory factory, org.apache.hadoop.conf.Configuration conf, org.apache.hadoop.fs.Path walDir, org.apache.hadoop.fs.FileSystem walFS, LastSequenceId idChecker, SplitLogWorkerCoordination splitLogWorkerCoordination)
public static boolean splitLogFile(org.apache.hadoop.fs.Path walDir, org.apache.hadoop.fs.FileStatus logfile, org.apache.hadoop.fs.FileSystem walFS, org.apache.hadoop.conf.Configuration conf, CancelableProgressable reporter, LastSequenceId idChecker, SplitLogWorkerCoordination splitLogWorkerCoordination, WALFactory factory) throws IOException
Splits a WAL file into a region's recovered-edits directory. If the log file has N regions then N recovered.edits files will be produced.
Throws:
IOException
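To make the call shape concrete, here is a minimal, hedged sketch of invoking this entry point directly on one WAL file. The class and variable names are illustrative only, and the null idChecker and splitLogWorkerCoordination are assumptions borrowed from the standalone split(rootDir, ...) path rather than something this signature requires; a real region server supplies its own collaborators.

```java
// Hypothetical sketch: split one WAL file directly via the static entry point.
// Assumes the HBase 2.x package layout; names here are illustrative.
import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.util.CancelableProgressable;
import org.apache.hadoop.hbase.wal.WALFactory;
import org.apache.hadoop.hbase.wal.WALSplitter;

public class SplitOneWal {
  static boolean splitOne(Configuration conf, FileSystem walFS, Path walDir,
      FileStatus logfile, WALFactory factory) throws IOException {
    // The reporter lets a long-running split signal liveness; returning false would cancel it.
    CancelableProgressable reporter = () -> true;
    return WALSplitter.splitLogFile(walDir, logfile, walFS, conf, reporter,
        null /* LastSequenceId */, null /* SplitLogWorkerCoordination */, factory);
  }
}
```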
public static List<org.apache.hadoop.fs.Path> split(org.apache.hadoop.fs.Path rootDir, org.apache.hadoop.fs.Path logDir, org.apache.hadoop.fs.Path oldLogDir, org.apache.hadoop.fs.FileSystem walFS, org.apache.hadoop.conf.Configuration conf, WALFactory factory) throws IOException
Throws:
IOException
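For a rough picture of this standalone entry point, the hedged sketch below splits every WAL under one log directory in a single process. The directory handles and the externally constructed WALFactory are assumptions, not values mandated by this API.

```java
// Hypothetical sketch: split all WALs under one log directory in-process.
import java.io.IOException;
import java.util.List;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.wal.WALFactory;
import org.apache.hadoop.hbase.wal.WALSplitter;

public class SplitLogDir {
  static List<Path> splitAll(Configuration conf, FileSystem walFS, Path rootDir,
      Path logDir, Path oldLogDir, WALFactory factory) throws IOException {
    // Writes one recovered.edits file per region touched by the WALs under logDir
    // and returns the resulting paths.
    return WALSplitter.split(rootDir, logDir, oldLogDir, walFS, conf, factory);
  }
}
```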
boolean splitLogFile(org.apache.hadoop.fs.FileStatus logfile, CancelableProgressable reporter) throws IOException
Log splitting implementation; splits one log file.
Parameters:
logfile - should be an actual log file.
Throws:
IOException
public static void finishSplitLogFile(String logfile, org.apache.hadoop.conf.Configuration conf) throws IOException
Completes the work done by splitLogFile by archiving logs. It is invoked by SplitLogManager once it knows that one of the SplitLogWorkers has completed the splitLogFile() part. If the master crashes then this function might get called multiple times.
Parameters:
logfile -
conf -
Throws:
IOException
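A short sketch of the follow-up call, assuming the caller already knows the WAL's name; the method name and argument below are hypothetical, since in practice SplitLogManager supplies them.

```java
// Hypothetical sketch: archive a WAL once a worker reports splitLogFile() done.
import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.wal.WALSplitter;

public class FinishSplit {
  static void archive(Configuration conf, String logfileName) throws IOException {
    // Per the description above, this may safely be called more than once,
    // e.g. after a master restart.
    WALSplitter.finishSplitLogFile(logfileName, conf);
  }
}
```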
private static void finishSplitLogFile(org.apache.hadoop.fs.Path walDir, org.apache.hadoop.fs.Path oldLogDir, org.apache.hadoop.fs.Path logPath, org.apache.hadoop.conf.Configuration conf) throws IOException
Throws:
IOException
private static void archiveLogs(List<org.apache.hadoop.fs.Path> corruptedLogs, List<org.apache.hadoop.fs.Path> processedLogs, org.apache.hadoop.fs.Path oldLogDir, org.apache.hadoop.fs.FileSystem walFS, org.apache.hadoop.conf.Configuration conf) throws IOException
Moves processed logs to oldLogDir after successful processing; moves corrupted logs (any log that could not be successfully parsed) to corruptDir (.corrupt) for later investigation.
Parameters:
corruptedLogs -
processedLogs -
oldLogDir -
walFS - WAL FileSystem to archive files on.
conf -
Throws:
IOException
static org.apache.hadoop.fs.Path getRegionSplitEditsPath(WAL.Entry logEntry, String fileNameBeingSplit, org.apache.hadoop.conf.Configuration conf) throws IOException
Path to a file under the RECOVERED_EDITS_DIR directory of the region found in logEntry, named for the sequence id in the passed logEntry: e.g. /hbase/some_table/2323432434/recovered.edits/2332. This method also ensures the existence of RECOVERED_EDITS_DIR under the region, creating it if necessary.
Parameters:
fs -
logEntry -
rootDir - HBase root dir.
fileNameBeingSplit - the file being split currently. Used to generate the tmp file name.
Throws:
IOException
private static String getTmpRecoveredEditsFileName(String fileName)
private static org.apache.hadoop.fs.Path getCompletedRecoveredEditsFilePath(org.apache.hadoop.fs.Path srcPath, long maximumEditLogSeqNum)
Get the completed recovered edits file path, renaming it so it is named by the last edit in the file rather than by its first edit. See HRegion.replayRecoveredEditsIfAny(java.util.Map<byte[], java.lang.Long>, org.apache.hadoop.hbase.util.CancelableProgressable, org.apache.hadoop.hbase.monitoring.MonitoredTask).
Parameters:
srcPath -
maximumEditLogSeqNum -

static String formatRecoveredEditsFileName(long seqid)
public static org.apache.hadoop.fs.Path getRegionDirRecoveredEditsDir(org.apache.hadoop.fs.Path regionDir)
Parameters:
regionDir - This region's directory in the filesystem.
Returns: the recovered edits directory under regionDir.
public static boolean hasRecoveredEdits(org.apache.hadoop.conf.Configuration conf, RegionInfo regionInfo) throws IOException
Check whether there is recovered.edits in the region dir.
Parameters:
conf - conf
regionInfo - the region to check
Throws:
IOException - IOException

public static NavigableSet<org.apache.hadoop.fs.Path> getSplitEditFilesSorted(org.apache.hadoop.fs.FileSystem walFS, org.apache.hadoop.fs.Path regionDir) throws IOException
Returns the sorted set of edit files made by the splitter, excluding files with a '.temp' suffix.
Parameters:
walFS - WAL FileSystem used to retrieve split edits files.
regionDir - WAL region dir to look for recovered edits files under.
Returns: files under regionDir as a sorted set.
Throws:
IOException
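The lookups above compose naturally when a region is opened. The following hedged sketch, in which the region directory and filesystem handles are assumptions, lists the recovered edits files in the sorted order the splitter produced them.

```java
// Hypothetical sketch: enumerate a region's recovered.edits files in sorted order.
import java.io.IOException;
import java.util.NavigableSet;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.wal.WALSplitter;

public class ListRecoveredEdits {
  static void list(FileSystem walFS, Path regionDir) throws IOException {
    // Directory that holds recovered edits for this region.
    Path editsDir = WALSplitter.getRegionDirRecoveredEditsDir(regionDir);
    // Sorted edit files made by the splitter, excluding '.temp' files.
    NavigableSet<Path> editFiles = WALSplitter.getSplitEditFilesSorted(walFS, regionDir);
    for (Path edits : editFiles) {
      System.out.println("pending recovered edits: " + edits + " (under " + editsDir + ")");
    }
  }
}
```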
public static org.apache.hadoop.fs.Path moveAsideBadEditsFile(org.apache.hadoop.fs.FileSystem walFS, org.apache.hadoop.fs.Path edits) throws IOException
Move aside a bad edits file.
Parameters:
walFS - WAL FileSystem used to rename the bad edits file.
edits - Edits file to move aside.
Throws:
IOException
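If replaying one of those edits files fails, a caller can quarantine it with this helper. A minimal sketch follows; how the failure is detected is assumed and left to the caller.

```java
// Hypothetical sketch: set aside an edits file that failed to replay.
import java.io.IOException;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.wal.WALSplitter;

public class QuarantineEdits {
  static Path setAside(FileSystem walFS, Path badEdits) throws IOException {
    // Renames the bad file so region opening can proceed; the returned Path
    // points at the moved-aside copy.
    return WALSplitter.moveAsideBadEditsFile(walFS, badEdits);
  }
}
```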
public static boolean isSequenceIdFile(org.apache.hadoop.fs.Path file)
private static org.apache.hadoop.fs.FileStatus[] getSequenceIdFiles(org.apache.hadoop.fs.FileSystem walFS, org.apache.hadoop.fs.Path regionDir) throws IOException
Throws:
IOException

private static long getMaxSequenceId(org.apache.hadoop.fs.FileStatus[] files)

public static long getMaxRegionSequenceId(org.apache.hadoop.fs.FileSystem walFS, org.apache.hadoop.fs.Path regionDir) throws IOException
Get the max sequence id which is stored in the region directory.
Throws:
IOException

public static void writeRegionSequenceIdFile(org.apache.hadoop.fs.FileSystem walFS, org.apache.hadoop.fs.Path regionDir, long newMaxSeqId) throws IOException
Create a file whose name is the region's max sequence id.
Throws:
IOException
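The two public sequence-id helpers above pair up as a simple persist-and-read round trip. A hedged sketch follows; the region directory and the new sequence id value are assumptions supplied by the caller.

```java
// Hypothetical sketch: persist a region's max sequence id and read it back.
import java.io.IOException;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.wal.WALSplitter;

public class SequenceIdFile {
  static long persistAndReload(FileSystem walFS, Path regionDir, long newMaxSeqId)
      throws IOException {
    // Create a marker file whose name encodes the region's max sequence id.
    WALSplitter.writeRegionSequenceIdFile(walFS, regionDir, newMaxSeqId);
    // Read back the max sequence id stored in the region directory.
    return WALSplitter.getMaxRegionSequenceId(walFS, regionDir);
  }
}
```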
@Deprecated public static long getMaxRegionSequenceId(org.apache.hadoop.conf.Configuration conf, RegionInfo region, CollectionUtils.IOExceptionSupplier<org.apache.hadoop.fs.FileSystem> rootFsSupplier, CollectionUtils.IOExceptionSupplier<org.apache.hadoop.fs.FileSystem> walFsSupplier) throws IOException
Deprecated. Only for compatibility; will be removed in 4.0.0. Use getMaxRegionSequenceId(FileSystem, Path) instead; this method is kept only until the 4.0.0 release.
Throws:
IOException
protected WAL.Reader getReader(org.apache.hadoop.fs.FileStatus file, boolean skipErrors, CancelableProgressable reporter) throws IOException, WALSplitter.CorruptedLogFileException
Create a new WAL.Reader for reading logs to split.
Parameters:
file -
Throws:
IOException
WALSplitter.CorruptedLogFileException
private static WAL.Entry getNextLogLine(WAL.Reader in, org.apache.hadoop.fs.Path path, boolean skipErrors) throws WALSplitter.CorruptedLogFileException, IOException
protected WALProvider.Writer createWriter(org.apache.hadoop.fs.Path logfile) throws IOException
Create a new WALProvider.Writer for writing log splits.
Throws:
IOException

protected WAL.Reader getReader(org.apache.hadoop.fs.Path curLogFile, CancelableProgressable reporter) throws IOException
Create a new WAL.Reader for reading logs to split.
Throws:
IOException
private int getNumOpenWriters()
public static List<WALSplitter.MutationReplay> getMutationsFromWALEntry(org.apache.hadoop.hbase.shaded.protobuf.generated.AdminProtos.WALEntry entry, CellScanner cells, Pair<WALKey,WALEdit> logEntry, Durability durability) throws IOException
This function is used to construct mutations from a WALEntry.
Parameters:
entry -
cells -
logEntry - pair of WALKey and WALEdit instances; stores the WALKey and WALEdit extracted from the passed-in WALEntry.
Throws:
IOException
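For the replay path, here is a hedged sketch of turning a shipped WALEntry back into client mutations. The entry and cell scanner are assumed to arrive from a replay RPC, import locations assume the HBase 2.x package layout, and how each MutationReplay is applied afterwards is left to the caller.

```java
// Hypothetical sketch: rebuild mutations from a WALEntry received for replay.
import java.io.IOException;
import java.util.List;
import org.apache.hadoop.hbase.CellScanner;
import org.apache.hadoop.hbase.client.Durability;
import org.apache.hadoop.hbase.shaded.protobuf.generated.AdminProtos;
import org.apache.hadoop.hbase.util.Pair;
import org.apache.hadoop.hbase.wal.WALEdit;
import org.apache.hadoop.hbase.wal.WALKey;
import org.apache.hadoop.hbase.wal.WALSplitter;

public class ReplayEntry {
  static List<WALSplitter.MutationReplay> toMutations(AdminProtos.WALEntry entry,
      CellScanner cells) throws IOException {
    // Per the parameter description above, logEntry is filled in by the call
    // with the WALKey/WALEdit extracted from the passed-in entry.
    Pair<WALKey, WALEdit> logEntry = new Pair<>();
    return WALSplitter.getMutationsFromWALEntry(entry, cells, logEntry,
        Durability.USE_DEFAULT);
  }
}
```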
Copyright © 2007–2019 The Apache Software Foundation. All rights reserved.