@InterfaceAudience.Private public abstract class AbstractFSWAL<W extends WALProvider.WriterBase> extends Object implements WAL
Implementation of WAL to go against FileSystem; i.e. keep WALs in HDFS. Only one
WAL is ever being written at a time. When a WAL hits a configured maximum size, it is rolled.
This is done internal to the implementation.
As data is flushed from the MemStore to other on-disk structures (files sorted by key, hfiles), a WAL becomes obsolete. We can let go of all the log edits/entries for a given HRegion-sequence id. A bunch of work in the below is done keeping account of these region sequence ids -- what is flushed out to hfiles, and what is yet in WAL and in memory only.
It is only practical to delete entire files. Thus, we delete an entire on-disk file F when all of the edits in F have a log-sequence-id that's older (smaller) than the most-recent flush.
To read a WAL, call WALFactory.createStreamReader(FileSystem, Path) for a one-way read; call WALFactory.createTailingReader(FileSystem, Path, Configuration, long) for replication, where we may want to tail the active WAL file.
Modifier and Type | Class and Description |
---|---|
private static class |
AbstractFSWAL.WALProps |
Modifier and Type | Field and Description |
---|---|
protected Abortable |
abortable |
private int |
archiveRetries |
private long |
batchSize |
protected long |
blocksize
Block size to use writing files.
|
protected boolean |
closed |
protected ExecutorService |
closeExecutor |
protected org.apache.hadoop.conf.Configuration |
conf
conf object
|
protected ExecutorService |
consumeExecutor |
private Lock |
consumeLock |
protected Runnable |
consumer |
private AtomicBoolean |
consumerScheduled |
protected WALCoprocessorHost |
coprocessorHost |
protected static int |
DEFAULT_ROLL_ON_SYNC_TIME_MS |
protected static int |
DEFAULT_SLOW_SYNC_ROLL_INTERVAL_MS |
protected static int |
DEFAULT_SLOW_SYNC_ROLL_THRESHOLD |
protected static int |
DEFAULT_SLOW_SYNC_TIME_MS |
static long |
DEFAULT_WAL_BATCH_SIZE |
static int |
DEFAULT_WAL_SHUTDOWN_WAIT_TIMEOUT_MS |
protected static int |
DEFAULT_WAL_SYNC_TIMEOUT_MS |
private int |
epochAndState |
private long |
fileLengthAtLastSync |
protected AtomicLong |
filenum |
protected org.apache.hadoop.fs.FileSystem |
fs
file system instance
|
protected Supplier<Boolean> |
hasConsumerTask |
protected long |
highestProcessedAppendTxid |
private long |
highestProcessedAppendTxidAtLastSync |
protected AtomicLong |
highestSyncedTxid
Updated to the transaction id of the last successful sync call.
|
protected long |
highestUnsyncedTxid
The highest known outstanding unsync'd WALEdit transaction id.
|
protected String |
implClassName
The class name of the runtime implementation, used as prefix for logging/tracing.
|
protected Map<String,W> |
inflightWALClosures
Tracks the logs in the process of being closed.
|
private long |
lastTimeCheckLowReplication |
private long |
lastTimeCheckSlowSync |
protected List<WALActionsListener> |
listeners
Listeners that are called on WAL events.
|
private static org.slf4j.Logger |
LOG |
(package private) Comparator<org.apache.hadoop.fs.Path> |
LOG_NAME_COMPARATOR
WAL Comparator; it compares the timestamp (log filenum), present in the log file name.
|
private ExecutorService |
logArchiveExecutor |
protected long |
logrollsize |
private boolean |
markerEditOnly |
private static int |
MAX_EPOCH |
static String |
MAX_LOGS |
protected int |
maxLogs |
private long |
nextLogTooOldNs |
protected AtomicInteger |
numEntries |
protected org.apache.hadoop.fs.PathFilter |
ourFiles
Matches just those wal files that belong to this wal instance.
|
protected String |
prefixPathStr
Prefix used when checking for wal membership.
|
private boolean |
readyForRolling |
private Condition |
readyForRollingCond |
private org.apache.hadoop.fs.FileSystem |
remoteFs |
private org.apache.hadoop.fs.Path |
remoteWALDir |
static String |
RING_BUFFER_SLOT_COUNT |
protected static String |
ROLL_ON_SYNC_TIME_MS |
protected long |
rollOnSyncNs
The slow sync will be logged; the very slow sync will cause the WAL to be rolled.
|
protected AtomicBoolean |
rollRequested |
protected ReentrantLock |
rollWriterLock
This lock makes sure only one log roll runs at a time.
|
private static Comparator<SyncFuture> |
SEQ_COMPARATOR |
protected SequenceIdAccounting |
sequenceIdAccounting
Class that does accounting of sequenceids in WAL subsystem.
|
protected boolean |
shouldShutDownConsumeExecutorWhenClose |
protected AtomicBoolean |
shutdown |
private boolean |
skipRemoteWAL |
protected static String |
SLOW_SYNC_ROLL_INTERVAL_MS |
protected static String |
SLOW_SYNC_ROLL_THRESHOLD |
protected static String |
SLOW_SYNC_TIME_MS |
protected int |
slowSyncCheckInterval |
protected AtomicInteger |
slowSyncCount |
protected long |
slowSyncNs
The slow sync will be logged; the very slow sync will cause the WAL to be rolled.
|
protected int |
slowSyncRollThreshold |
private static long |
SURVIVED_TOO_LONG_LOG_INTERVAL_NS
Don't log blocking regions more frequently than this.
|
private static int |
SURVIVED_TOO_LONG_SEC_DEFAULT |
private static String |
SURVIVED_TOO_LONG_SEC_KEY |
protected SyncFutureCache |
syncFutureCache
A cache of sync futures reused by threads.
|
protected SortedSet<SyncFuture> |
syncFutures |
protected AtomicLong |
totalLogSize
The total size of wal
|
protected Deque<FSWALEntry> |
toWriteAppends |
protected Deque<FSWALEntry> |
unackedAppends |
protected boolean |
useHsync |
private com.lmax.disruptor.RingBuffer<RingBufferTruck> |
waitingConsumePayloads |
private com.lmax.disruptor.Sequence |
waitingConsumePayloadsGatingSequence |
private int |
waitOnShutdownInSeconds |
private String |
waitOnShutdownInSecondsConfigKey |
static boolean |
WAL_AVOID_LOCAL_WRITES_DEFAULT |
static String |
WAL_AVOID_LOCAL_WRITES_KEY |
static String |
WAL_BATCH_SIZE |
static String |
WAL_ROLL_MULTIPLIER |
static String |
WAL_SHUTDOWN_WAIT_TIMEOUT_MS |
static String |
WAL_SYNC_TIMEOUT_MS |
protected org.apache.hadoop.fs.Path |
walArchiveDir
dir path where old logs are kept.
|
protected org.apache.hadoop.fs.Path |
walDir
WAL directory, where all WAL files would be placed.
|
protected ConcurrentNavigableMap<org.apache.hadoop.fs.Path,AbstractFSWAL.WALProps> |
walFile2Props
Map of WAL log file to properties.
|
protected String |
walFilePrefix
Prefix of a WAL file, usually the region server name it is hosted on.
|
protected String |
walFileSuffix
Suffix included on generated wal file names
|
protected long |
walShutdownTimeout |
private long |
walSyncTimeoutNs |
private long |
walTooOldNs |
(package private) W |
writer
Current log file.
|
Modifier | Constructor and Description |
---|---|
protected |
AbstractFSWAL(org.apache.hadoop.fs.FileSystem fs,
Abortable abortable,
org.apache.hadoop.fs.Path rootDir,
String logDir,
String archiveDir,
org.apache.hadoop.conf.Configuration conf,
List<WALActionsListener> listeners,
boolean failIfWALExists,
String prefix,
String suffix,
org.apache.hadoop.fs.FileSystem remoteFs,
org.apache.hadoop.fs.Path remoteWALDir) |
Modifier and Type | Method and Description |
---|---|
void |
abortCacheFlush(byte[] encodedRegionName)
Abort a cache flush.
|
protected long |
append(RegionInfo hri,
WALKeyImpl key,
WALEdit edits,
boolean inMemstore)
Append a set of edits to the WAL.
|
private void |
appendAndSync() |
long |
appendData(RegionInfo info,
WALKeyImpl key,
WALEdit edits)
Append a set of data edits to the WAL.
|
protected boolean |
appendEntry(W writer,
FSWALEntry entry) |
long |
appendMarker(RegionInfo info,
WALKeyImpl key,
WALEdit edits)
Append an operational 'meta' event marker edit to the WAL.
|
protected void |
archive(Pair<org.apache.hadoop.fs.Path,Long> log) |
protected void |
archiveLogFile(org.apache.hadoop.fs.Path p) |
protected void |
atHeadOfRingBufferEventHandlerAppend()
Exposed for testing only.
|
protected void |
blockOnSync(SyncFuture syncFuture) |
private int |
calculateMaxLogFiles(org.apache.hadoop.conf.Configuration conf,
long logRollSize) |
void |
checkLogLowReplication(long checkInterval) |
protected void |
checkSlowSyncCount() |
private void |
cleanOldLogs()
Archive old logs.
|
void |
close()
Caller no longer needs any edits from this WAL.
|
protected void |
closeWriter(W writer,
org.apache.hadoop.fs.Path path) |
void |
completeCacheFlush(byte[] encodedRegionName,
long maxFlushedSeqId)
Complete the cache flush.
|
protected org.apache.hadoop.fs.Path |
computeFilename(long filenum)
This is a convenience method that computes a new filename with a given file-number.
|
private void |
consume() |
private IOException |
convertInterruptedExceptionToIOException(InterruptedException ie) |
private W |
createCombinedWriter(W localWriter,
org.apache.hadoop.fs.Path localPath) |
protected abstract W |
createCombinedWriter(W localWriter,
W remoteWriter) |
protected void |
createSingleThreadPoolConsumeExecutor(String walType,
org.apache.hadoop.fs.Path rootDir,
String prefix) |
private io.opentelemetry.api.trace.Span |
createSpan(String name) |
protected abstract W |
createWriterInstance(org.apache.hadoop.fs.FileSystem fs,
org.apache.hadoop.fs.Path path) |
protected abstract void |
doAppend(W writer,
FSWALEntry entry) |
protected abstract boolean |
doCheckLogLowReplication() |
protected boolean |
doCheckSlowSync()
Returns true if we exceeded the slow sync roll threshold over the last check interval
|
protected void |
doCleanUpResources() |
protected void |
doReplaceWriter(org.apache.hadoop.fs.Path oldPath,
org.apache.hadoop.fs.Path newPath,
W nextWriter)
Notice that you need to clear the
rollRequested flag in this method, as the new writer
will begin to work before returning from this method. |
protected void |
doShutdown() |
protected void |
doSync(boolean forceSync) |
protected void |
doSync(long txid,
boolean forceSync) |
protected abstract CompletableFuture<Long> |
doWriterSync(W writer,
boolean shouldUseHsync,
long txidWhenSyn) |
private void |
drainNonMarkerEditsAndFailSyncs() |
private static IOException |
ensureIOException(Throwable t) |
private static int |
epoch(int epochAndState) |
(package private) Map<byte[],List<byte[]>> |
findRegionsToForceFlush()
If the number of un-archived WAL files ('live' WALs) is greater than maximum allowed, check the
first (oldest) WAL, and return those regions which should be flushed so that it can be
let-go/'archived'.
|
private int |
finishSync() |
private int |
finishSyncLowerThanTxid(long txid) |
WALCoprocessorHost |
getCoprocessorHost()
Returns Coprocessor host.
|
org.apache.hadoop.fs.Path |
getCurrentFileName()
This is a convenience method that computes a new filename using the current WAL file-number.
|
long |
getEarliestMemStoreSeqNum(byte[] encodedRegionName)
Gets the earliest unflushed sequence id in the memstore for the region.
|
long |
getEarliestMemStoreSeqNum(byte[] encodedRegionName,
byte[] familyName)
Gets the earliest unflushed sequence id in the memstore for the store.
|
long |
getFilenum() |
protected long |
getFileNumFromFileName(org.apache.hadoop.fs.Path fileName)
A log file has a creation timestamp (in ms) in its file name (filenum). |
(package private) org.apache.hadoop.fs.FileStatus[] |
getFiles()
Get the backing files associated with this WAL.
|
int |
getInflightWALCloseCount()
Returns number of WALs currently in the process of closing.
|
private static long |
getLastTxid(Deque<FSWALEntry> queue) |
long |
getLogFileSize()
Returns the size of log files in use
|
OptionalLong |
getLogFileSizeIfBeingWritten(org.apache.hadoop.fs.Path path)
if the given
path is being written currently, then return its length. |
(package private) abstract int |
getLogReplication()
This method gets the datanode replication count for the current WAL.
|
private org.apache.hadoop.fs.Path |
getNewPath()
retrieve the next path to use for writing.
|
int |
getNumLogFiles()
Returns the number of log files in use
|
int |
getNumRolledLogFiles()
Returns the number of rolled log files
|
org.apache.hadoop.fs.Path |
getOldPath() |
(package private) abstract org.apache.hadoop.hdfs.protocol.DatanodeInfo[] |
getPipeline()
This method gets the pipeline for the current WAL.
|
protected int |
getPreallocatedEventCount() |
protected long |
getSyncedTxid(long processedTxid,
long completableFutureResult)
This method is to adapt
FSHLog and AsyncFSWAL . |
protected SyncFuture |
getSyncFuture(long sequence,
boolean forceSync) |
(package private) long |
getUnflushedEntriesCount() |
static org.apache.hadoop.fs.Path |
getWALArchivePath(org.apache.hadoop.fs.Path archiveDir,
org.apache.hadoop.fs.Path p) |
(package private) W |
getWriter() |
void |
init()
Used to initialize the WAL.
|
private boolean |
isHsync(long beginTxid,
long endTxid) |
protected boolean |
isLogRollRequested() |
(package private) boolean |
isUnflushedEntries() |
protected boolean |
isWriterBroken() |
protected void |
logRollAndSetupWalProps(org.apache.hadoop.fs.Path oldPath,
org.apache.hadoop.fs.Path newPath,
long oldFileLen) |
static void |
main(String[] args)
Pass one or more log file names, and it will either dump out a text version on
stdout or split the specified log files. |
private void |
markClosedAndClean(org.apache.hadoop.fs.Path path)
Mark this WAL file as closed and call cleanOldLogs to see if we can archive this file.
|
protected void |
markFutureDoneAndOffer(SyncFuture future,
long txid,
Throwable t)
Helper that marks the future as DONE and offers it back to the cache.
|
private void |
onAppendEntryFailed(IOException exception) |
private void |
onException(long epochWhenSync,
Throwable error) |
protected abstract void |
onWriterReplaced(W nextWriter) |
private long |
postAppend(WAL.Entry e,
long elapsedTime) |
protected void |
postSync(long timeInNanos,
int handlerSyncs) |
void |
registerWALActionsListener(WALActionsListener listener)
Registers WALActionsListener
|
(package private) org.apache.hadoop.fs.Path |
replaceWriter(org.apache.hadoop.fs.Path oldPath,
org.apache.hadoop.fs.Path newPath,
W nextWriter)
Cleans up current writer closing it and then puts in place the passed in
nextWriter . |
void |
requestLogRoll() |
protected void |
requestLogRoll(WALActionsListener.RollRequestReason reason) |
Map<byte[],List<byte[]>> |
rollWriter()
Roll the log writer.
|
Map<byte[],List<byte[]>> |
rollWriter(boolean force)
Roll the log writer.
|
private Map<byte[],List<byte[]>> |
rollWriterInternal(boolean force) |
protected void |
setWaitOnShutdownInSeconds(int waitOnShutdownInSeconds,
String waitOnShutdownInSecondsConfigKey) |
private boolean |
shouldScheduleConsumer() |
void |
shutdown()
Stop accepting new writes.
|
void |
skipRemoteWAL(boolean markerEditOnly)
Tell the WAL that when creating new writer you can skip creating the remote writer.
|
private static void |
split(org.apache.hadoop.conf.Configuration conf,
org.apache.hadoop.fs.Path p) |
protected long |
stampSequenceIdAndPublishToRingBuffer(RegionInfo hri,
WALKeyImpl key,
WALEdit edits,
boolean inMemstore,
com.lmax.disruptor.RingBuffer<RingBufferTruck> ringBuffer) |
Long |
startCacheFlush(byte[] encodedRegionName,
Map<byte[],Long> familyToSeq) |
Long |
startCacheFlush(byte[] encodedRegionName,
Set<byte[]> families)
WAL keeps track of the sequence numbers that are as yet not flushed in memstores in order to be
able to do accounting to figure which WALs can be let go.
|
void |
sync()
Sync what we have in the WAL.
|
void |
sync(boolean forceSync) |
void |
sync(long txid)
Sync the WAL if the txId was not already sync'd.
|
void |
sync(long txid,
boolean forceSync) |
private void |
sync(W writer) |
private void |
syncCompleted(long epochWhenSync,
W writer,
long processedTxid,
long startTimeNs) |
private void |
syncFailed(long epochWhenSync,
Throwable error) |
private void |
tellListenersAboutPostLogRoll(org.apache.hadoop.fs.Path oldPath,
org.apache.hadoop.fs.Path newPath)
Tell listeners about post log roll.
|
private void |
tellListenersAboutPreLogRoll(org.apache.hadoop.fs.Path oldPath,
org.apache.hadoop.fs.Path newPath)
Tell listeners about pre log roll.
|
String |
toString()
Human readable identifying information about the state of this WAL.
|
private boolean |
trySetReadyForRolling() |
boolean |
unregisterWALActionsListener(WALActionsListener listener)
Unregisters WALActionsListener
|
void |
updateStore(byte[] encodedRegionName,
byte[] familyName,
Long sequenceid,
boolean onlyIfGreater)
updates the sequence number of a specific store.
|
private static void |
usage() |
protected void |
waitForSafePoint() |
private static boolean |
waitingRoll(int epochAndState) |
private static boolean |
writerBroken(int epochAndState) |
private static final org.slf4j.Logger LOG
private static final Comparator<SyncFuture> SEQ_COMPARATOR
private static final String SURVIVED_TOO_LONG_SEC_KEY
private static final int SURVIVED_TOO_LONG_SEC_DEFAULT
private static final long SURVIVED_TOO_LONG_LOG_INTERVAL_NS
protected static final String SLOW_SYNC_TIME_MS
protected static final int DEFAULT_SLOW_SYNC_TIME_MS
protected static final String ROLL_ON_SYNC_TIME_MS
protected static final int DEFAULT_ROLL_ON_SYNC_TIME_MS
protected static final String SLOW_SYNC_ROLL_THRESHOLD
protected static final int DEFAULT_SLOW_SYNC_ROLL_THRESHOLD
protected static final String SLOW_SYNC_ROLL_INTERVAL_MS
protected static final int DEFAULT_SLOW_SYNC_ROLL_INTERVAL_MS
public static final String WAL_SYNC_TIMEOUT_MS
protected static final int DEFAULT_WAL_SYNC_TIMEOUT_MS
public static final String WAL_ROLL_MULTIPLIER
public static final String MAX_LOGS
public static final String RING_BUFFER_SLOT_COUNT
public static final String WAL_SHUTDOWN_WAIT_TIMEOUT_MS
public static final int DEFAULT_WAL_SHUTDOWN_WAIT_TIMEOUT_MS
public static final String WAL_BATCH_SIZE
public static final long DEFAULT_WAL_BATCH_SIZE
public static final String WAL_AVOID_LOCAL_WRITES_KEY
public static final boolean WAL_AVOID_LOCAL_WRITES_DEFAULT
protected final org.apache.hadoop.fs.FileSystem fs
protected final org.apache.hadoop.fs.Path walDir
private final org.apache.hadoop.fs.FileSystem remoteFs
private final org.apache.hadoop.fs.Path remoteWALDir
protected final org.apache.hadoop.fs.Path walArchiveDir
protected final org.apache.hadoop.fs.PathFilter ourFiles
protected final String walFilePrefix
protected final String walFileSuffix
protected final String prefixPathStr
protected final WALCoprocessorHost coprocessorHost
protected final org.apache.hadoop.conf.Configuration conf
protected final List<WALActionsListener> listeners
protected final Map<String,W extends WALProvider.WriterBase> inflightWALClosures
protected final SequenceIdAccounting sequenceIdAccounting
protected final long slowSyncNs
protected final long rollOnSyncNs
protected final int slowSyncRollThreshold
protected final int slowSyncCheckInterval
protected final AtomicInteger slowSyncCount
private final long walSyncTimeoutNs
private final long walTooOldNs
protected final long logrollsize
protected final long blocksize
protected final int maxLogs
protected final boolean useHsync
protected final ReentrantLock rollWriterLock
protected final AtomicLong filenum
protected final AtomicInteger numEntries
protected volatile long highestUnsyncedTxid
protected final AtomicLong highestSyncedTxid
Updated to the transaction id of the last successful sync call. This can be less than highestUnsyncedTxid for the case where we have an append for which a sync has not yet come in.
protected final AtomicLong totalLogSize
volatile W extends WALProvider.WriterBase writer
private volatile long lastTimeCheckLowReplication
private volatile long lastTimeCheckSlowSync
protected volatile boolean closed
protected final AtomicBoolean shutdown
protected final long walShutdownTimeout
private long nextLogTooOldNs
final Comparator<org.apache.hadoop.fs.Path> LOG_NAME_COMPARATOR
protected final ConcurrentNavigableMap<org.apache.hadoop.fs.Path,AbstractFSWAL.WALProps> walFile2Props
protected final SyncFutureCache syncFutureCache
protected final String implClassName
Performance testing shows getClass().getSimpleName() might be a bottleneck so we store it here, refer to HBASE-17676 for more details
protected final AtomicBoolean rollRequested
protected final ExecutorService closeExecutor
private final ExecutorService logArchiveExecutor
private final int archiveRetries
protected ExecutorService consumeExecutor
private final Lock consumeLock
protected Supplier<Boolean> hasConsumerTask
private static final int MAX_EPOCH
private volatile int epochAndState
private boolean readyForRolling
private final Condition readyForRollingCond
private final com.lmax.disruptor.RingBuffer<RingBufferTruck> waitingConsumePayloads
private final com.lmax.disruptor.Sequence waitingConsumePayloadsGatingSequence
private final AtomicBoolean consumerScheduled
private final long batchSize
protected final Deque<FSWALEntry> toWriteAppends
protected final Deque<FSWALEntry> unackedAppends
protected final SortedSet<SyncFuture> syncFutures
protected long highestProcessedAppendTxid
private long fileLengthAtLastSync
private long highestProcessedAppendTxidAtLastSync
private int waitOnShutdownInSeconds
private String waitOnShutdownInSecondsConfigKey
protected boolean shouldShutDownConsumeExecutorWhenClose
private volatile boolean skipRemoteWAL
private volatile boolean markerEditOnly
protected AbstractFSWAL(org.apache.hadoop.fs.FileSystem fs, Abortable abortable, org.apache.hadoop.fs.Path rootDir, String logDir, String archiveDir, org.apache.hadoop.conf.Configuration conf, List<WALActionsListener> listeners, boolean failIfWALExists, String prefix, String suffix, org.apache.hadoop.fs.FileSystem remoteFs, org.apache.hadoop.fs.Path remoteWALDir) throws FailedLogCloseException, IOException
FailedLogCloseException
IOException
public long getFilenum()
protected long getFileNumFromFileName(org.apache.hadoop.fs.Path fileName)
A log file has a creation timestamp (in ms) in its file name (filenum). This helper method returns the creation timestamp from a given log file. It extracts the timestamp assuming the filename is created with the computeFilename(long filenum) method.
private int calculateMaxLogFiles(org.apache.hadoop.conf.Configuration conf, long logRollSize)
protected final int getPreallocatedEventCount()
protected final void setWaitOnShutdownInSeconds(int waitOnShutdownInSeconds, String waitOnShutdownInSecondsConfigKey)
protected final void createSingleThreadPoolConsumeExecutor(String walType, org.apache.hadoop.fs.Path rootDir, String prefix)
public void init() throws IOException
init
in interface WAL
IOException
public void registerWALActionsListener(WALActionsListener listener)
WAL
registerWALActionsListener
in interface WAL
public boolean unregisterWALActionsListener(WALActionsListener listener)
WAL
unregisterWALActionsListener
in interface WAL
public WALCoprocessorHost getCoprocessorHost()
WAL
getCoprocessorHost
in interface WAL
public Long startCacheFlush(byte[] encodedRegionName, Set<byte[]> families)
WAL
Currently, it is expected that the update lock is held for the region; i.e. no concurrent appends while we set up cache flush.
startCacheFlush
in interface WAL
families - Families to flush. May be a subset of all families in the region.
Returns: HConstants.NO_SEQNUM if we are flushing the whole region OR if we are flushing a subset of all families but there are no edits in those families not being flushed; in other words, this is effectively the same as a flush of all of the region though we were passed a subset of families. Otherwise, it returns the sequence id of the oldest/lowest outstanding edit.
See also: WAL.completeCacheFlush(byte[], long), WAL.abortCacheFlush(byte[])
public Long startCacheFlush(byte[] encodedRegionName, Map<byte[],Long> familyToSeq)
startCacheFlush
in interface WAL
public void completeCacheFlush(byte[] encodedRegionName, long maxFlushedSeqId)
WAL
completeCacheFlush
in interface WAL
encodedRegionName - Encoded region name.
maxFlushedSeqId - The maxFlushedSeqId for this flush. There is no edit in memory that is less than this sequence id.
See also: WAL.startCacheFlush(byte[], Set), WAL.abortCacheFlush(byte[])
public void abortCacheFlush(byte[] encodedRegionName)
WAL
abortCacheFlush
in interface WAL
encodedRegionName
- Encoded region name.public long getEarliestMemStoreSeqNum(byte[] encodedRegionName)
WAL
getEarliestMemStoreSeqNum
in interface WAL
encodedRegionName
- The region to get the number for.public long getEarliestMemStoreSeqNum(byte[] encodedRegionName, byte[] familyName)
WAL
getEarliestMemStoreSeqNum
in interface WAL
encodedRegionName
- The region to get the number for.familyName
- The family to get the number for.public Map<byte[],List<byte[]>> rollWriter() throws FailedLogCloseException, IOException
WAL
rollWriter
in interface WAL
RegionInfo.getEncodedName()
FailedLogCloseException
IOException
public final void sync() throws IOException
WAL
sync
in interface WAL
IOException
public final void sync(long txid) throws IOException
WAL
sync
in interface WAL
txid
- Transaction id to sync to.IOException
public final void sync(boolean forceSync) throws IOException
sync
in interface WAL
forceSync
- Flag to force sync rather than flushing to the buffer. Example - Hadoop hflush
vs hsync.IOException
public final void sync(long txid, boolean forceSync) throws IOException
sync
in interface WAL
txid
- Transaction id to sync to.forceSync
- Flag to force sync rather than flushing to the buffer. Example - Hadoop hflush
vs hsync.IOException
protected org.apache.hadoop.fs.Path computeFilename(long filenum)
filenum
- to usepublic org.apache.hadoop.fs.Path getCurrentFileName()
private org.apache.hadoop.fs.Path getNewPath() throws IOException
IOException
public org.apache.hadoop.fs.Path getOldPath()
private void tellListenersAboutPreLogRoll(org.apache.hadoop.fs.Path oldPath, org.apache.hadoop.fs.Path newPath) throws IOException
IOException
private void tellListenersAboutPostLogRoll(org.apache.hadoop.fs.Path oldPath, org.apache.hadoop.fs.Path newPath) throws IOException
IOException
public int getNumRolledLogFiles()
public int getNumLogFiles()
Map<byte[],List<byte[]>> findRegionsToForceFlush() throws IOException
IOException
private void markClosedAndClean(org.apache.hadoop.fs.Path path)
private void cleanOldLogs()
public static org.apache.hadoop.fs.Path getWALArchivePath(org.apache.hadoop.fs.Path archiveDir, org.apache.hadoop.fs.Path p)
protected void archiveLogFile(org.apache.hadoop.fs.Path p) throws IOException
IOException
protected final void logRollAndSetupWalProps(org.apache.hadoop.fs.Path oldPath, org.apache.hadoop.fs.Path newPath, long oldFileLen)
private io.opentelemetry.api.trace.Span createSpan(String name)
org.apache.hadoop.fs.Path replaceWriter(org.apache.hadoop.fs.Path oldPath, org.apache.hadoop.fs.Path newPath, W nextWriter) throws IOException
Cleans up the current writer, closing it, and then puts in place the passed-in nextWriter.
oldPath - may be null
newPath - may be null
nextWriter - may be null
Returns: newPath
Throws: IOException - if there is a problem flushing or closing the underlying FS
protected final void blockOnSync(SyncFuture syncFuture) throws IOException
IOException
private static IOException ensureIOException(Throwable t)
private IOException convertInterruptedExceptionToIOException(InterruptedException ie)
private W createCombinedWriter(W localWriter, org.apache.hadoop.fs.Path localPath) throws IOException, CommonFSUtils.StreamLacksCapabilityException
private Map<byte[],List<byte[]>> rollWriterInternal(boolean force) throws IOException
IOException
public Map<byte[],List<byte[]>> rollWriter(boolean force) throws IOException
WAL
rollWriter
in interface WAL
RegionInfo.getEncodedName()
IOException
public long getLogFileSize()
public void requestLogRoll()
org.apache.hadoop.fs.FileStatus[] getFiles() throws IOException
IOException
public void shutdown() throws IOException
WAL
shutdown
in interface WAL
IOException
public void close() throws IOException
WAL
close
in interface Closeable
close
in interface AutoCloseable
close
in interface WAL
IOException
public int getInflightWALCloseCount()
public void updateStore(byte[] encodedRegionName, byte[] familyName, Long sequenceid, boolean onlyIfGreater)
updateStore
in interface WAL
protected final SyncFuture getSyncFuture(long sequence, boolean forceSync)
protected boolean isLogRollRequested()
protected final void requestLogRoll(WALActionsListener.RollRequestReason reason)
long getUnflushedEntriesCount()
boolean isUnflushedEntries()
protected void atHeadOfRingBufferEventHandlerAppend()
protected final boolean appendEntry(W writer, FSWALEntry entry) throws IOException
IOException
private long postAppend(WAL.Entry e, long elapsedTime) throws IOException
IOException
protected final void postSync(long timeInNanos, int handlerSyncs)
protected final long stampSequenceIdAndPublishToRingBuffer(RegionInfo hri, WALKeyImpl key, WALEdit edits, boolean inMemstore, com.lmax.disruptor.RingBuffer<RingBufferTruck> ringBuffer) throws IOException
IOException
public String toString()
WAL
public OptionalLong getLogFileSizeIfBeingWritten(org.apache.hadoop.fs.Path path)
If the given path is being written currently, then return its length.
This is used by replication to prevent replicating unacked log entries. See https://issues.apache.org/jira/browse/HBASE-14004 for more details.
getLogFileSizeIfBeingWritten
in interface WALFileLengthProvider
public long appendData(RegionInfo info, WALKeyImpl key, WALEdit edits) throws IOException
WAL
On return, key will have the region edit/sequence id filled in.
appendData
in interface WAL
info - the regioninfo associated with the append
key - Modified by this call; we add to it this edit's region edit/sequence id.
edits - Edits to append. MAY CONTAIN NO EDITS for the case where we want to get an edit sequence id that is after all currently appended edits.
Returns: a 'transaction id'; on return, key will have the region edit/sequence id in it.
Throws: IOException
See also: WAL.appendMarker(RegionInfo, WALKeyImpl, WALEdit)
public long appendMarker(RegionInfo info, WALKeyImpl key, WALEdit edits) throws IOException
WAL
The difference between a marker append and a data append, as in WAL.appendData(RegionInfo, WALKeyImpl, WALEdit), is that a marker will not have transitioned through the memstore.
The WAL is not flushed/sync'd after this transaction completes BUT on return this edit must have its region edit/sequence id assigned, else it messes up our unification of mvcc and sequenceid. On return, key will have the region edit/sequence id filled in.
appendMarker
in interface WAL
info - the regioninfo associated with the append
key - Modified by this call; we add to it this edit's region edit/sequence id.
edits - Edits to append. MAY CONTAIN NO EDITS for the case where we want to get an edit sequence id that is after all currently appended edits.
Returns: a 'transaction id'; on return, key will have the region edit/sequence id in it.
Throws: IOException
See also: WAL.appendData(RegionInfo, WALKeyImpl, WALEdit)
protected void markFutureDoneAndOffer(SyncFuture future, long txid, Throwable t)
private static boolean waitingRoll(int epochAndState)
private static boolean writerBroken(int epochAndState)
private static int epoch(int epochAndState)
private boolean trySetReadyForRolling()
private void syncFailed(long epochWhenSync, Throwable error)
private void onException(long epochWhenSync, Throwable error)
private void syncCompleted(long epochWhenSync, W writer, long processedTxid, long startTimeNs)
private boolean isHsync(long beginTxid, long endTxid)
protected long getSyncedTxid(long processedTxid, long completableFutureResult)
This method is to adapt FSHLog and AsyncFSWAL. For AsyncFSWAL, we use highestProcessedAppendTxid at the point we call the AsyncFSWAL.doWriterSync(org.apache.hadoop.hbase.wal.WALProvider.AsyncWriter, boolean, long) method as the successful syncedTxid. For FSHLog, because we use multi-threaded SyncRunners, we use the result of the CompletableFuture as the successful syncedTxid.
protected abstract CompletableFuture<Long> doWriterSync(W writer, boolean shouldUseHsync, long txidWhenSyn)
private int finishSyncLowerThanTxid(long txid)
private int finishSync()
private static long getLastTxid(Deque<FSWALEntry> queue)
private void appendAndSync() throws IOException
IOException
private void consume()
private boolean shouldScheduleConsumer()
protected long append(RegionInfo hri, WALKeyImpl key, WALEdit edits, boolean inMemstore) throws IOException
On return, key will have the region edit/sequence id filled in.
NOTE: This append, at a time that is usually after this call returns, starts an mvcc transaction by calling 'begin', in which we assign this update a sequenceid. At assignment time, we stamp all the passed-in Cells inside the WALEdit with their sequenceId. You must 'complete' this mvcc transaction by calling MultiVersionConcurrencyControl#complete(...) or a variant; otherwise mvcc will get stuck. Do it in the finally of a try/finally block within which this append and any subsequent operations, like sync or update of memstore, live. Get the WriteEntry to pass to mvcc out of the passed-in WALKey walKey parameter. Be warned that the WriteEntry is not immediately available on return from this method. It WILL be available subsequent to a sync of this append; otherwise, you will just have to wait on the WriteEntry to get filled in.
hri - the regioninfo associated with the append
key - Modified by this call; we add to it this edit's region edit/sequence id.
edits - Edits to append. MAY CONTAIN NO EDITS for the case where we want to get an edit sequence id that is after all currently appended edits.
inMemstore - Always true except for the case where we are writing a region event meta marker edit, for example, a compaction completion record into the WAL or noting a Region Open event. In these cases the entry is just so we can finish an unfinished compaction after a crash when the new Server reads the WAL on recovery, etc. These transition event 'Markers' do not go via the memstore. When inMemstore is false, we presume a Marker event edit.
Returns: a 'transaction id'; on return, key will have the region edit/sequence id in it.
Throws: IOException
protected void doSync(boolean forceSync) throws IOException
IOException
protected void doSync(long txid, boolean forceSync) throws IOException
IOException
private void drainNonMarkerEditsAndFailSyncs()
protected abstract W createWriterInstance(org.apache.hadoop.fs.FileSystem fs, org.apache.hadoop.fs.Path path) throws IOException, CommonFSUtils.StreamLacksCapabilityException
protected abstract W createCombinedWriter(W localWriter, W remoteWriter)
protected final void waitForSafePoint()
protected final void closeWriter(W writer, org.apache.hadoop.fs.Path path)
protected void doReplaceWriter(org.apache.hadoop.fs.Path oldPath, org.apache.hadoop.fs.Path newPath, W nextWriter) throws IOException
Notice that you need to clear the rollRequested flag in this method, as the new writer will begin to work before returning from this method. If we clear the flag after returning from this call, we may miss a roll request. The implementation class should choose a proper place to clear the rollRequested flag, so we do not miss a roll request, typically before you start writing to the new writer.
Throws: IOException
protected abstract void onWriterReplaced(W nextWriter)
protected void doShutdown() throws IOException
IOException
protected void doCleanUpResources()
protected abstract void doAppend(W writer, FSWALEntry entry) throws IOException
IOException
abstract org.apache.hadoop.hdfs.protocol.DatanodeInfo[] getPipeline()
abstract int getLogReplication()
protected abstract boolean doCheckLogLowReplication()
protected boolean isWriterBroken()
private void onAppendEntryFailed(IOException exception)
protected void checkSlowSyncCount()
protected boolean doCheckSlowSync()
public void checkLogLowReplication(long checkInterval)
public void skipRemoteWAL(boolean markerEditOnly)
WAL
Used by sync replication for switching states from ACTIVE, where the remote cluster is broken.
skipRemoteWAL
in interface WAL
private static void split(org.apache.hadoop.conf.Configuration conf, org.apache.hadoop.fs.Path p) throws IOException
IOException
private static void usage()
public static void main(String[] args) throws IOException
Pass one or more log file names, and it will either dump out a text version on stdout or split the specified log files.
Throws: IOException
Copyright © 2007–2020 The Apache Software Foundation. All rights reserved.