001/*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018package org.apache.hadoop.hbase.wal;
019
020import java.io.Closeable;
021import java.io.IOException;
022import java.util.List;
023import java.util.Map;
024import java.util.Set;
025import org.apache.hadoop.hbase.HConstants;
026import org.apache.hadoop.hbase.client.RegionInfo;
027import org.apache.hadoop.hbase.regionserver.wal.FailedLogCloseException;
028import org.apache.hadoop.hbase.regionserver.wal.WALActionsListener;
029import org.apache.hadoop.hbase.regionserver.wal.WALCoprocessorHost;
030import org.apache.hadoop.hbase.regionserver.wal.WALSyncTimeoutIOException;
031import org.apache.hadoop.hbase.replication.regionserver.WALFileLengthProvider;
032import org.apache.yetus.audience.InterfaceAudience;
033import org.apache.yetus.audience.InterfaceStability;
034
035/**
036 * A Write Ahead Log (WAL) provides service for reading, writing waledits. This interface provides
037 * APIs for WAL users (such as RegionServer) to use the WAL (do append, sync, etc). Note that some
038 * internals, such as log rolling and performance evaluation tools, will use WAL.equals to determine
039 * if they have already seen a given WAL.
040 */
041@InterfaceAudience.Private
042@InterfaceStability.Evolving
043public interface WAL extends Closeable, WALFileLengthProvider {
044
045  /**
046   * Used to initialize the WAL. Usually this is for creating the first writer.
047   */
048  default void init() throws IOException {
049  }
050
051  /**
052   * Registers WALActionsListener
053   */
054  void registerWALActionsListener(final WALActionsListener listener);
055
056  /**
057   * Unregisters WALActionsListener
058   */
059  boolean unregisterWALActionsListener(final WALActionsListener listener);
060
061  /**
062   * Roll the log writer. That is, start writing log messages to a new file.
063   * <p/>
064   * The implementation is synchronized in order to make sure there's one rollWriter running at any
065   * given time.
066   * @return If lots of logs, flush the stores of returned regions so next time through we can clean
067   *         logs. Returns null if nothing to flush. Names are actual region names as returned by
068   *         {@link RegionInfo#getEncodedName()}
069   */
070  Map<byte[], List<byte[]>> rollWriter() throws FailedLogCloseException, IOException;
071
072  /**
073   * Roll the log writer. That is, start writing log messages to a new file.
074   * <p/>
075   * The implementation is synchronized in order to make sure there's one rollWriter running at any
076   * given time. If true, force creation of a new writer even if no entries have been written to the
077   * current writer
078   * @return If lots of logs, flush the stores of returned regions so next time through we can clean
079   *         logs. Returns null if nothing to flush. Names are actual region names as returned by
080   *         {@link RegionInfo#getEncodedName()}
081   */
082  Map<byte[], List<byte[]>> rollWriter(boolean force) throws IOException;
083
084  /**
085   * Stop accepting new writes. If we have unsynced writes still in buffer, sync them. Extant edits
086   * are left in place in backing storage to be replayed later.
087   */
088  void shutdown() throws IOException;
089
090  /**
091   * Caller no longer needs any edits from this WAL. Implementers are free to reclaim underlying
092   * resources after this call; i.e. filesystem based WALs can archive or delete files.
093   */
094  @Override
095  void close() throws IOException;
096
097  /**
098   * Append a set of data edits to the WAL. 'Data' here means that the content in the edits will
099   * also have transitioned through the memstore.
100   * <p/>
101   * The WAL is not flushed/sync'd after this transaction completes BUT on return this edit must
102   * have its region edit/sequence id assigned else it messes up our unification of mvcc and
103   * sequenceid. On return <code>key</code> will have the region edit/sequence id filled in.
104   * @param info  the regioninfo associated with append
105   * @param key   Modified by this call; we add to it this edits region edit/sequence id.
106   * @param edits Edits to append. MAY CONTAIN NO EDITS for case where we want to get an edit
107   *              sequence id that is after all currently appended edits.
108   * @return Returns a 'transaction id' and <code>key</code> will have the region edit/sequence id
109   *         in it.
110   * @see #appendMarker(RegionInfo, WALKeyImpl, WALEdit)
111   */
112  long appendData(RegionInfo info, WALKeyImpl key, WALEdit edits) throws IOException;
113
114  /**
115   * Append an operational 'meta' event marker edit to the WAL. A marker meta edit could be a
116   * FlushDescriptor, a compaction marker, or a region event marker; e.g. region open or region
117   * close. The difference between a 'marker' append and a 'data' append as in
118   * {@link #appendData(RegionInfo, WALKeyImpl, WALEdit)}is that a marker will not have transitioned
119   * through the memstore.
120   * <p/>
121   * The WAL is not flushed/sync'd after this transaction completes BUT on return this edit must
122   * have its region edit/sequence id assigned else it messes up our unification of mvcc and
123   * sequenceid. On return <code>key</code> will have the region edit/sequence id filled in.
124   * @param info  the regioninfo associated with append
125   * @param key   Modified by this call; we add to it this edits region edit/sequence id.
126   * @param edits Edits to append. MAY CONTAIN NO EDITS for case where we want to get an edit
127   *              sequence id that is after all currently appended edits.
128   * @return Returns a 'transaction id' and <code>key</code> will have the region edit/sequence id
129   *         in it.
130   * @see #appendData(RegionInfo, WALKeyImpl, WALEdit)
131   */
132  long appendMarker(RegionInfo info, WALKeyImpl key, WALEdit edits) throws IOException;
133
134  /**
135   * updates the seuence number of a specific store. depending on the flag: replaces current seq
136   * number if the given seq id is bigger, or even if it is lower than existing one
137   */
138  void updateStore(byte[] encodedRegionName, byte[] familyName, Long sequenceid,
139    boolean onlyIfGreater);
140
141  /**
142   * Sync what we have in the WAL.
143   * @throws when timeout, it would throw {@link WALSyncTimeoutIOException}.
144   */
145  void sync() throws IOException;
146
147  /**
148   * Sync the WAL if the txId was not already sync'd.
149   * @param txid Transaction id to sync to.
150   * @throws when timeout, it would throw {@link WALSyncTimeoutIOException}.
151   */
152  void sync(long txid) throws IOException;
153
154  /**
155   * @param forceSync Flag to force sync rather than flushing to the buffer. Example - Hadoop hflush
156   *                  vs hsync.
157   * @throws when timeout, it would throw {@link WALSyncTimeoutIOException}.
158   */
159  default void sync(boolean forceSync) throws IOException {
160    sync();
161  }
162
163  /**
164   * @param txid      Transaction id to sync to.
165   * @param forceSync Flag to force sync rather than flushing to the buffer. Example - Hadoop hflush
166   *                  vs hsync.
167   * @throws when timeout, it would throw {@link WALSyncTimeoutIOException}.
168   */
169  default void sync(long txid, boolean forceSync) throws IOException {
170    sync(txid);
171  }
172
173  /**
174   * WAL keeps track of the sequence numbers that are as yet not flushed im memstores in order to be
175   * able to do accounting to figure which WALs can be let go. This method tells WAL that some
176   * region is about to flush. The flush can be the whole region or for a column family of the
177   * region only.
178   * <p>
179   * Currently, it is expected that the update lock is held for the region; i.e. no concurrent
180   * appends while we set up cache flush.
181   * @param families Families to flush. May be a subset of all families in the region.
182   * @return Returns {@link HConstants#NO_SEQNUM} if we are flushing the whole region OR if we are
183   *         flushing a subset of all families but there are no edits in those families not being
184   *         flushed; in other words, this is effectively same as a flush of all of the region
185   *         though we were passed a subset of regions. Otherwise, it returns the sequence id of the
186   *         oldest/lowest outstanding edit.
187   * @see #completeCacheFlush(byte[], long)
188   * @see #abortCacheFlush(byte[])
189   */
190  Long startCacheFlush(final byte[] encodedRegionName, Set<byte[]> families);
191
192  Long startCacheFlush(final byte[] encodedRegionName, Map<byte[], Long> familyToSeq);
193
194  /**
195   * Complete the cache flush.
196   * @param encodedRegionName Encoded region name.
197   * @param maxFlushedSeqId   The maxFlushedSeqId for this flush. There is no edit in memory that is
198   *                          less that this sequence id.
199   * @see #startCacheFlush(byte[], Set)
200   * @see #abortCacheFlush(byte[])
201   */
202  void completeCacheFlush(final byte[] encodedRegionName, long maxFlushedSeqId);
203
204  /**
205   * Abort a cache flush. Call if the flush fails. Note that the only recovery for an aborted flush
206   * currently is a restart of the regionserver so the snapshot content dropped by the failure gets
207   * restored to the memstore.
208   * @param encodedRegionName Encoded region name.
209   */
210  void abortCacheFlush(byte[] encodedRegionName);
211
212  /** Returns Coprocessor host. */
213  WALCoprocessorHost getCoprocessorHost();
214
215  /**
216   * Gets the earliest unflushed sequence id in the memstore for the store.
217   * @param encodedRegionName The region to get the number for.
218   * @param familyName        The family to get the number for.
219   * @return The earliest/lowest/oldest sequence id if present, HConstants.NO_SEQNUM if absent.
220   */
221  long getEarliestMemStoreSeqNum(byte[] encodedRegionName, byte[] familyName);
222
223  /**
224   * Tell the WAL that when creating new writer you can skip creating the remote writer.
225   * <p>
226   * Used by sync replication for switching states from ACTIVE, where the remote cluster is broken.
227   */
228  default void skipRemoteWAL(boolean markerEditOnly) {
229  }
230
231  /**
232   * Human readable identifying information about the state of this WAL. Implementors are encouraged
233   * to include information appropriate for debugging. Consumers are advised not to rely on the
234   * details of the returned String; it does not have a defined structure.
235   */
236  @Override
237  String toString();
238
239  /**
240   * Utility class that lets us keep track of the edit with it's key.
241   */
242  class Entry {
243    private final WALEdit edit;
244    private final WALKeyImpl key;
245
246    public Entry() {
247      this(new WALKeyImpl(), new WALEdit());
248    }
249
250    /**
251     * Constructor for both params
252     * @param edit log's edit
253     * @param key  log's key
254     */
255    public Entry(WALKeyImpl key, WALEdit edit) {
256      this.key = key;
257      this.edit = edit;
258    }
259
260    /**
261     * Gets the edit
262     */
263    public WALEdit getEdit() {
264      return edit;
265    }
266
267    /**
268     * Gets the key
269     */
270    public WALKeyImpl getKey() {
271      return key;
272    }
273
274    @Override
275    public String toString() {
276      return this.key + "=" + this.edit;
277    }
278  }
279}