001/*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018package org.apache.hadoop.hbase.wal;
019
020import static org.apache.commons.lang3.StringUtils.isNumeric;
021
022import java.io.Closeable;
023import java.io.IOException;
024import java.util.List;
025import java.util.Map;
026import java.util.Set;
027import org.apache.hadoop.hbase.HConstants;
028import org.apache.hadoop.hbase.client.RegionInfo;
029import org.apache.hadoop.hbase.regionserver.wal.CompressionContext;
030import org.apache.hadoop.hbase.regionserver.wal.FailedLogCloseException;
031import org.apache.hadoop.hbase.regionserver.wal.WALActionsListener;
032import org.apache.hadoop.hbase.regionserver.wal.WALCoprocessorHost;
033import org.apache.hadoop.hbase.replication.regionserver.WALFileLengthProvider;
034import org.apache.yetus.audience.InterfaceAudience;
035import org.apache.yetus.audience.InterfaceStability;
036
037/**
038 * A Write Ahead Log (WAL) provides service for reading, writing waledits. This interface provides
039 * APIs for WAL users (such as RegionServer) to use the WAL (do append, sync, etc). Note that some
040 * internals, such as log rolling and performance evaluation tools, will use WAL.equals to determine
041 * if they have already seen a given WAL.
042 */
043@InterfaceAudience.Private
044@InterfaceStability.Evolving
045public interface WAL extends Closeable, WALFileLengthProvider {
046
047  /**
048   * Registers WALActionsListener
049   */
050  void registerWALActionsListener(final WALActionsListener listener);
051
052  /**
053   * Unregisters WALActionsListener
054   */
055  boolean unregisterWALActionsListener(final WALActionsListener listener);
056
057  /**
058   * Roll the log writer. That is, start writing log messages to a new file.
059   * <p/>
060   * The implementation is synchronized in order to make sure there's one rollWriter running at any
061   * given time.
062   * @return If lots of logs, flush the stores of returned regions so next time through we can clean
063   *         logs. Returns null if nothing to flush. Names are actual region names as returned by
064   *         {@link RegionInfo#getEncodedName()}
065   */
066  Map<byte[], List<byte[]>> rollWriter() throws FailedLogCloseException, IOException;
067
068  /**
069   * Roll the log writer. That is, start writing log messages to a new file.
070   * <p/>
071   * The implementation is synchronized in order to make sure there's one rollWriter running at any
072   * given time. n * If true, force creation of a new writer even if no entries have been written to
073   * the current writer
074   * @return If lots of logs, flush the stores of returned regions so next time through we can clean
075   *         logs. Returns null if nothing to flush. Names are actual region names as returned by
076   *         {@link RegionInfo#getEncodedName()}
077   */
078  Map<byte[], List<byte[]>> rollWriter(boolean force) throws IOException;
079
080  /**
081   * Stop accepting new writes. If we have unsynced writes still in buffer, sync them. Extant edits
082   * are left in place in backing storage to be replayed later.
083   */
084  void shutdown() throws IOException;
085
086  /**
087   * Caller no longer needs any edits from this WAL. Implementers are free to reclaim underlying
088   * resources after this call; i.e. filesystem based WALs can archive or delete files.
089   */
090  @Override
091  void close() throws IOException;
092
093  /**
094   * Append a set of data edits to the WAL. 'Data' here means that the content in the edits will
095   * also have transitioned through the memstore.
096   * <p/>
097   * The WAL is not flushed/sync'd after this transaction completes BUT on return this edit must
098   * have its region edit/sequence id assigned else it messes up our unification of mvcc and
099   * sequenceid. On return <code>key</code> will have the region edit/sequence id filled in.
100   * @param info  the regioninfo associated with append
101   * @param key   Modified by this call; we add to it this edits region edit/sequence id.
102   * @param edits Edits to append. MAY CONTAIN NO EDITS for case where we want to get an edit
103   *              sequence id that is after all currently appended edits.
104   * @return Returns a 'transaction id' and <code>key</code> will have the region edit/sequence id
105   *         in it.
106   * @see #appendMarker(RegionInfo, WALKeyImpl, WALEdit)
107   */
108  long appendData(RegionInfo info, WALKeyImpl key, WALEdit edits) throws IOException;
109
110  /**
111   * Append an operational 'meta' event marker edit to the WAL. A marker meta edit could be a
112   * FlushDescriptor, a compaction marker, or a region event marker; e.g. region open or region
113   * close. The difference between a 'marker' append and a 'data' append as in
114   * {@link #appendData(RegionInfo, WALKeyImpl, WALEdit)}is that a marker will not have transitioned
115   * through the memstore.
116   * <p/>
117   * The WAL is not flushed/sync'd after this transaction completes BUT on return this edit must
118   * have its region edit/sequence id assigned else it messes up our unification of mvcc and
119   * sequenceid. On return <code>key</code> will have the region edit/sequence id filled in.
120   * @param info  the regioninfo associated with append
121   * @param key   Modified by this call; we add to it this edits region edit/sequence id.
122   * @param edits Edits to append. MAY CONTAIN NO EDITS for case where we want to get an edit
123   *              sequence id that is after all currently appended edits.
124   * @return Returns a 'transaction id' and <code>key</code> will have the region edit/sequence id
125   *         in it.
126   * @see #appendData(RegionInfo, WALKeyImpl, WALEdit)
127   */
128  long appendMarker(RegionInfo info, WALKeyImpl key, WALEdit edits) throws IOException;
129
130  /**
131   * updates the seuence number of a specific store. depending on the flag: replaces current seq
132   * number if the given seq id is bigger, or even if it is lower than existing one
133   */
134  void updateStore(byte[] encodedRegionName, byte[] familyName, Long sequenceid,
135    boolean onlyIfGreater);
136
137  /**
138   * Sync what we have in the WAL.
139   */
140  void sync() throws IOException;
141
142  /**
143   * Sync the WAL if the txId was not already sync'd.
144   * @param txid Transaction id to sync to.
145   */
146  void sync(long txid) throws IOException;
147
148  /**
149   * @param forceSync Flag to force sync rather than flushing to the buffer. Example - Hadoop hflush
150   *                  vs hsync.
151   */
152  default void sync(boolean forceSync) throws IOException {
153    sync();
154  }
155
156  /**
157   * @param txid      Transaction id to sync to.
158   * @param forceSync Flag to force sync rather than flushing to the buffer. Example - Hadoop hflush
159   *                  vs hsync.
160   */
161  default void sync(long txid, boolean forceSync) throws IOException {
162    sync(txid);
163  }
164
165  /**
166   * WAL keeps track of the sequence numbers that are as yet not flushed im memstores in order to be
167   * able to do accounting to figure which WALs can be let go. This method tells WAL that some
168   * region is about to flush. The flush can be the whole region or for a column family of the
169   * region only.
170   * <p>
171   * Currently, it is expected that the update lock is held for the region; i.e. no concurrent
172   * appends while we set up cache flush.
173   * @param families Families to flush. May be a subset of all families in the region.
174   * @return Returns {@link HConstants#NO_SEQNUM} if we are flushing the whole region OR if we are
175   *         flushing a subset of all families but there are no edits in those families not being
176   *         flushed; in other words, this is effectively same as a flush of all of the region
177   *         though we were passed a subset of regions. Otherwise, it returns the sequence id of the
178   *         oldest/lowest outstanding edit.
179   * @see #completeCacheFlush(byte[], long)
180   * @see #abortCacheFlush(byte[])
181   */
182  Long startCacheFlush(final byte[] encodedRegionName, Set<byte[]> families);
183
184  Long startCacheFlush(final byte[] encodedRegionName, Map<byte[], Long> familyToSeq);
185
186  /**
187   * Complete the cache flush.
188   * @param encodedRegionName Encoded region name.
189   * @param maxFlushedSeqId   The maxFlushedSeqId for this flush. There is no edit in memory that is
190   *                          less that this sequence id.
191   * @see #startCacheFlush(byte[], Set)
192   * @see #abortCacheFlush(byte[])
193   */
194  void completeCacheFlush(final byte[] encodedRegionName, long maxFlushedSeqId);
195
196  /**
197   * Abort a cache flush. Call if the flush fails. Note that the only recovery for an aborted flush
198   * currently is a restart of the regionserver so the snapshot content dropped by the failure gets
199   * restored to the memstore.
200   * @param encodedRegionName Encoded region name.
201   */
202  void abortCacheFlush(byte[] encodedRegionName);
203
204  /** Returns Coprocessor host. */
205  WALCoprocessorHost getCoprocessorHost();
206
207  /**
208   * Gets the earliest unflushed sequence id in the memstore for the region.
209   * @param encodedRegionName The region to get the number for.
210   * @return The earliest/lowest/oldest sequence id if present, HConstants.NO_SEQNUM if absent.
211   * @deprecated Since version 1.2.0. Removing because not used and exposes subtle internal
212   *             workings. Use {@link #getEarliestMemStoreSeqNum(byte[], byte[])}
213   */
214  @Deprecated
215  long getEarliestMemStoreSeqNum(byte[] encodedRegionName);
216
217  /**
218   * Gets the earliest unflushed sequence id in the memstore for the store.
219   * @param encodedRegionName The region to get the number for.
220   * @param familyName        The family to get the number for.
221   * @return The earliest/lowest/oldest sequence id if present, HConstants.NO_SEQNUM if absent.
222   */
223  long getEarliestMemStoreSeqNum(byte[] encodedRegionName, byte[] familyName);
224
225  /**
226   * Human readable identifying information about the state of this WAL. Implementors are encouraged
227   * to include information appropriate for debugging. Consumers are advised not to rely on the
228   * details of the returned String; it does not have a defined structure.
229   */
230  @Override
231  String toString();
232
233  /**
234   * When outside clients need to consume persisted WALs, they rely on a provided Reader.
235   */
236  interface Reader extends Closeable {
237    Entry next() throws IOException;
238
239    Entry next(Entry reuse) throws IOException;
240
241    void seek(long pos) throws IOException;
242
243    long getPosition() throws IOException;
244
245    void reset() throws IOException;
246  }
247
248  /**
249   * Utility class that lets us keep track of the edit with it's key.
250   */
251  class Entry {
252    private final WALEdit edit;
253    private final WALKeyImpl key;
254
255    public Entry() {
256      this(new WALKeyImpl(), new WALEdit());
257    }
258
259    /**
260     * Constructor for both params
261     * @param edit log's edit
262     * @param key  log's key
263     */
264    public Entry(WALKeyImpl key, WALEdit edit) {
265      this.key = key;
266      this.edit = edit;
267    }
268
269    /**
270     * Gets the edit n
271     */
272    public WALEdit getEdit() {
273      return edit;
274    }
275
276    /**
277     * Gets the key n
278     */
279    public WALKeyImpl getKey() {
280      return key;
281    }
282
283    /**
284     * Set compression context for this entry. n * Compression context
285     * @deprecated deparcated since hbase 2.1.0
286     */
287    @Deprecated
288    public void setCompressionContext(CompressionContext compressionContext) {
289      key.setCompressionContext(compressionContext);
290    }
291
292    @Override
293    public String toString() {
294      return this.key + "=" + this.edit;
295    }
296  }
297
298  /**
299   * Split a WAL filename to get a start time. WALs usually have the time we start writing to them
300   * as part of their name, usually the suffix. Sometimes there will be an extra suffix as when it
301   * is a WAL for the meta table. For example, WALs might look like this
302   * <code>10.20.20.171%3A60020.1277499063250</code> where <code>1277499063250</code> is the
303   * timestamp. Could also be a meta WAL which adds a '.meta' suffix or a synchronous replication
304   * WAL which adds a '.syncrep' suffix. Check for these. File also may have no timestamp on it. For
305   * example the recovered.edits files are WALs but are named in ascending order. Here is an
306   * example: 0000000000000016310. Allow for this.
307   * @param name Name of the WAL file.
308   * @return Timestamp or -1.
309   */
310  public static long getTimestamp(String name) {
311    String[] splits = name.split("\\.");
312    if (splits.length <= 1) {
313      return -1;
314    }
315    String timestamp = splits[splits.length - 1];
316    if (!isNumeric(timestamp)) {
317      // Its a '.meta' or a '.syncrep' suffix.
318      timestamp = splits[splits.length - 2];
319      if (!isNumeric(timestamp)) {
320        return -1;
321      }
322    }
323    return Long.parseLong(timestamp);
324  }
325}