001/*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018package org.apache.hadoop.hbase.wal;
019
020import java.io.Closeable;
021import java.io.IOException;
022import java.util.List;
023import java.util.Map;
024import java.util.Set;
025import org.apache.hadoop.hbase.HConstants;
026import org.apache.hadoop.hbase.client.RegionInfo;
027import org.apache.hadoop.hbase.regionserver.wal.CompressionContext;
028import org.apache.hadoop.hbase.regionserver.wal.FailedLogCloseException;
029import org.apache.hadoop.hbase.regionserver.wal.WALActionsListener;
030import org.apache.hadoop.hbase.regionserver.wal.WALCoprocessorHost;
031import org.apache.hadoop.hbase.replication.regionserver.WALFileLengthProvider;
032import org.apache.yetus.audience.InterfaceAudience;
033import org.apache.yetus.audience.InterfaceStability;
034
035import static org.apache.commons.lang3.StringUtils.isNumeric;
036
037/**
038 * A Write Ahead Log (WAL) provides service for reading, writing waledits. This interface provides
039 * APIs for WAL users (such as RegionServer) to use the WAL (do append, sync, etc).
040 *
041 * Note that some internals, such as log rolling and performance evaluation tools, will use
042 * WAL.equals to determine if they have already seen a given WAL.
043 */
044@InterfaceAudience.Private
045@InterfaceStability.Evolving
046public interface WAL extends Closeable, WALFileLengthProvider {
047
  /**
   * Registers a {@link WALActionsListener} with this WAL.
   * @param listener the listener to register
   */
  void registerWALActionsListener(final WALActionsListener listener);
052
  /**
   * Unregisters a {@link WALActionsListener} from this WAL.
   * @param listener the listener to unregister
   * @return NOTE(review): presumably true if the listener was registered and has been removed --
   *         confirm against implementations
   */
  boolean unregisterWALActionsListener(final WALActionsListener listener);
057
  /**
   * Roll the log writer. That is, start writing log messages to a new file.
   * <p>
   * The implementation is synchronized in order to make sure there's one rollWriter
   * running at any given time.
   * @return If lots of logs, flush the stores of returned regions so next time through we
   *         can clean logs. Returns null if nothing to flush. Names are actual
   *         region names as returned by {@link RegionInfo#getEncodedName()}.
   *         NOTE(review): the map presumably goes from encoded region name to the names of
   *         the stores to flush in that region -- confirm against implementations.
   * @throws FailedLogCloseException NOTE(review): presumably when the writer being rolled away
   *           from cannot be closed -- confirm
   * @throws IOException NOTE(review): presumably on any other failure while rolling -- confirm
   */
  Map<byte[], List<byte[]>> rollWriter() throws FailedLogCloseException, IOException;
070
  /**
   * Roll the log writer. That is, start writing log messages to a new file.
   * <p>
   * The implementation is synchronized in order to make sure there's one rollWriter
   * running at any given time.
   * @param force If true, force creation of a new writer even if no entries have
   *          been written to the current writer
   * @return If lots of logs, flush the stores of returned regions so next time through we
   *         can clean logs. Returns null if nothing to flush. Names are actual
   *         region names as returned by {@link RegionInfo#getEncodedName()}.
   *         NOTE(review): the map presumably goes from encoded region name to the names of
   *         the stores to flush in that region -- confirm against implementations.
   * @throws IOException NOTE(review): presumably on any failure while rolling -- confirm
   */
  Map<byte[], List<byte[]>> rollWriter(boolean force) throws IOException;
086
  /**
   * Stop accepting new writes. If we have unsynced writes still in buffer, sync them.
   * Extant edits are left in place in backing storage to be replayed later.
   * @throws IOException NOTE(review): presumably if syncing the buffered writes fails -- confirm
   */
  void shutdown() throws IOException;
092
  /**
   * Caller no longer needs any edits from this WAL. Implementers are free to reclaim
   * underlying resources after this call; i.e. filesystem based WALs can archive or
   * delete files.
   * @throws IOException NOTE(review): presumably if releasing the underlying resources fails --
   *           confirm
   */
  @Override
  void close() throws IOException;
100
  /**
   * Append a set of data edits to the WAL. 'Data' here means that the content in the edits will
   * also have transitioned through the memstore.
   * <p>
   * The WAL is not flushed/sync'd after this transaction completes BUT on return this edit must
   * have its region edit/sequence id assigned else it messes up our unification of mvcc and
   * sequenceid. On return <code>key</code> will have the region edit/sequence id filled in.
   * @param info the regioninfo associated with append
   * @param key Modified by this call; we add to it this edit's region edit/sequence id.
   * @param edits Edits to append. MAY CONTAIN NO EDITS for case where we want to get an edit
   *          sequence id that is after all currently appended edits.
   * @return Returns a 'transaction id' and <code>key</code> will have the region edit/sequence id
   *         in it.
   * @throws IOException NOTE(review): presumably if the append cannot be accepted -- confirm
   * @see #appendMarker(RegionInfo, WALKeyImpl, WALEdit)
   */
  long appendData(RegionInfo info, WALKeyImpl key, WALEdit edits) throws IOException;
117
  /**
   * Append an operational 'meta' event marker edit to the WAL. A marker meta edit could
   * be a FlushDescriptor, a compaction marker, or a region event marker; e.g. region open
   * or region close. The difference between a 'marker' append and a 'data' append as in
   * {@link #appendData(RegionInfo, WALKeyImpl, WALEdit)} is that a marker will not have
   * transitioned through the memstore.
   * <p>
   * The WAL is not flushed/sync'd after this transaction completes BUT on return this edit must
   * have its region edit/sequence id assigned else it messes up our unification of mvcc and
   * sequenceid. On return <code>key</code> will have the region edit/sequence id filled in.
   * @param info the regioninfo associated with append
   * @param key Modified by this call; we add to it this edit's region edit/sequence id.
   * @param edits Edits to append. MAY CONTAIN NO EDITS for case where we want to get an edit
   *          sequence id that is after all currently appended edits.
   * @return Returns a 'transaction id' and <code>key</code> will have the region edit/sequence id
   *         in it.
   * @throws IOException NOTE(review): presumably if the append cannot be accepted -- confirm
   * @see #appendData(RegionInfo, WALKeyImpl, WALEdit)
   */
  long appendMarker(RegionInfo info, WALKeyImpl key, WALEdit edits) throws IOException;
137
  /**
   * Updates the sequence number of a specific store. Depending on the flag, the current
   * sequence number is replaced only if the given sequence id is bigger, or it is replaced
   * even if the given sequence id is lower than the existing one.
   * @param encodedRegionName NOTE(review): presumably the encoded name of the region that owns
   *          the store -- confirm
   * @param familyName NOTE(review): presumably the column family identifying the store --
   *          confirm
   * @param sequenceid the sequence id to record for the store
   * @param onlyIfGreater the flag described above. NOTE(review): presumably <code>true</code>
   *          means "replace only if the given sequence id is bigger" -- confirm
   */
  void updateStore(byte[] encodedRegionName, byte[] familyName, Long sequenceid,
      boolean onlyIfGreater);
145
  /**
   * Sync what we have in the WAL.
   * @throws IOException NOTE(review): presumably if the sync fails -- confirm
   */
  void sync() throws IOException;
150
  /**
   * Sync the WAL if the txId was not already sync'd.
   * @param txid Transaction id to sync to.
   * @throws IOException NOTE(review): presumably if the sync fails -- confirm
   */
  void sync(long txid) throws IOException;
156
157  /**
158   * @param forceSync Flag to force sync rather than flushing to the buffer. Example - Hadoop hflush
159   *          vs hsync.
160   */
161  default void sync(boolean forceSync) throws IOException {
162    sync();
163  }
164
165  /**
166   * @param txid Transaction id to sync to.
167   * @param forceSync Flag to force sync rather than flushing to the buffer. Example - Hadoop hflush
168   *          vs hsync.
169   */
170  default void sync(long txid, boolean forceSync) throws IOException {
171    sync(txid);
172  }
173
  /**
   * WAL keeps track of the sequence numbers that are as yet not flushed in memstores
   * in order to be able to do accounting to figure which WALs can be let go. This method tells WAL
   * that some region is about to flush. The flush can be the whole region or for a column family
   * of the region only.
   *
   * <p>Currently, it is expected that the update lock is held for the region; i.e. no
   * concurrent appends while we set up cache flush.
   * @param encodedRegionName Encoded name of the region that is about to flush.
   * @param families Families to flush. May be a subset of all families in the region.
   * @return Returns {@link HConstants#NO_SEQNUM} if we are flushing the whole region OR if
   * we are flushing a subset of all families but there are no edits in those families not
   * being flushed; in other words, this is effectively same as a flush of all of the region
   * though we were passed a subset of families. Otherwise, it returns the sequence id of the
   * oldest/lowest outstanding edit.
   * @see #completeCacheFlush(byte[], long)
   * @see #abortCacheFlush(byte[])
   */
  Long startCacheFlush(final byte[] encodedRegionName, Set<byte[]> families);
192
  /**
   * Overload of {@link #startCacheFlush(byte[], Set)} that takes a map instead of a set of
   * families.
   * @param encodedRegionName NOTE(review): presumably the encoded name of the region that is
   *          about to flush -- confirm
   * @param familyToSeq NOTE(review): presumably maps each family being flushed to a sequence
   *          id for that family -- confirm the exact meaning against implementations
   * @return NOTE(review): presumably follows the same contract as
   *         {@link #startCacheFlush(byte[], Set)} -- confirm
   * @see #completeCacheFlush(byte[], long)
   * @see #abortCacheFlush(byte[])
   */
  Long startCacheFlush(final byte[] encodedRegionName, Map<byte[], Long> familyToSeq);
194
  /**
   * Complete the cache flush.
   * @param encodedRegionName Encoded region name.
   * @param maxFlushedSeqId The maxFlushedSeqId for this flush. There is no edit in memory that is
   *          less than this sequence id.
   * @see #startCacheFlush(byte[], Set)
   * @see #abortCacheFlush(byte[])
   */
  void completeCacheFlush(final byte[] encodedRegionName, long maxFlushedSeqId);
204
  /**
   * Abort a cache flush. Call if the flush fails. Note that the only recovery
   * for an aborted flush currently is a restart of the regionserver so the
   * snapshot content dropped by the failure gets restored to the memstore.
   * @param encodedRegionName Encoded region name.
   * @see #startCacheFlush(byte[], Set)
   * @see #completeCacheFlush(byte[], long)
   */
  void abortCacheFlush(byte[] encodedRegionName);
212
  /**
   * Returns the coprocessor host of this WAL.
   * @return Coprocessor host.
   */
  WALCoprocessorHost getCoprocessorHost();
217
  /**
   * Gets the earliest unflushed sequence id in the memstore for the region.
   * @param encodedRegionName The region to get the number for.
   * @return The earliest/lowest/oldest sequence id if present, {@link HConstants#NO_SEQNUM} if
   *         absent.
   * @deprecated Since version 1.2.0. Removing because not used and exposes subtle internal
   *             workings. Use {@link #getEarliestMemStoreSeqNum(byte[], byte[])}
   */
  @Deprecated
  long getEarliestMemStoreSeqNum(byte[] encodedRegionName);
227
  /**
   * Gets the earliest unflushed sequence id in the memstore for the store.
   * @param encodedRegionName The region to get the number for.
   * @param familyName The family to get the number for.
   * @return The earliest/lowest/oldest sequence id if present, {@link HConstants#NO_SEQNUM} if
   *         absent.
   */
  long getEarliestMemStoreSeqNum(byte[] encodedRegionName, byte[] familyName);
235
  /**
   * Human readable identifying information about the state of this WAL.
   * Implementors are encouraged to include information appropriate for debugging.
   * Consumers are advised not to rely on the details of the returned String; it does
   * not have a defined structure.
   * @return a free-form description of this WAL, intended for humans only
   */
  @Override
  String toString();
244
  /**
   * When outside clients need to consume persisted WALs, they rely on a provided
   * Reader. A Reader is {@link Closeable}; every operation on it may throw an
   * {@link IOException}.
   */
  interface Reader extends Closeable {
    /** NOTE(review): presumably returns the next entry, or null once exhausted -- confirm. */
    Entry next() throws IOException;
    /** NOTE(review): presumably like {@link #next()} but may fill in <code>reuse</code> -- confirm. */
    Entry next(Entry reuse) throws IOException;
    /** NOTE(review): presumably moves the read position to <code>pos</code> -- confirm units. */
    void seek(long pos) throws IOException;
    /** NOTE(review): presumably the current read position, as accepted by seek -- confirm. */
    long getPosition() throws IOException;
    /** NOTE(review): presumably re-initializes the reader on its underlying file -- confirm. */
    void reset() throws IOException;
  }
256
257  /**
258   * Utility class that lets us keep track of the edit with it's key.
259   */
260  class Entry {
261    private final WALEdit edit;
262    private final WALKeyImpl key;
263
264    public Entry() {
265      this(new WALKeyImpl(), new WALEdit());
266    }
267
268    /**
269     * Constructor for both params
270     *
271     * @param edit log's edit
272     * @param key log's key
273     */
274    public Entry(WALKeyImpl key, WALEdit edit) {
275      this.key = key;
276      this.edit = edit;
277    }
278
279    /**
280     * Gets the edit
281     *
282     * @return edit
283     */
284    public WALEdit getEdit() {
285      return edit;
286    }
287
288    /**
289     * Gets the key
290     *
291     * @return key
292     */
293    public WALKeyImpl getKey() {
294      return key;
295    }
296
297    /**
298     * Set compression context for this entry.
299     *
300     * @param compressionContext
301     *          Compression context
302     * @deprecated deparcated since hbase 2.1.0
303     */
304    @Deprecated
305    public void setCompressionContext(CompressionContext compressionContext) {
306      key.setCompressionContext(compressionContext);
307    }
308
309    @Override
310    public String toString() {
311      return this.key + "=" + this.edit;
312    }
313  }
314
315  /**
316   * Split a WAL filename to get a start time. WALs usually have the time we start writing to them
317   * as part of their name, usually the suffix. Sometimes there will be an extra suffix as when it
318   * is a WAL for the meta table. For example, WALs might look like this
319   * <code>10.20.20.171%3A60020.1277499063250</code> where <code>1277499063250</code> is the
320   * timestamp. Could also be a meta WAL which adds a '.meta' suffix or a
321   * synchronous replication WAL which adds a '.syncrep' suffix. Check for these. File also may have
322   * no timestamp on it. For example the recovered.edits files are WALs but are named in ascending
323   * order. Here is an example: 0000000000000016310. Allow for this.
324   * @param name Name of the WAL file.
325   * @return Timestamp or -1.
326   */
327  public static long getTimestamp(String name) {
328    String [] splits = name.split("\\.");
329    if (splits.length <= 1) {
330      return -1;
331    }
332    String timestamp = splits[splits.length - 1];
333    if (!isNumeric(timestamp)) {
334      // Its a '.meta' or a '.syncrep' suffix.
335      timestamp = splits[splits.length - 2];
336      if (!isNumeric(timestamp)) {
337        return -1;
338      }
339    }
340    return Long.parseLong(timestamp);
341  }
342}