View Javadoc

1   /**
2    *
3    * Licensed to the Apache Software Foundation (ASF) under one
4    * or more contributor license agreements.  See the NOTICE file
5    * distributed with this work for additional information
6    * regarding copyright ownership.  The ASF licenses this file
7    * to you under the Apache License, Version 2.0 (the
8    * "License"); you may not use this file except in compliance
9    * with the License.  You may obtain a copy of the License at
10   *
11   *     http://www.apache.org/licenses/LICENSE-2.0
12   *
13   * Unless required by applicable law or agreed to in writing, software
14   * distributed under the License is distributed on an "AS IS" BASIS,
15   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16   * See the License for the specific language governing permissions and
17   * limitations under the License.
18   */
19  
20  package org.apache.hadoop.hbase.regionserver.wal;
21  
22  import java.io.DataInput;
23  import java.io.DataOutput;
24  import java.io.IOException;
25  import java.util.List;
26  import java.util.UUID;
27  import java.util.concurrent.atomic.AtomicLong;
28  import java.util.regex.Pattern;
29  
30  import org.apache.commons.logging.Log;
31  import org.apache.commons.logging.LogFactory;
32  import org.apache.hadoop.hbase.classification.InterfaceAudience;
33  import org.apache.hadoop.conf.Configuration;
34  import org.apache.hadoop.fs.FSDataInputStream;
35  import org.apache.hadoop.fs.FileSystem;
36  import org.apache.hadoop.fs.Path;
37  import org.apache.hadoop.hbase.Cell;
38  import org.apache.hadoop.hbase.HBaseInterfaceAudience;
39  import org.apache.hadoop.hbase.HRegionInfo;
40  import org.apache.hadoop.hbase.HTableDescriptor;
41  import org.apache.hadoop.hbase.TableName;
42  import org.apache.hadoop.hbase.protobuf.generated.WALProtos.WALTrailer;
43  import org.apache.hadoop.io.Writable;
44  
45  import com.google.common.annotations.VisibleForTesting;
46  
47  /**
48   * HLog records all the edits to HStore.  It is the hbase write-ahead-log (WAL).
49   */
50  @InterfaceAudience.Private
51  // TODO: Rename interface to WAL
52  public interface HLog {
53    Log LOG = LogFactory.getLog(HLog.class);
54    public static final long NO_SEQUENCE_ID = -1;
55  
56    /** File Extension used while splitting an HLog into regions (HBASE-2312) */
57    // TODO: this seems like an implementation detail that does not belong here.
58    String SPLITTING_EXT = "-splitting";
59    boolean SPLIT_SKIP_ERRORS_DEFAULT = false;
60    /** The hbase:meta region's HLog filename extension.*/
61    // TODO: Implementation detail.  Does not belong in here.
62    String META_HLOG_FILE_EXTN = ".meta";
63  
64    /**
65     * Configuration name of HLog Trailer's warning size. If a waltrailer's size is greater than the
66     * configured size, a warning is logged. This is used with Protobuf reader/writer.
67     */
68    // TODO: Implementation detail.  Why in here?
69    String WAL_TRAILER_WARN_SIZE = "hbase.regionserver.waltrailer.warn.size";
70    int DEFAULT_WAL_TRAILER_WARN_SIZE = 1024 * 1024; // 1MB
71  
72    // TODO: Implementation detail.  Why in here?
73    Pattern EDITFILES_NAME_PATTERN = Pattern.compile("-?[0-9]+");
74    String RECOVERED_LOG_TMPFILE_SUFFIX = ".temp";
75    String SEQUENCE_ID_FILE_SUFFIX = "_seqid";
76  
77    /**
78     * WAL Reader Interface
79     */
80    interface Reader {
81      /**
82       * @param fs File system.
83       * @param path Path.
84       * @param c Configuration.
85       * @param s Input stream that may have been pre-opened by the caller; may be null.
86       */
87      void init(FileSystem fs, Path path, Configuration c, FSDataInputStream s) throws IOException;
88  
89      void close() throws IOException;
90  
91      Entry next() throws IOException;
92  
93      Entry next(Entry reuse) throws IOException;
94  
95      void seek(long pos) throws IOException;
96  
97      long getPosition() throws IOException;
98      void reset() throws IOException;
99  
100     /**
101      * @return the WALTrailer of the current HLog. It may be null in case of legacy or corrupt WAL
102      * files.
103      */
104     // TODO: What we need a trailer on WAL for?  It won't be present on last WAL most of the time.
105     // What then?
106     WALTrailer getWALTrailer();
107   }
108 
109   /**
110    * WAL Writer Intrface.
111    */
112   interface Writer {
113     void init(FileSystem fs, Path path, Configuration c, boolean overwritable) throws IOException;
114 
115     void close() throws IOException;
116 
117     void sync() throws IOException;
118 
119     void append(Entry entry) throws IOException;
120 
121     long getLength() throws IOException;
122 
123     /**
124      * Sets HLog/WAL's WALTrailer. This trailer is appended at the end of WAL on closing.
125      * @param walTrailer trailer to append to WAL.
126      */
127     // TODO: Why a trailer on the log?
128     void setWALTrailer(WALTrailer walTrailer);
129   }
130 
131   /**
132    * Utility class that lets us keep track of the edit and it's associated key. Only used when
133    * splitting logs.
134    */
135   // TODO: Remove this Writable.
136   // TODO: Why is this in here?  Implementation detail?
137   @InterfaceAudience.LimitedPrivate(HBaseInterfaceAudience.REPLICATION)
138   class Entry implements Writable {
139     private WALEdit edit;
140     private HLogKey key;
141 
142     public Entry() {
143       edit = new WALEdit();
144       key = new HLogKey();
145     }
146 
147     /**
148      * Constructor for both params
149      *
150      * @param edit log's edit
151      * @param key log's key
152      */
153     public Entry(HLogKey key, WALEdit edit) {
154       this.key = key;
155       this.edit = edit;
156     }
157 
158     /**
159      * Gets the edit
160      *
161      * @return edit
162      */
163     public WALEdit getEdit() {
164       return edit;
165     }
166 
167     /**
168      * Gets the key
169      *
170      * @return key
171      */
172     public HLogKey getKey() {
173       return key;
174     }
175 
176     /**
177      * Set compression context for this entry.
178      *
179      * @param compressionContext Compression context
180      */
181     public void setCompressionContext(CompressionContext compressionContext) {
182       edit.setCompressionContext(compressionContext);
183       key.setCompressionContext(compressionContext);
184     }
185 
186     @Override
187     public String toString() {
188       return this.key + "=" + this.edit;
189     }
190 
191     @Override
192     @SuppressWarnings("deprecation")
193     public void write(DataOutput dataOutput) throws IOException {
194       this.key.write(dataOutput);
195       this.edit.write(dataOutput);
196     }
197 
198     @Override
199     public void readFields(DataInput dataInput) throws IOException {
200       this.key.readFields(dataInput);
201       this.edit.readFields(dataInput);
202     }
203   }
204 
205   /**
206    * Registers WALActionsListener
207    *
208    * @param listener
209    */
210   void registerWALActionsListener(final WALActionsListener listener);
211 
212   /**
213    * Unregisters WALActionsListener
214    *
215    * @param listener
216    */
217   boolean unregisterWALActionsListener(final WALActionsListener listener);
218 
219   /**
220    * @return Current state of the monotonically increasing file id.
221    */
222   // TODO: Remove.  Implementation detail.
223   long getFilenum();
224 
225   /**
226    * @return the number of HLog files
227    */
228   int getNumLogFiles();
229 
230   /**
231    * @return the size of HLog files
232    */
233   long getLogFileSize();
234 
235   // TODO: Log rolling should not be in this interface.
236   /**
237    * Roll the log writer. That is, start writing log messages to a new file.
238    *
239    * <p>
240    * The implementation is synchronized in order to make sure there's one rollWriter
241    * running at any given time.
242    *
243    * @return If lots of logs, flush the returned regions so next time through we
244    *         can clean logs. Returns null if nothing to flush. Names are actual
245    *         region names as returned by {@link HRegionInfo#getEncodedName()}
246    * @throws org.apache.hadoop.hbase.regionserver.wal.FailedLogCloseException
247    * @throws IOException
248    */
249   byte[][] rollWriter() throws FailedLogCloseException, IOException;
250 
251   /**
252    * Roll the log writer. That is, start writing log messages to a new file.
253    *
254    * <p>
255    * The implementation is synchronized in order to make sure there's one rollWriter
256    * running at any given time.
257    *
258    * @param force
259    *          If true, force creation of a new writer even if no entries have
260    *          been written to the current writer
261    * @return If lots of logs, flush the returned regions so next time through we
262    *         can clean logs. Returns null if nothing to flush. Names are actual
263    *         region names as returned by {@link HRegionInfo#getEncodedName()}
264    * @throws org.apache.hadoop.hbase.regionserver.wal.FailedLogCloseException
265    * @throws IOException
266    */
267   byte[][] rollWriter(boolean force) throws FailedLogCloseException, IOException;
268 
269   /**
270    * Shut down the log.
271    *
272    * @throws IOException
273    */
274   void close() throws IOException;
275 
276   /**
277    * Shut down the log and delete the log directory.
278    * Used by tests only and in rare cases where we need a log just temporarily while bootstrapping
279    * a region or running migrations.
280    *
281    * @throws IOException
282    */
283   void closeAndDelete() throws IOException;
284 
285   /**
286    * Same as {@link #appendNoSync(HRegionInfo, TableName, WALEdit, List, long, HTableDescriptor,
287    *   AtomicLong, boolean, long, long)}
288    * except it causes a sync on the log
289    * @param info
290    * @param tableName
291    * @param edits
292    * @param now
293    * @param htd
294    * @param sequenceId
295    * @throws IOException
296    * @deprecated For tests only and even then, should use
297    * {@link #appendNoSync(HTableDescriptor, HRegionInfo, HLogKey, WALEdit, AtomicLong, boolean,
298    * List)} and {@link #sync()} instead.
299    */
300   @Deprecated
301   @VisibleForTesting
302   public void append(HRegionInfo info, TableName tableName, WALEdit edits,
303       final long now, HTableDescriptor htd, AtomicLong sequenceId) throws IOException;
304 
305   /**
306    * For notification post append to the writer.  Used by metrics system at least.
307    * @param entry
308    * @param elapsedTime
309    * @return Size of this append.
310    */
311   long postAppend(final Entry entry, final long elapsedTime);
312 
313   /**
314    * For notification post writer sync.  Used by metrics system at least.
315    * @param timeInMillis How long the filesystem sync took in milliseconds.
316    * @param handlerSyncs How many sync handler calls were released by this call to filesystem
317    * sync.
318    */
319   void postSync(final long timeInMillis, final int handlerSyncs);
320 
321   /**
322    * Append a set of edits to the WAL. WAL edits are keyed by (encoded) regionName, rowname, and
323    * log-sequence-id. The WAL is not flushed/sync'd after this transaction completes BUT on return
324    * this edit must have its region edit/sequence id assigned else it messes up our unification
325    * of mvcc and sequenceid.
326    * @param info
327    * @param tableName
328    * @param edits
329    * @param clusterIds
330    * @param now
331    * @param htd
332    * @param sequenceId A reference to the atomic long the <code>info</code> region is using as
333    * source of its incrementing edits sequence id.  Inside in this call we will increment it and
334    * attach the sequence to the edit we apply the WAL.
335    * @param isInMemstore Always true except for case where we are writing a compaction completion
336    * record into the WAL; in this case the entry is just so we can finish an unfinished compaction
337    * -- it is not an edit for memstore.
338    * @param nonceGroup
339    * @param nonce
340    * @return Returns a 'transaction id'.  Do not use. This is an internal implementation detail and
341    * cannot be respected in all implementations; i.e. the append/sync machine may or may not be
342    * able to sync an explicit edit only (the current default implementation syncs up to the time
343    * of the sync call syncing whatever is behind the sync).
344    * @throws IOException
345    * @deprecated Use {@link #appendNoSync(HTableDescriptor, HRegionInfo, HLogKey, WALEdit, AtomicLong, boolean, List)}
346    * instead because you can get back the region edit/sequenceid; it is set into the passed in
347    * <code>key</code>.
348    */
349   @Deprecated
350   long appendNoSync(HRegionInfo info, TableName tableName, WALEdit edits,
351       List<UUID> clusterIds, final long now, HTableDescriptor htd, AtomicLong sequenceId,
352       boolean isInMemstore, long nonceGroup, long nonce) throws IOException;
353 
354   /**
355    * Append a set of edits to the WAL. The WAL is not flushed/sync'd after this transaction
356    * completes BUT on return this edit must have its region edit/sequence id assigned
357    * else it messes up our unification of mvcc and sequenceid.  On return <code>key</code> will
358    * have the region edit/sequence id filled in.
359    * @param info
360    * @param key Modified by this call; we add to it this edits region edit/sequence id.
361    * @param edits Edits to append. MAY CONTAIN NO EDITS for case where we want to get an edit
362    * sequence id that is after all currently appended edits.
363    * @param htd
364    * @param sequenceId A reference to the atomic long the <code>info</code> region is using as
365    * source of its incrementing edits sequence id.  Inside in this call we will increment it and
366    * attach the sequence to the edit we apply the WAL.
367    * @param inMemstore Always true except for case where we are writing a compaction completion
368    * record into the WAL; in this case the entry is just so we can finish an unfinished compaction
369    * -- it is not an edit for memstore.
370    * @param memstoreCells list of Cells added into memstore
371    * @return Returns a 'transaction id' and <code>key</code> will have the region edit/sequence id
372    * in it.
373    * @throws IOException
374    */
375   long appendNoSync(HTableDescriptor htd, HRegionInfo info, HLogKey key, WALEdit edits,
376       AtomicLong sequenceId, boolean inMemstore, List<Cell> memstoreCells) throws IOException;
377 
378   // TODO: Do we need all these versions of sync?
379   void hsync() throws IOException;
380 
381   void hflush() throws IOException;
382 
383   /**
384    * Sync what we have in the WAL.
385    * @throws IOException
386    */
387   void sync() throws IOException;
388 
389   /**
390    * Sync the WAL if the txId was not already sync'd.
391    * @param txid Transaction id to sync to.
392    * @throws IOException
393    */
394   void sync(long txid) throws IOException;
395 
396   /**
397    * WAL keeps track of the sequence numbers that were not yet flushed from memstores
398    * in order to be able to do cleanup. This method tells WAL that some region is about
399    * to flush memstore.
400    *
401    * <p>We stash the oldest seqNum for the region, and let the the next edit inserted in this
402    * region be recorded in {@link #append(HRegionInfo, TableName, WALEdit, long, HTableDescriptor,
403    * AtomicLong)} as new oldest seqnum.
404    * In case of flush being aborted, we put the stashed value back; in case of flush succeeding,
405    * the seqNum of that first edit after start becomes the valid oldest seqNum for this region.
406    *
407    * @return true if the flush can proceed, false in case wal is closing (ususally, when server is
408    * closing) and flush couldn't be started.
409    */
410   boolean startCacheFlush(final byte[] encodedRegionName);
411 
412   /**
413    * Complete the cache flush.
414    * @param encodedRegionName Encoded region name.
415    */
416   void completeCacheFlush(final byte[] encodedRegionName);
417 
418   /**
419    * Abort a cache flush. Call if the flush fails. Note that the only recovery
420    * for an aborted flush currently is a restart of the regionserver so the
421    * snapshot content dropped by the failure gets restored to the memstore.v
422    * @param encodedRegionName Encoded region name.
423    */
424   void abortCacheFlush(byte[] encodedRegionName);
425 
426   /**
427    * @return Coprocessor host.
428    */
429   WALCoprocessorHost getCoprocessorHost();
430 
431   /**
432    * Get LowReplication-Roller status
433    *
434    * @return lowReplicationRollEnabled
435    */
436   // TODO: This is implementation detail?
437   boolean isLowReplicationRollEnabled();
438 
439   /** Gets the earliest sequence number in the memstore for this particular region.
440    * This can serve as best-effort "recent" WAL number for this region.
441    * @param encodedRegionName The region to get the number for.
442    * @return The number if present, HConstants.NO_SEQNUM if absent.
443    */
444   long getEarliestMemstoreSeqNum(byte[] encodedRegionName);
445 }