View Javadoc

1   /**
2    *
3    * Licensed to the Apache Software Foundation (ASF) under one
4    * or more contributor license agreements.  See the NOTICE file
5    * distributed with this work for additional information
6    * regarding copyright ownership.  The ASF licenses this file
7    * to you under the Apache License, Version 2.0 (the
8    * "License"); you may not use this file except in compliance
9    * with the License.  You may obtain a copy of the License at
10   *
11   *     http://www.apache.org/licenses/LICENSE-2.0
12   *
13   * Unless required by applicable law or agreed to in writing, software
14   * distributed under the License is distributed on an "AS IS" BASIS,
15   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16   * See the License for the specific language governing permissions and
17   * limitations under the License.
18   */
19  
20  package org.apache.hadoop.hbase.regionserver.wal;
21  
22  import java.io.DataInput;
23  import java.io.DataOutput;
24  import java.io.IOException;
25  import java.util.List;
26  import java.util.UUID;
27  import java.util.concurrent.atomic.AtomicLong;
28  import java.util.regex.Pattern;
29  
30  import org.apache.commons.logging.Log;
31  import org.apache.commons.logging.LogFactory;
32  import org.apache.hadoop.classification.InterfaceAudience;
33  import org.apache.hadoop.conf.Configuration;
34  import org.apache.hadoop.fs.FSDataInputStream;
35  import org.apache.hadoop.fs.FileSystem;
36  import org.apache.hadoop.fs.Path;
37  import org.apache.hadoop.hbase.HBaseInterfaceAudience;
38  import org.apache.hadoop.hbase.HRegionInfo;
39  import org.apache.hadoop.hbase.HTableDescriptor;
40  import org.apache.hadoop.hbase.KeyValue;
41  import org.apache.hadoop.hbase.TableName;
42  import org.apache.hadoop.hbase.protobuf.generated.WALProtos.WALTrailer;
43  import org.apache.hadoop.io.Writable;
44  
45  import com.google.common.annotations.VisibleForTesting;
46  
47  /**
48   * HLog records all the edits to HStore.  It is the hbase write-ahead-log (WAL).
49   */
50  @InterfaceAudience.Private
51  // TODO: Rename interface to WAL
52  public interface HLog {
53    Log LOG = LogFactory.getLog(HLog.class);
54    public static final long NO_SEQUENCE_ID = -1;
55  
56    /** File Extension used while splitting an HLog into regions (HBASE-2312) */
57    // TODO: this seems like an implementation detail that does not belong here.
58    String SPLITTING_EXT = "-splitting";
59    boolean SPLIT_SKIP_ERRORS_DEFAULT = false;
60    /** The hbase:meta region's HLog filename extension.*/
61    // TODO: Implementation detail.  Does not belong in here.
62    String META_HLOG_FILE_EXTN = ".meta";
63  
64    /**
65     * Configuration name of HLog Trailer's warning size. If a waltrailer's size is greater than the
66     * configured size, a warning is logged. This is used with Protobuf reader/writer.
67     */
68    // TODO: Implementation detail.  Why in here?
69    String WAL_TRAILER_WARN_SIZE = "hbase.regionserver.waltrailer.warn.size";
70    int DEFAULT_WAL_TRAILER_WARN_SIZE = 1024 * 1024; // 1MB
71  
72    // TODO: Implementation detail.  Why in here?
73    Pattern EDITFILES_NAME_PATTERN = Pattern.compile("-?[0-9]+");
74    String RECOVERED_LOG_TMPFILE_SUFFIX = ".temp";
75  
76    /**
77     * WAL Reader Interface
78     */
79    interface Reader {
80      /**
81       * @param fs File system.
82       * @param path Path.
83       * @param c Configuration.
84       * @param s Input stream that may have been pre-opened by the caller; may be null.
85       */
86      void init(FileSystem fs, Path path, Configuration c, FSDataInputStream s) throws IOException;
87  
88      void close() throws IOException;
89  
90      Entry next() throws IOException;
91  
92      Entry next(Entry reuse) throws IOException;
93  
94      void seek(long pos) throws IOException;
95  
96      long getPosition() throws IOException;
97      void reset() throws IOException;
98  
99      /**
100      * @return the WALTrailer of the current HLog. It may be null in case of legacy or corrupt WAL
101      * files.
102      */
103     // TODO: What we need a trailer on WAL for?  It won't be present on last WAL most of the time.
104     // What then?
105     WALTrailer getWALTrailer();
106   }
107 
108   /**
109    * WAL Writer Intrface.
110    */
111   interface Writer {
112     void init(FileSystem fs, Path path, Configuration c, boolean overwritable) throws IOException;
113 
114     void close() throws IOException;
115 
116     void sync() throws IOException;
117 
118     void append(Entry entry) throws IOException;
119 
120     long getLength() throws IOException;
121 
122     /**
123      * Sets HLog/WAL's WALTrailer. This trailer is appended at the end of WAL on closing.
124      * @param walTrailer trailer to append to WAL.
125      */
126     // TODO: Why a trailer on the log?
127     void setWALTrailer(WALTrailer walTrailer);
128   }
129 
130   /**
131    * Utility class that lets us keep track of the edit and it's associated key. Only used when
132    * splitting logs.
133    */
134   // TODO: Remove this Writable.
135   // TODO: Why is this in here?  Implementation detail?
136   @InterfaceAudience.LimitedPrivate(HBaseInterfaceAudience.REPLICATION)
137   class Entry implements Writable {
138     private WALEdit edit;
139     private HLogKey key;
140 
141     public Entry() {
142       edit = new WALEdit();
143       key = new HLogKey();
144     }
145 
146     /**
147      * Constructor for both params
148      *
149      * @param edit log's edit
150      * @param key log's key
151      */
152     public Entry(HLogKey key, WALEdit edit) {
153       this.key = key;
154       this.edit = edit;
155     }
156 
157     /**
158      * Gets the edit
159      *
160      * @return edit
161      */
162     public WALEdit getEdit() {
163       return edit;
164     }
165 
166     /**
167      * Gets the key
168      *
169      * @return key
170      */
171     public HLogKey getKey() {
172       return key;
173     }
174 
175     /**
176      * Set compression context for this entry.
177      *
178      * @param compressionContext Compression context
179      */
180     public void setCompressionContext(CompressionContext compressionContext) {
181       edit.setCompressionContext(compressionContext);
182       key.setCompressionContext(compressionContext);
183     }
184 
185     @Override
186     public String toString() {
187       return this.key + "=" + this.edit;
188     }
189 
190     @Override
191     @SuppressWarnings("deprecation")
192     public void write(DataOutput dataOutput) throws IOException {
193       this.key.write(dataOutput);
194       this.edit.write(dataOutput);
195     }
196 
197     @Override
198     public void readFields(DataInput dataInput) throws IOException {
199       this.key.readFields(dataInput);
200       this.edit.readFields(dataInput);
201     }
202   }
203 
204   /**
205    * Registers WALActionsListener
206    *
207    * @param listener
208    */
209   void registerWALActionsListener(final WALActionsListener listener);
210 
211   /**
212    * Unregisters WALActionsListener
213    *
214    * @param listener
215    */
216   boolean unregisterWALActionsListener(final WALActionsListener listener);
217 
218   /**
219    * @return Current state of the monotonically increasing file id.
220    */
221   // TODO: Remove.  Implementation detail.
222   long getFilenum();
223 
224   /**
225    * @return the number of HLog files
226    */
227   int getNumLogFiles();
228 
229   /**
230    * @return the size of HLog files
231    */
232   long getLogFileSize();
233 
234   // TODO: Log rolling should not be in this interface.
235   /**
236    * Roll the log writer. That is, start writing log messages to a new file.
237    *
238    * <p>
239    * The implementation is synchronized in order to make sure there's one rollWriter
240    * running at any given time.
241    *
242    * @return If lots of logs, flush the returned regions so next time through we
243    *         can clean logs. Returns null if nothing to flush. Names are actual
244    *         region names as returned by {@link HRegionInfo#getEncodedName()}
245    * @throws org.apache.hadoop.hbase.regionserver.wal.FailedLogCloseException
246    * @throws IOException
247    */
248   byte[][] rollWriter() throws FailedLogCloseException, IOException;
249 
250   /**
251    * Roll the log writer. That is, start writing log messages to a new file.
252    *
253    * <p>
254    * The implementation is synchronized in order to make sure there's one rollWriter
255    * running at any given time.
256    *
257    * @param force
258    *          If true, force creation of a new writer even if no entries have
259    *          been written to the current writer
260    * @return If lots of logs, flush the returned regions so next time through we
261    *         can clean logs. Returns null if nothing to flush. Names are actual
262    *         region names as returned by {@link HRegionInfo#getEncodedName()}
263    * @throws org.apache.hadoop.hbase.regionserver.wal.FailedLogCloseException
264    * @throws IOException
265    */
266   byte[][] rollWriter(boolean force) throws FailedLogCloseException, IOException;
267 
268   /**
269    * Shut down the log.
270    *
271    * @throws IOException
272    */
273   void close() throws IOException;
274 
275   /**
276    * Shut down the log and delete the log directory.
277    * Used by tests only and in rare cases where we need a log just temporarily while bootstrapping
278    * a region or running migrations.
279    *
280    * @throws IOException
281    */
282   void closeAndDelete() throws IOException;
283 
284   /**
285    * Same as {@link #appendNoSync(HRegionInfo, TableName, WALEdit, List, long, HTableDescriptor,
286    *   AtomicLong, boolean, long, long)}
287    * except it causes a sync on the log
288    * @param info
289    * @param tableName
290    * @param edits
291    * @param now
292    * @param htd
293    * @param sequenceId
294    * @throws IOException
295    * @deprecated For tests only and even then, should use
296    * {@link #appendNoSync(HTableDescriptor, HRegionInfo, HLogKey, WALEdit, AtomicLong, boolean,
297    * List)} and {@link #sync()} instead.
298    */
299   @Deprecated
300   @VisibleForTesting
301   public void append(HRegionInfo info, TableName tableName, WALEdit edits,
302       final long now, HTableDescriptor htd, AtomicLong sequenceId) throws IOException;
303 
304   /**
305    * For notification post append to the writer.  Used by metrics system at least.
306    * @param entry
307    * @param elapsedTime
308    * @return Size of this append.
309    */
310   long postAppend(final Entry entry, final long elapsedTime);
311 
312   /**
313    * For notification post writer sync.  Used by metrics system at least.
314    * @param timeInMillis How long the filesystem sync took in milliseconds.
315    * @param handlerSyncs How many sync handler calls were released by this call to filesystem
316    * sync.
317    */
318   void postSync(final long timeInMillis, final int handlerSyncs);
319 
320   /**
321    * Append a set of edits to the WAL. WAL edits are keyed by (encoded) regionName, rowname, and
322    * log-sequence-id. The WAL is not flushed/sync'd after this transaction completes BUT on return
323    * this edit must have its region edit/sequence id assigned else it messes up our unification
324    * of mvcc and sequenceid.
325    * @param info
326    * @param tableName
327    * @param edits
328    * @param clusterIds
329    * @param now
330    * @param htd
331    * @param sequenceId A reference to the atomic long the <code>info</code> region is using as
332    * source of its incrementing edits sequence id.  Inside in this call we will increment it and
333    * attach the sequence to the edit we apply the WAL.
334    * @param isInMemstore Always true except for case where we are writing a compaction completion
335    * record into the WAL; in this case the entry is just so we can finish an unfinished compaction
336    * -- it is not an edit for memstore.
337    * @param nonceGroup
338    * @param nonce
339    * @return Returns a 'transaction id'.  Do not use. This is an internal implementation detail and
340    * cannot be respected in all implementations; i.e. the append/sync machine may or may not be
341    * able to sync an explicit edit only (the current default implementation syncs up to the time
342    * of the sync call syncing whatever is behind the sync).
343    * @throws IOException
344    * @deprecated Use {@link #appendNoSync(HTableDescriptor, HRegionInfo, HLogKey, WALEdit, AtomicLong, boolean, List)}
345    * instead because you can get back the region edit/sequenceid; it is set into the passed in
346    * <code>key</code>.
347    */
348   @Deprecated
349   long appendNoSync(HRegionInfo info, TableName tableName, WALEdit edits,
350       List<UUID> clusterIds, final long now, HTableDescriptor htd, AtomicLong sequenceId,
351       boolean isInMemstore, long nonceGroup, long nonce) throws IOException;
352 
353   /**
354    * Append a set of edits to the WAL. The WAL is not flushed/sync'd after this transaction
355    * completes BUT on return this edit must have its region edit/sequence id assigned
356    * else it messes up our unification of mvcc and sequenceid.  On return <code>key</code> will
357    * have the region edit/sequence id filled in.
358    * @param info
359    * @param key Modified by this call; we add to it this edits region edit/sequence id.
360    * @param edits Edits to append. MAY CONTAIN NO EDITS for case where we want to get an edit
361    * sequence id that is after all currently appended edits.
362    * @param htd
363    * @param sequenceId A reference to the atomic long the <code>info</code> region is using as
364    * source of its incrementing edits sequence id.  Inside in this call we will increment it and
365    * attach the sequence to the edit we apply the WAL.
366    * @param inMemstore Always true except for case where we are writing a compaction completion
367    * record into the WAL; in this case the entry is just so we can finish an unfinished compaction
368    * -- it is not an edit for memstore.
369    * @param memstoreKVs list of KVs added into memstore
370    * @return Returns a 'transaction id' and <code>key</code> will have the region edit/sequence id
371    * in it.
372    * @throws IOException
373    */
374   long appendNoSync(HTableDescriptor htd, HRegionInfo info, HLogKey key, WALEdit edits,
375       AtomicLong sequenceId, boolean inMemstore, List<KeyValue> memstoreKVs)
376   throws IOException;
377 
378   // TODO: Do we need all these versions of sync?
379   void hsync() throws IOException;
380 
381   void hflush() throws IOException;
382 
383   /**
384    * Sync what we have in the WAL.
385    * @throws IOException
386    */
387   void sync() throws IOException;
388 
389   /**
390    * Sync the WAL if the txId was not already sync'd.
391    * @param txid Transaction id to sync to.
392    * @throws IOException
393    */
394   void sync(long txid) throws IOException;
395 
396   /**
397    * WAL keeps track of the sequence numbers that were not yet flushed from memstores
398    * in order to be able to do cleanup. This method tells WAL that some region is about
399    * to flush memstore.
400    *
401    * <p>We stash the oldest seqNum for the region, and let the the next edit inserted in this
402    * region be recorded in {@link #append(HRegionInfo, TableName, WALEdit, long, HTableDescriptor,
403    * AtomicLong)} as new oldest seqnum.
404    * In case of flush being aborted, we put the stashed value back; in case of flush succeeding,
405    * the seqNum of that first edit after start becomes the valid oldest seqNum for this region.
406    *
407    * @return true if the flush can proceed, false in case wal is closing (ususally, when server is
408    * closing) and flush couldn't be started.
409    */
410   boolean startCacheFlush(final byte[] encodedRegionName);
411 
412   /**
413    * Complete the cache flush.
414    * @param encodedRegionName Encoded region name.
415    */
416   void completeCacheFlush(final byte[] encodedRegionName);
417 
418   /**
419    * Abort a cache flush. Call if the flush fails. Note that the only recovery
420    * for an aborted flush currently is a restart of the regionserver so the
421    * snapshot content dropped by the failure gets restored to the memstore.v
422    * @param encodedRegionName Encoded region name.
423    */
424   void abortCacheFlush(byte[] encodedRegionName);
425 
426   /**
427    * @return Coprocessor host.
428    */
429   WALCoprocessorHost getCoprocessorHost();
430 
431   /**
432    * Get LowReplication-Roller status
433    *
434    * @return lowReplicationRollEnabled
435    */
436   // TODO: This is implementation detail?
437   boolean isLowReplicationRollEnabled();
438 
439   /** Gets the earliest sequence number in the memstore for this particular region.
440    * This can serve as best-effort "recent" WAL number for this region.
441    * @param encodedRegionName The region to get the number for.
442    * @return The number if present, HConstants.NO_SEQNUM if absent.
443    */
444   long getEarliestMemstoreSeqNum(byte[] encodedRegionName);
445 }