View Javadoc

1   /**
2    *
3    * Licensed to the Apache Software Foundation (ASF) under one
4    * or more contributor license agreements.  See the NOTICE file
5    * distributed with this work for additional information
6    * regarding copyright ownership.  The ASF licenses this file
7    * to you under the Apache License, Version 2.0 (the
8    * "License"); you may not use this file except in compliance
9    * with the License.  You may obtain a copy of the License at
10   *
11   *     http://www.apache.org/licenses/LICENSE-2.0
12   *
13   * Unless required by applicable law or agreed to in writing, software
14   * distributed under the License is distributed on an "AS IS" BASIS,
15   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16   * See the License for the specific language governing permissions and
17   * limitations under the License.
18   */
19  
20  package org.apache.hadoop.hbase.regionserver.wal;
21  
22  import java.io.DataInput;
23  import java.io.DataOutput;
24  import java.io.IOException;
25  import java.util.List;
26  import java.util.UUID;
27  import java.util.concurrent.atomic.AtomicLong;
28  import java.util.regex.Pattern;
29  
30  import org.apache.commons.logging.Log;
31  import org.apache.commons.logging.LogFactory;
32  import org.apache.hadoop.classification.InterfaceAudience;
33  import org.apache.hadoop.conf.Configuration;
34  import org.apache.hadoop.fs.FSDataInputStream;
35  import org.apache.hadoop.fs.FileSystem;
36  import org.apache.hadoop.fs.Path;
37  import org.apache.hadoop.hbase.Cell;
38  import org.apache.hadoop.hbase.HRegionInfo;
39  import org.apache.hadoop.hbase.HTableDescriptor;
40  import org.apache.hadoop.hbase.KeyValue;
41  import org.apache.hadoop.hbase.TableName;
42  import org.apache.hadoop.hbase.protobuf.generated.WALProtos.WALTrailer;
43  import org.apache.hadoop.io.Writable;
44  
45  import com.google.common.annotations.VisibleForTesting;
46  
47  /**
48   * HLog records all the edits to HStore.  It is the hbase write-ahead-log (WAL).
49   */
50  @InterfaceAudience.Private
51  // TODO: Rename interface to WAL
52  public interface HLog {
53    Log LOG = LogFactory.getLog(HLog.class);
54    public static final long NO_SEQUENCE_ID = -1;
55  
56    /** File Extension used while splitting an HLog into regions (HBASE-2312) */
57    // TODO: this seems like an implementation detail that does not belong here.
58    String SPLITTING_EXT = "-splitting";
59    boolean SPLIT_SKIP_ERRORS_DEFAULT = false;
60    /** The hbase:meta region's HLog filename extension.*/
61    // TODO: Implementation detail.  Does not belong in here.
62    String META_HLOG_FILE_EXTN = ".meta";
63  
64    /**
65     * Configuration name of HLog Trailer's warning size. If a waltrailer's size is greater than the
66     * configured size, a warning is logged. This is used with Protobuf reader/writer.
67     */
68    // TODO: Implementation detail.  Why in here?
69    String WAL_TRAILER_WARN_SIZE = "hbase.regionserver.waltrailer.warn.size";
70    int DEFAULT_WAL_TRAILER_WARN_SIZE = 1024 * 1024; // 1MB
71  
72    // TODO: Implementation detail.  Why in here?
73    Pattern EDITFILES_NAME_PATTERN = Pattern.compile("-?[0-9]+");
74    String RECOVERED_LOG_TMPFILE_SUFFIX = ".temp";
75  
76    /**
77     * WAL Reader Interface
78     */
79    interface Reader {
80      /**
81       * @param fs File system.
82       * @param path Path.
83       * @param c Configuration.
84       * @param s Input stream that may have been pre-opened by the caller; may be null.
85       */
86      void init(FileSystem fs, Path path, Configuration c, FSDataInputStream s) throws IOException;
87  
88      void close() throws IOException;
89  
90      Entry next() throws IOException;
91  
92      Entry next(Entry reuse) throws IOException;
93  
94      void seek(long pos) throws IOException;
95  
96      long getPosition() throws IOException;
97      void reset() throws IOException;
98  
99      /**
100      * @return the WALTrailer of the current HLog. It may be null in case of legacy or corrupt WAL
101      * files.
102      */
103     // TODO: What we need a trailer on WAL for?  It won't be present on last WAL most of the time.
104     // What then?
105     WALTrailer getWALTrailer();
106   }
107 
108   /**
109    * WAL Writer Intrface.
110    */
111   interface Writer {
112     void init(FileSystem fs, Path path, Configuration c, boolean overwritable) throws IOException;
113 
114     void close() throws IOException;
115 
116     void sync() throws IOException;
117 
118     void append(Entry entry) throws IOException;
119 
120     long getLength() throws IOException;
121 
122     /**
123      * Sets HLog/WAL's WALTrailer. This trailer is appended at the end of WAL on closing.
124      * @param walTrailer trailer to append to WAL.
125      */
126     // TODO: Why a trailer on the log?
127     void setWALTrailer(WALTrailer walTrailer);
128   }
129 
130   /**
131    * Utility class that lets us keep track of the edit and it's associated key. Only used when
132    * splitting logs.
133    */
134   // TODO: Remove this Writable.
135   // TODO: Why is this in here?  Implementation detail?
136   class Entry implements Writable {
137     private WALEdit edit;
138     private HLogKey key;
139 
140     public Entry() {
141       edit = new WALEdit();
142       key = new HLogKey();
143     }
144 
145     /**
146      * Constructor for both params
147      *
148      * @param edit log's edit
149      * @param key log's key
150      */
151     public Entry(HLogKey key, WALEdit edit) {
152       this.key = key;
153       this.edit = edit;
154     }
155 
156     /**
157      * Gets the edit
158      *
159      * @return edit
160      */
161     public WALEdit getEdit() {
162       return edit;
163     }
164 
165     /**
166      * Gets the key
167      *
168      * @return key
169      */
170     public HLogKey getKey() {
171       return key;
172     }
173 
174     /**
175      * Set compression context for this entry.
176      *
177      * @param compressionContext Compression context
178      */
179     public void setCompressionContext(CompressionContext compressionContext) {
180       edit.setCompressionContext(compressionContext);
181       key.setCompressionContext(compressionContext);
182     }
183 
184     @Override
185     public String toString() {
186       return this.key + "=" + this.edit;
187     }
188 
189     @Override
190     @SuppressWarnings("deprecation")
191     public void write(DataOutput dataOutput) throws IOException {
192       this.key.write(dataOutput);
193       this.edit.write(dataOutput);
194     }
195 
196     @Override
197     public void readFields(DataInput dataInput) throws IOException {
198       this.key.readFields(dataInput);
199       this.edit.readFields(dataInput);
200     }
201   }
202 
203   /**
204    * Registers WALActionsListener
205    *
206    * @param listener
207    */
208   void registerWALActionsListener(final WALActionsListener listener);
209 
210   /**
211    * Unregisters WALActionsListener
212    *
213    * @param listener
214    */
215   boolean unregisterWALActionsListener(final WALActionsListener listener);
216 
217   /**
218    * @return Current state of the monotonically increasing file id.
219    */
220   // TODO: Remove.  Implementation detail.
221   long getFilenum();
222 
223   /**
224    * @return the number of HLog files
225    */
226   int getNumLogFiles();
227   
228   /**
229    * @return the size of HLog files
230    */
231   long getLogFileSize();
232 
233   // TODO: Log rolling should not be in this interface.
234   /**
235    * Roll the log writer. That is, start writing log messages to a new file.
236    *
237    * <p>
238    * The implementation is synchronized in order to make sure there's one rollWriter
239    * running at any given time.
240    *
241    * @return If lots of logs, flush the returned regions so next time through we
242    *         can clean logs. Returns null if nothing to flush. Names are actual
243    *         region names as returned by {@link HRegionInfo#getEncodedName()}
244    * @throws org.apache.hadoop.hbase.regionserver.wal.FailedLogCloseException
245    * @throws IOException
246    */
247   byte[][] rollWriter() throws FailedLogCloseException, IOException;
248 
249   /**
250    * Roll the log writer. That is, start writing log messages to a new file.
251    *
252    * <p>
253    * The implementation is synchronized in order to make sure there's one rollWriter
254    * running at any given time.
255    *
256    * @param force
257    *          If true, force creation of a new writer even if no entries have
258    *          been written to the current writer
259    * @return If lots of logs, flush the returned regions so next time through we
260    *         can clean logs. Returns null if nothing to flush. Names are actual
261    *         region names as returned by {@link HRegionInfo#getEncodedName()}
262    * @throws org.apache.hadoop.hbase.regionserver.wal.FailedLogCloseException
263    * @throws IOException
264    */
265   byte[][] rollWriter(boolean force) throws FailedLogCloseException, IOException;
266 
267   /**
268    * Shut down the log.
269    *
270    * @throws IOException
271    */
272   void close() throws IOException;
273 
274   /**
275    * Shut down the log and delete the log directory.
276    * Used by tests only and in rare cases where we need a log just temporarily while bootstrapping
277    * a region or running migrations.
278    *
279    * @throws IOException
280    */
281   void closeAndDelete() throws IOException;
282 
283   /**
284    * Same as {@link #appendNoSync(HRegionInfo, TableName, WALEdit, List, long, HTableDescriptor,
285    *   AtomicLong, boolean, long, long)}
286    * except it causes a sync on the log
287    * @param info
288    * @param tableName
289    * @param edits
290    * @param now
291    * @param htd
292    * @param sequenceId
293    * @throws IOException
294    * @deprecated For tests only and even then, should use
295    * {@link #appendNoSync(HTableDescriptor, HRegionInfo, HLogKey, WALEdit, AtomicLong, boolean, 
296    * List)} and {@link #sync()} instead.
297    */
298   @VisibleForTesting
299   public void append(HRegionInfo info, TableName tableName, WALEdit edits,
300       final long now, HTableDescriptor htd, AtomicLong sequenceId) throws IOException;
301 
302   /**
303    * For notification post append to the writer.  Used by metrics system at least.
304    * @param entry
305    * @param elapsedTime
306    * @return Size of this append.
307    */
308   long postAppend(final Entry entry, final long elapsedTime);
309 
310   /**
311    * For notification post writer sync.  Used by metrics system at least.
312    * @param timeInMillis How long the filesystem sync took in milliseconds.
313    * @param handlerSyncs How many sync handler calls were released by this call to filesystem
314    * sync.
315    */
316   void postSync(final long timeInMillis, final int handlerSyncs);
317 
318   /**
319    * Append a set of edits to the WAL. WAL edits are keyed by (encoded) regionName, rowname, and
320    * log-sequence-id. The WAL is not flushed/sync'd after this transaction completes BUT on return
321    * this edit must have its region edit/sequence id assigned else it messes up our unification
322    * of mvcc and sequenceid.
323    * @param info
324    * @param tableName
325    * @param edits
326    * @param clusterIds
327    * @param now
328    * @param htd
329    * @param sequenceId A reference to the atomic long the <code>info</code> region is using as
330    * source of its incrementing edits sequence id.  Inside in this call we will increment it and
331    * attach the sequence to the edit we apply the WAL.
332    * @param isInMemstore Always true except for case where we are writing a compaction completion
333    * record into the WAL; in this case the entry is just so we can finish an unfinished compaction
334    * -- it is not an edit for memstore.
335    * @param nonceGroup
336    * @param nonce
337    * @return Returns a 'transaction id'.  Do not use. This is an internal implementation detail and
338    * cannot be respected in all implementations; i.e. the append/sync machine may or may not be
339    * able to sync an explicit edit only (the current default implementation syncs up to the time
340    * of the sync call syncing whatever is behind the sync).
341    * @throws IOException
342    * @deprecated Use {@link #appendNoSync(HTableDescriptor, HRegionInfo, HLogKey, WALEdit, AtomicLong, boolean, List)}
343    * instead because you can get back the region edit/sequenceid; it is set into the passed in
344    * <code>key</code>.
345    */
346   long appendNoSync(HRegionInfo info, TableName tableName, WALEdit edits,
347       List<UUID> clusterIds, final long now, HTableDescriptor htd, AtomicLong sequenceId,
348       boolean isInMemstore, long nonceGroup, long nonce) throws IOException;
349 
350   /**
351    * Append a set of edits to the WAL. The WAL is not flushed/sync'd after this transaction
352    * completes BUT on return this edit must have its region edit/sequence id assigned
353    * else it messes up our unification of mvcc and sequenceid.  On return <code>key</code> will
354    * have the region edit/sequence id filled in.
355    * @param info
356    * @param key Modified by this call; we add to it this edits region edit/sequence id.
357    * @param edits Edits to append. MAY CONTAIN NO EDITS for case where we want to get an edit
358    * sequence id that is after all currently appended edits.
359    * @param htd
360    * @param sequenceId A reference to the atomic long the <code>info</code> region is using as
361    * source of its incrementing edits sequence id.  Inside in this call we will increment it and
362    * attach the sequence to the edit we apply the WAL.
363    * @param inMemstore Always true except for case where we are writing a compaction completion
364    * record into the WAL; in this case the entry is just so we can finish an unfinished compaction
365    * -- it is not an edit for memstore.
366    * @param memstoreKVs list of KVs added into memstore
367    * @return Returns a 'transaction id' and <code>key</code> will have the region edit/sequence id
368    * in it.
369    * @throws IOException
370    */
371   long appendNoSync(HTableDescriptor htd, HRegionInfo info, HLogKey key, WALEdit edits,
372       AtomicLong sequenceId, boolean inMemstore, List<KeyValue> memstoreKVs)
373   throws IOException;
374 
375   // TODO: Do we need all these versions of sync?
376   void hsync() throws IOException;
377 
378   void hflush() throws IOException;
379 
380   /**
381    * Sync what we have in the WAL.
382    * @throws IOException
383    */
384   void sync() throws IOException;
385 
386   /**
387    * Sync the WAL if the txId was not already sync'd.
388    * @param txid Transaction id to sync to.
389    * @throws IOException
390    */
391   void sync(long txid) throws IOException;
392 
393   /**
394    * WAL keeps track of the sequence numbers that were not yet flushed from memstores
395    * in order to be able to do cleanup. This method tells WAL that some region is about
396    * to flush memstore.
397    *
398    * <p>We stash the oldest seqNum for the region, and let the the next edit inserted in this
399    * region be recorded in {@link #append(HRegionInfo, TableName, WALEdit, long, HTableDescriptor,
400    * AtomicLong)} as new oldest seqnum.
401    * In case of flush being aborted, we put the stashed value back; in case of flush succeeding,
402    * the seqNum of that first edit after start becomes the valid oldest seqNum for this region.
403    *
404    * @return true if the flush can proceed, false in case wal is closing (ususally, when server is
405    * closing) and flush couldn't be started.
406    */
407   boolean startCacheFlush(final byte[] encodedRegionName);
408 
409   /**
410    * Complete the cache flush.
411    * @param encodedRegionName Encoded region name.
412    */
413   void completeCacheFlush(final byte[] encodedRegionName);
414 
415   /**
416    * Abort a cache flush. Call if the flush fails. Note that the only recovery
417    * for an aborted flush currently is a restart of the regionserver so the
418    * snapshot content dropped by the failure gets restored to the memstore.v
419    * @param encodedRegionName Encoded region name.
420    */
421   void abortCacheFlush(byte[] encodedRegionName);
422 
423   /**
424    * @return Coprocessor host.
425    */
426   WALCoprocessorHost getCoprocessorHost();
427 
428   /**
429    * Get LowReplication-Roller status
430    *
431    * @return lowReplicationRollEnabled
432    */
433   // TODO: This is implementation detail?
434   boolean isLowReplicationRollEnabled();
435 
436   /** Gets the earliest sequence number in the memstore for this particular region.
437    * This can serve as best-effort "recent" WAL number for this region.
438    * @param encodedRegionName The region to get the number for.
439    * @return The number if present, HConstants.NO_SEQNUM if absent.
440    */
441   long getEarliestMemstoreSeqNum(byte[] encodedRegionName);
442 }