View Javadoc

1   /**
2    *
3    * Licensed to the Apache Software Foundation (ASF) under one
4    * or more contributor license agreements.  See the NOTICE file
5    * distributed with this work for additional information
6    * regarding copyright ownership.  The ASF licenses this file
7    * to you under the Apache License, Version 2.0 (the
8    * "License"); you may not use this file except in compliance
9    * with the License.  You may obtain a copy of the License at
10   *
11   *     http://www.apache.org/licenses/LICENSE-2.0
12   *
13   * Unless required by applicable law or agreed to in writing, software
14   * distributed under the License is distributed on an "AS IS" BASIS,
15   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16   * See the License for the specific language governing permissions and
17   * limitations under the License.
18   */
19  
20  package org.apache.hadoop.hbase.regionserver.wal;
21  
22  import java.io.DataInput;
23  import java.io.DataOutput;
24  import java.io.IOException;
25  import java.util.List;
26  import java.util.UUID;
27  import java.util.concurrent.atomic.AtomicLong;
28  import java.util.regex.Pattern;
29  
30  import org.apache.commons.logging.Log;
31  import org.apache.commons.logging.LogFactory;
32  import org.apache.hadoop.classification.InterfaceAudience;
33  import org.apache.hadoop.conf.Configuration;
34  import org.apache.hadoop.fs.FSDataInputStream;
35  import org.apache.hadoop.fs.FileSystem;
36  import org.apache.hadoop.fs.Path;
37  import org.apache.hadoop.hbase.HRegionInfo;
38  import org.apache.hadoop.hbase.HTableDescriptor;
39  import org.apache.hadoop.hbase.TableName;
40  import org.apache.hadoop.hbase.protobuf.generated.WALProtos.WALTrailer;
41  import org.apache.hadoop.io.Writable;
42  
43  import com.google.common.annotations.VisibleForTesting;
44  
45  /**
46   * HLog records all the edits to HStore.  It is the hbase write-ahead-log (WAL).
47   */
48  @InterfaceAudience.Private
49  // TODO: Rename interface to WAL
50  public interface HLog {
51    Log LOG = LogFactory.getLog(HLog.class);
52  
53    /** File Extension used while splitting an HLog into regions (HBASE-2312) */
54    // TODO: this seems like an implementation detail that does not belong here.
55    String SPLITTING_EXT = "-splitting";
56    boolean SPLIT_SKIP_ERRORS_DEFAULT = false;
57    /** The hbase:meta region's HLog filename extension.*/
58    // TODO: Implementation detail.  Does not belong in here.
59    String META_HLOG_FILE_EXTN = ".meta";
60  
61    /**
62     * Configuration name of HLog Trailer's warning size. If a waltrailer's size is greater than the
63     * configured size, a warning is logged. This is used with Protobuf reader/writer.
64     */
65    // TODO: Implementation detail.  Why in here?
66    String WAL_TRAILER_WARN_SIZE = "hbase.regionserver.waltrailer.warn.size";
67    int DEFAULT_WAL_TRAILER_WARN_SIZE = 1024 * 1024; // 1MB
68  
69    // TODO: Implementation detail.  Why in here?
70    Pattern EDITFILES_NAME_PATTERN = Pattern.compile("-?[0-9]+");
71    String RECOVERED_LOG_TMPFILE_SUFFIX = ".temp";
72  
73    /**
74     * WAL Reader Interface
75     */
76    interface Reader {
77      /**
78       * @param fs File system.
79       * @param path Path.
80       * @param c Configuration.
81       * @param s Input stream that may have been pre-opened by the caller; may be null.
82       */
83      void init(FileSystem fs, Path path, Configuration c, FSDataInputStream s) throws IOException;
84  
85      void close() throws IOException;
86  
87      Entry next() throws IOException;
88  
89      Entry next(Entry reuse) throws IOException;
90  
91      void seek(long pos) throws IOException;
92  
93      long getPosition() throws IOException;
94      void reset() throws IOException;
95  
96      /**
97       * @return the WALTrailer of the current HLog. It may be null in case of legacy or corrupt WAL
98       * files.
99       */
100     // TODO: What we need a trailer on WAL for?  It won't be present on last WAL most of the time.
101     // What then?
102     WALTrailer getWALTrailer();
103   }
104 
105   /**
106    * WAL Writer Intrface.
107    */
108   interface Writer {
109     void init(FileSystem fs, Path path, Configuration c, boolean overwritable) throws IOException;
110 
111     void close() throws IOException;
112 
113     void sync() throws IOException;
114 
115     void append(Entry entry) throws IOException;
116 
117     long getLength() throws IOException;
118 
119     /**
120      * Sets HLog/WAL's WALTrailer. This trailer is appended at the end of WAL on closing.
121      * @param walTrailer trailer to append to WAL.
122      */
123     // TODO: Why a trailer on the log?
124     void setWALTrailer(WALTrailer walTrailer);
125   }
126 
127   /**
128    * Utility class that lets us keep track of the edit and it's associated key. Only used when
129    * splitting logs.
130    */
131   // TODO: Remove this Writable.
132   // TODO: Why is this in here?  Implementation detail?
133   class Entry implements Writable {
134     private WALEdit edit;
135     private HLogKey key;
136 
137     public Entry() {
138       edit = new WALEdit();
139       key = new HLogKey();
140     }
141 
142     /**
143      * Constructor for both params
144      *
145      * @param edit log's edit
146      * @param key log's key
147      */
148     public Entry(HLogKey key, WALEdit edit) {
149       this.key = key;
150       this.edit = edit;
151     }
152 
153     /**
154      * Gets the edit
155      *
156      * @return edit
157      */
158     public WALEdit getEdit() {
159       return edit;
160     }
161 
162     /**
163      * Gets the key
164      *
165      * @return key
166      */
167     public HLogKey getKey() {
168       return key;
169     }
170 
171     /**
172      * Set compression context for this entry.
173      *
174      * @param compressionContext Compression context
175      */
176     public void setCompressionContext(CompressionContext compressionContext) {
177       edit.setCompressionContext(compressionContext);
178       key.setCompressionContext(compressionContext);
179     }
180 
181     @Override
182     public String toString() {
183       return this.key + "=" + this.edit;
184     }
185 
186     @Override
187     @SuppressWarnings("deprecation")
188     public void write(DataOutput dataOutput) throws IOException {
189       this.key.write(dataOutput);
190       this.edit.write(dataOutput);
191     }
192 
193     @Override
194     public void readFields(DataInput dataInput) throws IOException {
195       this.key.readFields(dataInput);
196       this.edit.readFields(dataInput);
197     }
198   }
199 
200   /**
201    * Registers WALActionsListener
202    *
203    * @param listener
204    */
205   void registerWALActionsListener(final WALActionsListener listener);
206 
207   /**
208    * Unregisters WALActionsListener
209    *
210    * @param listener
211    */
212   boolean unregisterWALActionsListener(final WALActionsListener listener);
213 
214   /**
215    * @return Current state of the monotonically increasing file id.
216    */
217   // TODO: Remove.  Implementation detail.
218   long getFilenum();
219 
220   /**
221    * @return the number of HLog files
222    */
223   int getNumLogFiles();
224   
225   /**
226    * @return the size of HLog files
227    */
228   long getLogFileSize();
229 
230   // TODO: Log rolling should not be in this interface.
231   /**
232    * Roll the log writer. That is, start writing log messages to a new file.
233    *
234    * <p>
235    * The implementation is synchronized in order to make sure there's one rollWriter
236    * running at any given time.
237    *
238    * @return If lots of logs, flush the returned regions so next time through we
239    *         can clean logs. Returns null if nothing to flush. Names are actual
240    *         region names as returned by {@link HRegionInfo#getEncodedName()}
241    * @throws org.apache.hadoop.hbase.regionserver.wal.FailedLogCloseException
242    * @throws IOException
243    */
244   byte[][] rollWriter() throws FailedLogCloseException, IOException;
245 
246   /**
247    * Roll the log writer. That is, start writing log messages to a new file.
248    *
249    * <p>
250    * The implementation is synchronized in order to make sure there's one rollWriter
251    * running at any given time.
252    *
253    * @param force
254    *          If true, force creation of a new writer even if no entries have
255    *          been written to the current writer
256    * @return If lots of logs, flush the returned regions so next time through we
257    *         can clean logs. Returns null if nothing to flush. Names are actual
258    *         region names as returned by {@link HRegionInfo#getEncodedName()}
259    * @throws org.apache.hadoop.hbase.regionserver.wal.FailedLogCloseException
260    * @throws IOException
261    */
262   byte[][] rollWriter(boolean force) throws FailedLogCloseException, IOException;
263 
264   /**
265    * Shut down the log.
266    *
267    * @throws IOException
268    */
269   void close() throws IOException;
270 
271   /**
272    * Shut down the log and delete the log directory.
273    * Used by tests only and in rare cases where we need a log just temporarily while bootstrapping
274    * a region or running migrations.
275    *
276    * @throws IOException
277    */
278   void closeAndDelete() throws IOException;
279 
280   /**
281    * Same as {@link #appendNoSync(HRegionInfo, TableName, WALEdit, List, long, HTableDescriptor,
282    *   AtomicLong, boolean, long, long)}
283    * except it causes a sync on the log
284    * @param info
285    * @param tableName
286    * @param edits
287    * @param now
288    * @param htd
289    * @param sequenceId
290    * @throws IOException
291    */
292   @VisibleForTesting
293   public void append(HRegionInfo info, TableName tableName, WALEdit edits,
294       final long now, HTableDescriptor htd, AtomicLong sequenceId) throws IOException;
295 
296   /**
297    * For notification post append to the writer.  Used by metrics system at least.
298    * @param entry
299    * @param elapsedTime
300    * @return Size of this append.
301    */
302   long postAppend(final Entry entry, final long elapsedTime);
303 
304   /**
305    * For notification post writer sync.  Used by metrics system at least.
306    * @param timeInMillis How long the filesystem sync took in milliseconds.
307    * @param handlerSyncs How many sync handler calls were released by this call to filesystem
308    * sync.
309    */
310   void postSync(final long timeInMillis, final int handlerSyncs);
311 
312   /**
313    * Append a set of edits to the WAL. WAL edits are keyed by (encoded) regionName, rowname, and
314    * log-sequence-id. The WAL is not flushed/sync'd after this transaction completes.
315    * Call {@link #sync()} to flush/sync all outstanding edits/appends.
316    * @param info
317    * @param tableName
318    * @param edits
319    * @param clusterIds
320    * @param now
321    * @param htd
322    * @param sequenceId A reference to the atomic long the <code>info</code> region is using as
323    * source of its incrementing edits sequence id.  Inside this call we will increment it and
324    * attach the sequence to the edit we apply to the WAL.
325    * @param isInMemstore Always true except for case where we are writing a compaction completion
326    * record into the WAL; in this case the entry is just so we can finish an unfinished compaction
327    * -- it is not an edit for memstore.
328    * @param nonceGroup
329    * @param nonce
330    * @return Returns a 'transaction id'.  Do not use. This is an internal implementation detail and
331    * cannot be respected in all implementations; i.e. the append/sync machine may or may not be
332    * able to sync an explicit edit only (the current default implementation syncs up to the time
333    * of the sync call syncing whatever is behind the sync).
334    * @throws IOException
335    */
336   long appendNoSync(HRegionInfo info, TableName tableName, WALEdit edits,
337       List<UUID> clusterIds, final long now, HTableDescriptor htd, AtomicLong sequenceId,
338       boolean isInMemstore, long nonceGroup, long nonce) throws IOException;
339 
340   // TODO: Do we need all these versions of sync?
341   void hsync() throws IOException;
342 
343   void hflush() throws IOException;
344 
345   void sync() throws IOException;
346 
347   /**
348    * @param txid Transaction id to sync to.
349    * @throws IOException
350    * @deprecated Since 0.96.2.  Just call {@link #sync()}.  <code>txid</code> should not be allowed
351    * outside the implementation.
352    */
353   // TODO: Why is this exposed?  txid is an internal detail.
354   @Deprecated
355   void sync(long txid) throws IOException;
356 
357   /**
358    * WAL keeps track of the sequence numbers that were not yet flushed from memstores
359    * in order to be able to do cleanup. This method tells WAL that some region is about
360    * to flush memstore.
361    *
362    * <p>We stash the oldest seqNum for the region, and let the next edit inserted in this
363    * region be recorded in {@link #append(HRegionInfo, TableName, WALEdit, long, HTableDescriptor,
364    * AtomicLong)} as new oldest seqnum.
365    * In case of flush being aborted, we put the stashed value back; in case of flush succeeding,
366    * the seqNum of that first edit after start becomes the valid oldest seqNum for this region.
367    *
368    * @return true if the flush can proceed, false in case wal is closing (usually, when server is
369    * closing) and flush couldn't be started.
370    */
371   boolean startCacheFlush(final byte[] encodedRegionName);
372 
373   /**
374    * Complete the cache flush.
375    * @param encodedRegionName Encoded region name.
376    */
377   void completeCacheFlush(final byte[] encodedRegionName);
378 
379   /**
380    * Abort a cache flush. Call if the flush fails. Note that the only recovery
381    * for an aborted flush currently is a restart of the regionserver so the
382    * snapshot content dropped by the failure gets restored to the memstore.
383    * @param encodedRegionName Encoded region name.
384    */
385   void abortCacheFlush(byte[] encodedRegionName);
386 
387   /**
388    * @return Coprocessor host.
389    */
390   WALCoprocessorHost getCoprocessorHost();
391 
392   /**
393    * Get LowReplication-Roller status
394    *
395    * @return lowReplicationRollEnabled
396    */
397   // TODO: This is implementation detail?
398   boolean isLowReplicationRollEnabled();
399 
400   /** Gets the earliest sequence number in the memstore for this particular region.
401    * This can serve as best-effort "recent" WAL number for this region.
402    * @param encodedRegionName The region to get the number for.
403    * @return The number if present, HConstants.NO_SEQNUM if absent.
404    */
405   long getEarliestMemstoreSeqNum(byte[] encodedRegionName);
406 }