001/* 002 * Licensed to the Apache Software Foundation (ASF) under one 003 * or more contributor license agreements. See the NOTICE file 004 * distributed with this work for additional information 005 * regarding copyright ownership. The ASF licenses this file 006 * to you under the Apache License, Version 2.0 (the 007 * "License"); you may not use this file except in compliance 008 * with the License. You may obtain a copy of the License at 009 * 010 * http://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, software 013 * distributed under the License is distributed on an "AS IS" BASIS, 014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 015 * See the License for the specific language governing permissions and 016 * limitations under the License. 017 */ 018package org.apache.hadoop.hbase.wal; 019 020import static org.apache.commons.lang3.StringUtils.isNumeric; 021 022import java.io.Closeable; 023import java.io.IOException; 024import java.util.List; 025import java.util.Map; 026import java.util.Set; 027import org.apache.hadoop.hbase.HConstants; 028import org.apache.hadoop.hbase.client.RegionInfo; 029import org.apache.hadoop.hbase.regionserver.wal.CompressionContext; 030import org.apache.hadoop.hbase.regionserver.wal.FailedLogCloseException; 031import org.apache.hadoop.hbase.regionserver.wal.WALActionsListener; 032import org.apache.hadoop.hbase.regionserver.wal.WALCoprocessorHost; 033import org.apache.hadoop.hbase.replication.regionserver.WALFileLengthProvider; 034import org.apache.yetus.audience.InterfaceAudience; 035import org.apache.yetus.audience.InterfaceStability; 036 037/** 038 * A Write Ahead Log (WAL) provides service for reading, writing waledits. This interface provides 039 * APIs for WAL users (such as RegionServer) to use the WAL (do append, sync, etc). Note that some 040 * internals, such as log rolling and performance evaluation tools, will use WAL.equals to determine 041 * if they have already seen a given WAL. 042 */ 043@InterfaceAudience.Private 044@InterfaceStability.Evolving 045public interface WAL extends Closeable, WALFileLengthProvider { 046 047 /** 048 * Registers WALActionsListener 049 */ 050 void registerWALActionsListener(final WALActionsListener listener); 051 052 /** 053 * Unregisters WALActionsListener 054 */ 055 boolean unregisterWALActionsListener(final WALActionsListener listener); 056 057 /** 058 * Roll the log writer. That is, start writing log messages to a new file. 059 * <p/> 060 * The implementation is synchronized in order to make sure there's one rollWriter running at any 061 * given time. 062 * @return If lots of logs, flush the stores of returned regions so next time through we can clean 063 * logs. Returns null if nothing to flush. Names are actual region names as returned by 064 * {@link RegionInfo#getEncodedName()} 065 */ 066 Map<byte[], List<byte[]>> rollWriter() throws FailedLogCloseException, IOException; 067 068 /** 069 * Roll the log writer. That is, start writing log messages to a new file. 070 * <p/> 071 * The implementation is synchronized in order to make sure there's one rollWriter running at any 072 * given time. n * If true, force creation of a new writer even if no entries have been written to 073 * the current writer 074 * @return If lots of logs, flush the stores of returned regions so next time through we can clean 075 * logs. Returns null if nothing to flush. Names are actual region names as returned by 076 * {@link RegionInfo#getEncodedName()} 077 */ 078 Map<byte[], List<byte[]>> rollWriter(boolean force) throws IOException; 079 080 /** 081 * Stop accepting new writes. If we have unsynced writes still in buffer, sync them. Extant edits 082 * are left in place in backing storage to be replayed later. 083 */ 084 void shutdown() throws IOException; 085 086 /** 087 * Caller no longer needs any edits from this WAL. Implementers are free to reclaim underlying 088 * resources after this call; i.e. filesystem based WALs can archive or delete files. 089 */ 090 @Override 091 void close() throws IOException; 092 093 /** 094 * Append a set of data edits to the WAL. 'Data' here means that the content in the edits will 095 * also have transitioned through the memstore. 096 * <p/> 097 * The WAL is not flushed/sync'd after this transaction completes BUT on return this edit must 098 * have its region edit/sequence id assigned else it messes up our unification of mvcc and 099 * sequenceid. On return <code>key</code> will have the region edit/sequence id filled in. 100 * @param info the regioninfo associated with append 101 * @param key Modified by this call; we add to it this edits region edit/sequence id. 102 * @param edits Edits to append. MAY CONTAIN NO EDITS for case where we want to get an edit 103 * sequence id that is after all currently appended edits. 104 * @return Returns a 'transaction id' and <code>key</code> will have the region edit/sequence id 105 * in it. 106 * @see #appendMarker(RegionInfo, WALKeyImpl, WALEdit) 107 */ 108 long appendData(RegionInfo info, WALKeyImpl key, WALEdit edits) throws IOException; 109 110 /** 111 * Append an operational 'meta' event marker edit to the WAL. A marker meta edit could be a 112 * FlushDescriptor, a compaction marker, or a region event marker; e.g. region open or region 113 * close. The difference between a 'marker' append and a 'data' append as in 114 * {@link #appendData(RegionInfo, WALKeyImpl, WALEdit)}is that a marker will not have transitioned 115 * through the memstore. 116 * <p/> 117 * The WAL is not flushed/sync'd after this transaction completes BUT on return this edit must 118 * have its region edit/sequence id assigned else it messes up our unification of mvcc and 119 * sequenceid. On return <code>key</code> will have the region edit/sequence id filled in. 120 * @param info the regioninfo associated with append 121 * @param key Modified by this call; we add to it this edits region edit/sequence id. 122 * @param edits Edits to append. MAY CONTAIN NO EDITS for case where we want to get an edit 123 * sequence id that is after all currently appended edits. 124 * @return Returns a 'transaction id' and <code>key</code> will have the region edit/sequence id 125 * in it. 126 * @see #appendData(RegionInfo, WALKeyImpl, WALEdit) 127 */ 128 long appendMarker(RegionInfo info, WALKeyImpl key, WALEdit edits) throws IOException; 129 130 /** 131 * updates the seuence number of a specific store. depending on the flag: replaces current seq 132 * number if the given seq id is bigger, or even if it is lower than existing one 133 */ 134 void updateStore(byte[] encodedRegionName, byte[] familyName, Long sequenceid, 135 boolean onlyIfGreater); 136 137 /** 138 * Sync what we have in the WAL. 139 */ 140 void sync() throws IOException; 141 142 /** 143 * Sync the WAL if the txId was not already sync'd. 144 * @param txid Transaction id to sync to. 145 */ 146 void sync(long txid) throws IOException; 147 148 /** 149 * @param forceSync Flag to force sync rather than flushing to the buffer. Example - Hadoop hflush 150 * vs hsync. 151 */ 152 default void sync(boolean forceSync) throws IOException { 153 sync(); 154 } 155 156 /** 157 * @param txid Transaction id to sync to. 158 * @param forceSync Flag to force sync rather than flushing to the buffer. Example - Hadoop hflush 159 * vs hsync. 160 */ 161 default void sync(long txid, boolean forceSync) throws IOException { 162 sync(txid); 163 } 164 165 /** 166 * WAL keeps track of the sequence numbers that are as yet not flushed im memstores in order to be 167 * able to do accounting to figure which WALs can be let go. This method tells WAL that some 168 * region is about to flush. The flush can be the whole region or for a column family of the 169 * region only. 170 * <p> 171 * Currently, it is expected that the update lock is held for the region; i.e. no concurrent 172 * appends while we set up cache flush. 173 * @param families Families to flush. May be a subset of all families in the region. 174 * @return Returns {@link HConstants#NO_SEQNUM} if we are flushing the whole region OR if we are 175 * flushing a subset of all families but there are no edits in those families not being 176 * flushed; in other words, this is effectively same as a flush of all of the region 177 * though we were passed a subset of regions. Otherwise, it returns the sequence id of the 178 * oldest/lowest outstanding edit. 179 * @see #completeCacheFlush(byte[], long) 180 * @see #abortCacheFlush(byte[]) 181 */ 182 Long startCacheFlush(final byte[] encodedRegionName, Set<byte[]> families); 183 184 Long startCacheFlush(final byte[] encodedRegionName, Map<byte[], Long> familyToSeq); 185 186 /** 187 * Complete the cache flush. 188 * @param encodedRegionName Encoded region name. 189 * @param maxFlushedSeqId The maxFlushedSeqId for this flush. There is no edit in memory that is 190 * less that this sequence id. 191 * @see #startCacheFlush(byte[], Set) 192 * @see #abortCacheFlush(byte[]) 193 */ 194 void completeCacheFlush(final byte[] encodedRegionName, long maxFlushedSeqId); 195 196 /** 197 * Abort a cache flush. Call if the flush fails. Note that the only recovery for an aborted flush 198 * currently is a restart of the regionserver so the snapshot content dropped by the failure gets 199 * restored to the memstore. 200 * @param encodedRegionName Encoded region name. 201 */ 202 void abortCacheFlush(byte[] encodedRegionName); 203 204 /** Returns Coprocessor host. */ 205 WALCoprocessorHost getCoprocessorHost(); 206 207 /** 208 * Gets the earliest unflushed sequence id in the memstore for the region. 209 * @param encodedRegionName The region to get the number for. 210 * @return The earliest/lowest/oldest sequence id if present, HConstants.NO_SEQNUM if absent. 211 * @deprecated Since version 1.2.0. Removing because not used and exposes subtle internal 212 * workings. Use {@link #getEarliestMemStoreSeqNum(byte[], byte[])} 213 */ 214 @Deprecated 215 long getEarliestMemStoreSeqNum(byte[] encodedRegionName); 216 217 /** 218 * Gets the earliest unflushed sequence id in the memstore for the store. 219 * @param encodedRegionName The region to get the number for. 220 * @param familyName The family to get the number for. 221 * @return The earliest/lowest/oldest sequence id if present, HConstants.NO_SEQNUM if absent. 222 */ 223 long getEarliestMemStoreSeqNum(byte[] encodedRegionName, byte[] familyName); 224 225 /** 226 * Human readable identifying information about the state of this WAL. Implementors are encouraged 227 * to include information appropriate for debugging. Consumers are advised not to rely on the 228 * details of the returned String; it does not have a defined structure. 229 */ 230 @Override 231 String toString(); 232 233 /** 234 * When outside clients need to consume persisted WALs, they rely on a provided Reader. 235 */ 236 interface Reader extends Closeable { 237 Entry next() throws IOException; 238 239 Entry next(Entry reuse) throws IOException; 240 241 void seek(long pos) throws IOException; 242 243 long getPosition() throws IOException; 244 245 void reset() throws IOException; 246 } 247 248 /** 249 * Utility class that lets us keep track of the edit with it's key. 250 */ 251 class Entry { 252 private final WALEdit edit; 253 private final WALKeyImpl key; 254 255 public Entry() { 256 this(new WALKeyImpl(), new WALEdit()); 257 } 258 259 /** 260 * Constructor for both params 261 * @param edit log's edit 262 * @param key log's key 263 */ 264 public Entry(WALKeyImpl key, WALEdit edit) { 265 this.key = key; 266 this.edit = edit; 267 } 268 269 /** 270 * Gets the edit n 271 */ 272 public WALEdit getEdit() { 273 return edit; 274 } 275 276 /** 277 * Gets the key n 278 */ 279 public WALKeyImpl getKey() { 280 return key; 281 } 282 283 /** 284 * Set compression context for this entry. n * Compression context 285 * @deprecated deparcated since hbase 2.1.0 286 */ 287 @Deprecated 288 public void setCompressionContext(CompressionContext compressionContext) { 289 key.setCompressionContext(compressionContext); 290 } 291 292 @Override 293 public String toString() { 294 return this.key + "=" + this.edit; 295 } 296 } 297 298 /** 299 * Split a WAL filename to get a start time. WALs usually have the time we start writing to them 300 * as part of their name, usually the suffix. Sometimes there will be an extra suffix as when it 301 * is a WAL for the meta table. For example, WALs might look like this 302 * <code>10.20.20.171%3A60020.1277499063250</code> where <code>1277499063250</code> is the 303 * timestamp. Could also be a meta WAL which adds a '.meta' suffix or a synchronous replication 304 * WAL which adds a '.syncrep' suffix. Check for these. File also may have no timestamp on it. For 305 * example the recovered.edits files are WALs but are named in ascending order. Here is an 306 * example: 0000000000000016310. Allow for this. 307 * @param name Name of the WAL file. 308 * @return Timestamp or -1. 309 */ 310 public static long getTimestamp(String name) { 311 String[] splits = name.split("\\."); 312 if (splits.length <= 1) { 313 return -1; 314 } 315 String timestamp = splits[splits.length - 1]; 316 if (!isNumeric(timestamp)) { 317 // Its a '.meta' or a '.syncrep' suffix. 318 timestamp = splits[splits.length - 2]; 319 if (!isNumeric(timestamp)) { 320 return -1; 321 } 322 } 323 return Long.parseLong(timestamp); 324 } 325}