001/* 002 * Licensed to the Apache Software Foundation (ASF) under one 003 * or more contributor license agreements. See the NOTICE file 004 * distributed with this work for additional information 005 * regarding copyright ownership. The ASF licenses this file 006 * to you under the Apache License, Version 2.0 (the 007 * "License"); you may not use this file except in compliance 008 * with the License. You may obtain a copy of the License at 009 * 010 * http://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, software 013 * distributed under the License is distributed on an "AS IS" BASIS, 014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 015 * See the License for the specific language governing permissions and 016 * limitations under the License. 017 */ 018package org.apache.hadoop.hbase.wal; 019 020import java.io.Closeable; 021import java.io.IOException; 022import java.util.List; 023import java.util.Map; 024import java.util.Set; 025import org.apache.hadoop.hbase.HConstants; 026import org.apache.hadoop.hbase.client.RegionInfo; 027import org.apache.hadoop.hbase.regionserver.wal.CompressionContext; 028import org.apache.hadoop.hbase.regionserver.wal.FailedLogCloseException; 029import org.apache.hadoop.hbase.regionserver.wal.WALActionsListener; 030import org.apache.hadoop.hbase.regionserver.wal.WALCoprocessorHost; 031import org.apache.hadoop.hbase.replication.regionserver.WALFileLengthProvider; 032import org.apache.yetus.audience.InterfaceAudience; 033import org.apache.yetus.audience.InterfaceStability; 034 035import static org.apache.commons.lang3.StringUtils.isNumeric; 036 037/** 038 * A Write Ahead Log (WAL) provides service for reading, writing waledits. This interface provides 039 * APIs for WAL users (such as RegionServer) to use the WAL (do append, sync, etc). 040 * 041 * Note that some internals, such as log rolling and performance evaluation tools, will use 042 * WAL.equals to determine if they have already seen a given WAL. 043 */ 044@InterfaceAudience.Private 045@InterfaceStability.Evolving 046public interface WAL extends Closeable, WALFileLengthProvider { 047 048 /** 049 * Registers WALActionsListener 050 */ 051 void registerWALActionsListener(final WALActionsListener listener); 052 053 /** 054 * Unregisters WALActionsListener 055 */ 056 boolean unregisterWALActionsListener(final WALActionsListener listener); 057 058 /** 059 * Roll the log writer. That is, start writing log messages to a new file. 060 * 061 * <p/> 062 * The implementation is synchronized in order to make sure there's one rollWriter 063 * running at any given time. 064 * 065 * @return If lots of logs, flush the stores of returned regions so next time through we 066 * can clean logs. Returns null if nothing to flush. Names are actual 067 * region names as returned by {@link RegionInfo#getEncodedName()} 068 */ 069 Map<byte[], List<byte[]>> rollWriter() throws FailedLogCloseException, IOException; 070 071 /** 072 * Roll the log writer. That is, start writing log messages to a new file. 073 * 074 * <p/> 075 * The implementation is synchronized in order to make sure there's one rollWriter 076 * running at any given time. 077 * 078 * @param force 079 * If true, force creation of a new writer even if no entries have 080 * been written to the current writer 081 * @return If lots of logs, flush the stores of returned regions so next time through we 082 * can clean logs. Returns null if nothing to flush. Names are actual 083 * region names as returned by {@link RegionInfo#getEncodedName()} 084 */ 085 Map<byte[], List<byte[]>> rollWriter(boolean force) throws IOException; 086 087 /** 088 * Stop accepting new writes. If we have unsynced writes still in buffer, sync them. 089 * Extant edits are left in place in backing storage to be replayed later. 090 */ 091 void shutdown() throws IOException; 092 093 /** 094 * Caller no longer needs any edits from this WAL. Implementers are free to reclaim 095 * underlying resources after this call; i.e. filesystem based WALs can archive or 096 * delete files. 097 */ 098 @Override 099 void close() throws IOException; 100 101 /** 102 * Append a set of data edits to the WAL. 'Data' here means that the content in the edits will 103 * also have transitioned through the memstore. 104 * <p/> 105 * The WAL is not flushed/sync'd after this transaction completes BUT on return this edit must 106 * have its region edit/sequence id assigned else it messes up our unification of mvcc and 107 * sequenceid. On return <code>key</code> will have the region edit/sequence id filled in. 108 * @param info the regioninfo associated with append 109 * @param key Modified by this call; we add to it this edits region edit/sequence id. 110 * @param edits Edits to append. MAY CONTAIN NO EDITS for case where we want to get an edit 111 * sequence id that is after all currently appended edits. 112 * @return Returns a 'transaction id' and <code>key</code> will have the region edit/sequence id 113 * in it. 114 * @see #appendMarker(RegionInfo, WALKeyImpl, WALEdit) 115 */ 116 long appendData(RegionInfo info, WALKeyImpl key, WALEdit edits) throws IOException; 117 118 /** 119 * Append an operational 'meta' event marker edit to the WAL. A marker meta edit could 120 * be a FlushDescriptor, a compaction marker, or a region event marker; e.g. region open 121 * or region close. The difference between a 'marker' append and a 'data' append as in 122 * {@link #appendData(RegionInfo, WALKeyImpl, WALEdit)}is that a marker will not have 123 * transitioned through the memstore. 124 * <p/> 125 * The WAL is not flushed/sync'd after this transaction completes BUT on return this edit must 126 * have its region edit/sequence id assigned else it messes up our unification of mvcc and 127 * sequenceid. On return <code>key</code> will have the region edit/sequence id filled in. 128 * @param info the regioninfo associated with append 129 * @param key Modified by this call; we add to it this edits region edit/sequence id. 130 * @param edits Edits to append. MAY CONTAIN NO EDITS for case where we want to get an edit 131 * sequence id that is after all currently appended edits. 132 * @return Returns a 'transaction id' and <code>key</code> will have the region edit/sequence id 133 * in it. 134 * @see #appendData(RegionInfo, WALKeyImpl, WALEdit) 135 */ 136 long appendMarker(RegionInfo info, WALKeyImpl key, WALEdit edits) throws IOException; 137 138 /** 139 * updates the seuence number of a specific store. 140 * depending on the flag: replaces current seq number if the given seq id is bigger, 141 * or even if it is lower than existing one 142 */ 143 void updateStore(byte[] encodedRegionName, byte[] familyName, Long sequenceid, 144 boolean onlyIfGreater); 145 146 /** 147 * Sync what we have in the WAL. 148 */ 149 void sync() throws IOException; 150 151 /** 152 * Sync the WAL if the txId was not already sync'd. 153 * @param txid Transaction id to sync to. 154 */ 155 void sync(long txid) throws IOException; 156 157 /** 158 * @param forceSync Flag to force sync rather than flushing to the buffer. Example - Hadoop hflush 159 * vs hsync. 160 */ 161 default void sync(boolean forceSync) throws IOException { 162 sync(); 163 } 164 165 /** 166 * @param txid Transaction id to sync to. 167 * @param forceSync Flag to force sync rather than flushing to the buffer. Example - Hadoop hflush 168 * vs hsync. 169 */ 170 default void sync(long txid, boolean forceSync) throws IOException { 171 sync(txid); 172 } 173 174 /** 175 * WAL keeps track of the sequence numbers that are as yet not flushed im memstores 176 * in order to be able to do accounting to figure which WALs can be let go. This method tells WAL 177 * that some region is about to flush. The flush can be the whole region or for a column family 178 * of the region only. 179 * 180 * <p>Currently, it is expected that the update lock is held for the region; i.e. no 181 * concurrent appends while we set up cache flush. 182 * @param families Families to flush. May be a subset of all families in the region. 183 * @return Returns {@link HConstants#NO_SEQNUM} if we are flushing the whole region OR if 184 * we are flushing a subset of all families but there are no edits in those families not 185 * being flushed; in other words, this is effectively same as a flush of all of the region 186 * though we were passed a subset of regions. Otherwise, it returns the sequence id of the 187 * oldest/lowest outstanding edit. 188 * @see #completeCacheFlush(byte[], long) 189 * @see #abortCacheFlush(byte[]) 190 */ 191 Long startCacheFlush(final byte[] encodedRegionName, Set<byte[]> families); 192 193 Long startCacheFlush(final byte[] encodedRegionName, Map<byte[], Long> familyToSeq); 194 195 /** 196 * Complete the cache flush. 197 * @param encodedRegionName Encoded region name. 198 * @param maxFlushedSeqId The maxFlushedSeqId for this flush. There is no edit in memory that is 199 * less that this sequence id. 200 * @see #startCacheFlush(byte[], Set) 201 * @see #abortCacheFlush(byte[]) 202 */ 203 void completeCacheFlush(final byte[] encodedRegionName, long maxFlushedSeqId); 204 205 /** 206 * Abort a cache flush. Call if the flush fails. Note that the only recovery 207 * for an aborted flush currently is a restart of the regionserver so the 208 * snapshot content dropped by the failure gets restored to the memstore. 209 * @param encodedRegionName Encoded region name. 210 */ 211 void abortCacheFlush(byte[] encodedRegionName); 212 213 /** 214 * @return Coprocessor host. 215 */ 216 WALCoprocessorHost getCoprocessorHost(); 217 218 /** 219 * Gets the earliest unflushed sequence id in the memstore for the region. 220 * @param encodedRegionName The region to get the number for. 221 * @return The earliest/lowest/oldest sequence id if present, HConstants.NO_SEQNUM if absent. 222 * @deprecated Since version 1.2.0. Removing because not used and exposes subtle internal 223 * workings. Use {@link #getEarliestMemStoreSeqNum(byte[], byte[])} 224 */ 225 @Deprecated 226 long getEarliestMemStoreSeqNum(byte[] encodedRegionName); 227 228 /** 229 * Gets the earliest unflushed sequence id in the memstore for the store. 230 * @param encodedRegionName The region to get the number for. 231 * @param familyName The family to get the number for. 232 * @return The earliest/lowest/oldest sequence id if present, HConstants.NO_SEQNUM if absent. 233 */ 234 long getEarliestMemStoreSeqNum(byte[] encodedRegionName, byte[] familyName); 235 236 /** 237 * Human readable identifying information about the state of this WAL. 238 * Implementors are encouraged to include information appropriate for debugging. 239 * Consumers are advised not to rely on the details of the returned String; it does 240 * not have a defined structure. 241 */ 242 @Override 243 String toString(); 244 245 /** 246 * When outside clients need to consume persisted WALs, they rely on a provided 247 * Reader. 248 */ 249 interface Reader extends Closeable { 250 Entry next() throws IOException; 251 Entry next(Entry reuse) throws IOException; 252 void seek(long pos) throws IOException; 253 long getPosition() throws IOException; 254 void reset() throws IOException; 255 } 256 257 /** 258 * Utility class that lets us keep track of the edit with it's key. 259 */ 260 class Entry { 261 private final WALEdit edit; 262 private final WALKeyImpl key; 263 264 public Entry() { 265 this(new WALKeyImpl(), new WALEdit()); 266 } 267 268 /** 269 * Constructor for both params 270 * 271 * @param edit log's edit 272 * @param key log's key 273 */ 274 public Entry(WALKeyImpl key, WALEdit edit) { 275 this.key = key; 276 this.edit = edit; 277 } 278 279 /** 280 * Gets the edit 281 * 282 * @return edit 283 */ 284 public WALEdit getEdit() { 285 return edit; 286 } 287 288 /** 289 * Gets the key 290 * 291 * @return key 292 */ 293 public WALKeyImpl getKey() { 294 return key; 295 } 296 297 /** 298 * Set compression context for this entry. 299 * 300 * @param compressionContext 301 * Compression context 302 * @deprecated deparcated since hbase 2.1.0 303 */ 304 @Deprecated 305 public void setCompressionContext(CompressionContext compressionContext) { 306 key.setCompressionContext(compressionContext); 307 } 308 309 @Override 310 public String toString() { 311 return this.key + "=" + this.edit; 312 } 313 } 314 315 /** 316 * Split a WAL filename to get a start time. WALs usually have the time we start writing to them 317 * as part of their name, usually the suffix. Sometimes there will be an extra suffix as when it 318 * is a WAL for the meta table. For example, WALs might look like this 319 * <code>10.20.20.171%3A60020.1277499063250</code> where <code>1277499063250</code> is the 320 * timestamp. Could also be a meta WAL which adds a '.meta' suffix or a 321 * synchronous replication WAL which adds a '.syncrep' suffix. Check for these. File also may have 322 * no timestamp on it. For example the recovered.edits files are WALs but are named in ascending 323 * order. Here is an example: 0000000000000016310. Allow for this. 324 * @param name Name of the WAL file. 325 * @return Timestamp or -1. 326 */ 327 public static long getTimestamp(String name) { 328 String [] splits = name.split("\\."); 329 if (splits.length <= 1) { 330 return -1; 331 } 332 String timestamp = splits[splits.length - 1]; 333 if (!isNumeric(timestamp)) { 334 // Its a '.meta' or a '.syncrep' suffix. 335 timestamp = splits[splits.length - 2]; 336 if (!isNumeric(timestamp)) { 337 return -1; 338 } 339 } 340 return Long.parseLong(timestamp); 341 } 342}