001/* 002 * Licensed to the Apache Software Foundation (ASF) under one 003 * or more contributor license agreements. See the NOTICE file 004 * distributed with this work for additional information 005 * regarding copyright ownership. The ASF licenses this file 006 * to you under the Apache License, Version 2.0 (the 007 * "License"); you may not use this file except in compliance 008 * with the License. You may obtain a copy of the License at 009 * 010 * http://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, software 013 * distributed under the License is distributed on an "AS IS" BASIS, 014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 015 * See the License for the specific language governing permissions and 016 * limitations under the License. 017 */ 018package org.apache.hadoop.hbase.wal; 019 020import java.io.Closeable; 021import java.io.IOException; 022import java.util.Map; 023import java.util.Set; 024import org.apache.hadoop.hbase.HConstants; 025import org.apache.hadoop.hbase.client.RegionInfo; 026import org.apache.hadoop.hbase.regionserver.wal.CompressionContext; 027import org.apache.hadoop.hbase.regionserver.wal.FailedLogCloseException; 028import org.apache.hadoop.hbase.regionserver.wal.WALActionsListener; 029import org.apache.hadoop.hbase.regionserver.wal.WALCoprocessorHost; 030import org.apache.hadoop.hbase.replication.regionserver.WALFileLengthProvider; 031import org.apache.yetus.audience.InterfaceAudience; 032import org.apache.yetus.audience.InterfaceStability; 033 034import org.apache.hbase.thirdparty.com.google.common.annotations.VisibleForTesting; 035 036/** 037 * A Write Ahead Log (WAL) provides service for reading, writing waledits. This interface provides 038 * APIs for WAL users (such as RegionServer) to use the WAL (do append, sync, etc). 039 * 040 * Note that some internals, such as log rolling and performance evaluation tools, will use 041 * WAL.equals to determine if they have already seen a given WAL. 042 */ 043@InterfaceAudience.Private 044@InterfaceStability.Evolving 045public interface WAL extends Closeable, WALFileLengthProvider { 046 047 /** 048 * Registers WALActionsListener 049 */ 050 void registerWALActionsListener(final WALActionsListener listener); 051 052 /** 053 * Unregisters WALActionsListener 054 */ 055 boolean unregisterWALActionsListener(final WALActionsListener listener); 056 057 /** 058 * Roll the log writer. That is, start writing log messages to a new file. 059 * 060 * <p/> 061 * The implementation is synchronized in order to make sure there's one rollWriter 062 * running at any given time. 063 * 064 * @return If lots of logs, flush the returned regions so next time through we 065 * can clean logs. Returns null if nothing to flush. Names are actual 066 * region names as returned by {@link RegionInfo#getEncodedName()} 067 */ 068 byte[][] rollWriter() throws FailedLogCloseException, IOException; 069 070 /** 071 * Roll the log writer. That is, start writing log messages to a new file. 072 * 073 * <p/> 074 * The implementation is synchronized in order to make sure there's one rollWriter 075 * running at any given time. 076 * 077 * @param force 078 * If true, force creation of a new writer even if no entries have 079 * been written to the current writer 080 * @return If lots of logs, flush the returned regions so next time through we 081 * can clean logs. Returns null if nothing to flush. Names are actual 082 * region names as returned by {@link RegionInfo#getEncodedName()} 083 */ 084 byte[][] rollWriter(boolean force) throws IOException; 085 086 /** 087 * Stop accepting new writes. If we have unsynced writes still in buffer, sync them. 088 * Extant edits are left in place in backing storage to be replayed later. 089 */ 090 void shutdown() throws IOException; 091 092 /** 093 * Caller no longer needs any edits from this WAL. Implementers are free to reclaim 094 * underlying resources after this call; i.e. filesystem based WALs can archive or 095 * delete files. 096 */ 097 @Override 098 void close() throws IOException; 099 100 /** 101 * Append a set of data edits to the WAL. 'Data' here means that the content in the edits will 102 * also have transitioned through the memstore. 103 * <p/> 104 * The WAL is not flushed/sync'd after this transaction completes BUT on return this edit must 105 * have its region edit/sequence id assigned else it messes up our unification of mvcc and 106 * sequenceid. On return <code>key</code> will have the region edit/sequence id filled in. 107 * @param info the regioninfo associated with append 108 * @param key Modified by this call; we add to it this edits region edit/sequence id. 109 * @param edits Edits to append. MAY CONTAIN NO EDITS for case where we want to get an edit 110 * sequence id that is after all currently appended edits. 111 * @return Returns a 'transaction id' and <code>key</code> will have the region edit/sequence id 112 * in it. 113 * @see #appendMarker(RegionInfo, WALKeyImpl, WALEdit) 114 */ 115 long appendData(RegionInfo info, WALKeyImpl key, WALEdit edits) throws IOException; 116 117 /** 118 * Append an operational 'meta' event marker edit to the WAL. A marker meta edit could 119 * be a FlushDescriptor, a compaction marker, or a region event marker; e.g. region open 120 * or region close. The difference between a 'marker' append and a 'data' append as in 121 * {@link #appendData(RegionInfo, WALKeyImpl, WALEdit)}is that a marker will not have 122 * transitioned through the memstore. 123 * <p/> 124 * The WAL is not flushed/sync'd after this transaction completes BUT on return this edit must 125 * have its region edit/sequence id assigned else it messes up our unification of mvcc and 126 * sequenceid. On return <code>key</code> will have the region edit/sequence id filled in. 127 * @param info the regioninfo associated with append 128 * @param key Modified by this call; we add to it this edits region edit/sequence id. 129 * @param edits Edits to append. MAY CONTAIN NO EDITS for case where we want to get an edit 130 * sequence id that is after all currently appended edits. 131 * @return Returns a 'transaction id' and <code>key</code> will have the region edit/sequence id 132 * in it. 133 * @see #appendData(RegionInfo, WALKeyImpl, WALEdit) 134 */ 135 long appendMarker(RegionInfo info, WALKeyImpl key, WALEdit edits) throws IOException; 136 137 /** 138 * updates the seuence number of a specific store. 139 * depending on the flag: replaces current seq number if the given seq id is bigger, 140 * or even if it is lower than existing one 141 */ 142 void updateStore(byte[] encodedRegionName, byte[] familyName, Long sequenceid, 143 boolean onlyIfGreater); 144 145 /** 146 * Sync what we have in the WAL. 147 */ 148 void sync() throws IOException; 149 150 /** 151 * Sync the WAL if the txId was not already sync'd. 152 * @param txid Transaction id to sync to. 153 */ 154 void sync(long txid) throws IOException; 155 156 /** 157 * @param forceSync Flag to force sync rather than flushing to the buffer. Example - Hadoop hflush 158 * vs hsync. 159 */ 160 default void sync(boolean forceSync) throws IOException { 161 sync(); 162 } 163 164 /** 165 * @param txid Transaction id to sync to. 166 * @param forceSync Flag to force sync rather than flushing to the buffer. Example - Hadoop hflush 167 * vs hsync. 168 */ 169 default void sync(long txid, boolean forceSync) throws IOException { 170 sync(txid); 171 } 172 173 /** 174 * WAL keeps track of the sequence numbers that are as yet not flushed im memstores 175 * in order to be able to do accounting to figure which WALs can be let go. This method tells WAL 176 * that some region is about to flush. The flush can be the whole region or for a column family 177 * of the region only. 178 * 179 * <p>Currently, it is expected that the update lock is held for the region; i.e. no 180 * concurrent appends while we set up cache flush. 181 * @param families Families to flush. May be a subset of all families in the region. 182 * @return Returns {@link HConstants#NO_SEQNUM} if we are flushing the whole region OR if 183 * we are flushing a subset of all families but there are no edits in those families not 184 * being flushed; in other words, this is effectively same as a flush of all of the region 185 * though we were passed a subset of regions. Otherwise, it returns the sequence id of the 186 * oldest/lowest outstanding edit. 187 * @see #completeCacheFlush(byte[]) 188 * @see #abortCacheFlush(byte[]) 189 */ 190 Long startCacheFlush(final byte[] encodedRegionName, Set<byte[]> families); 191 192 Long startCacheFlush(final byte[] encodedRegionName, Map<byte[], Long> familyToSeq); 193 194 /** 195 * Complete the cache flush. 196 * @param encodedRegionName Encoded region name. 197 * @see #startCacheFlush(byte[], Set) 198 * @see #abortCacheFlush(byte[]) 199 */ 200 void completeCacheFlush(final byte[] encodedRegionName); 201 202 /** 203 * Abort a cache flush. Call if the flush fails. Note that the only recovery 204 * for an aborted flush currently is a restart of the regionserver so the 205 * snapshot content dropped by the failure gets restored to the memstore. 206 * @param encodedRegionName Encoded region name. 207 */ 208 void abortCacheFlush(byte[] encodedRegionName); 209 210 /** 211 * @return Coprocessor host. 212 */ 213 WALCoprocessorHost getCoprocessorHost(); 214 215 /** 216 * Gets the earliest unflushed sequence id in the memstore for the region. 217 * @param encodedRegionName The region to get the number for. 218 * @return The earliest/lowest/oldest sequence id if present, HConstants.NO_SEQNUM if absent. 219 * @deprecated Since version 1.2.0. Removing because not used and exposes subtle internal 220 * workings. Use {@link #getEarliestMemStoreSeqNum(byte[], byte[])} 221 */ 222 @VisibleForTesting 223 @Deprecated 224 long getEarliestMemStoreSeqNum(byte[] encodedRegionName); 225 226 /** 227 * Gets the earliest unflushed sequence id in the memstore for the store. 228 * @param encodedRegionName The region to get the number for. 229 * @param familyName The family to get the number for. 230 * @return The earliest/lowest/oldest sequence id if present, HConstants.NO_SEQNUM if absent. 231 */ 232 long getEarliestMemStoreSeqNum(byte[] encodedRegionName, byte[] familyName); 233 234 /** 235 * Human readable identifying information about the state of this WAL. 236 * Implementors are encouraged to include information appropriate for debugging. 237 * Consumers are advised not to rely on the details of the returned String; it does 238 * not have a defined structure. 239 */ 240 @Override 241 String toString(); 242 243 /** 244 * When outside clients need to consume persisted WALs, they rely on a provided 245 * Reader. 246 */ 247 interface Reader extends Closeable { 248 Entry next() throws IOException; 249 Entry next(Entry reuse) throws IOException; 250 void seek(long pos) throws IOException; 251 long getPosition() throws IOException; 252 void reset() throws IOException; 253 } 254 255 /** 256 * Utility class that lets us keep track of the edit with it's key. 257 */ 258 class Entry { 259 private final WALEdit edit; 260 private final WALKeyImpl key; 261 262 public Entry() { 263 this(new WALKeyImpl(), new WALEdit()); 264 } 265 266 /** 267 * Constructor for both params 268 * 269 * @param edit log's edit 270 * @param key log's key 271 */ 272 public Entry(WALKeyImpl key, WALEdit edit) { 273 this.key = key; 274 this.edit = edit; 275 } 276 277 /** 278 * Gets the edit 279 * 280 * @return edit 281 */ 282 public WALEdit getEdit() { 283 return edit; 284 } 285 286 /** 287 * Gets the key 288 * 289 * @return key 290 */ 291 public WALKeyImpl getKey() { 292 return key; 293 } 294 295 /** 296 * Set compression context for this entry. 297 * 298 * @param compressionContext 299 * Compression context 300 * @deprecated deparcated since hbase 2.1.0 301 */ 302 @Deprecated 303 public void setCompressionContext(CompressionContext compressionContext) { 304 key.setCompressionContext(compressionContext); 305 } 306 307 @Override 308 public String toString() { 309 return this.key + "=" + this.edit; 310 } 311 } 312}