001/* 002 * Licensed to the Apache Software Foundation (ASF) under one 003 * or more contributor license agreements. See the NOTICE file 004 * distributed with this work for additional information 005 * regarding copyright ownership. The ASF licenses this file 006 * to you under the Apache License, Version 2.0 (the 007 * "License"); you may not use this file except in compliance 008 * with the License. You may obtain a copy of the License at 009 * 010 * http://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, software 013 * distributed under the License is distributed on an "AS IS" BASIS, 014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 015 * See the License for the specific language governing permissions and 016 * limitations under the License. 017 */ 018package org.apache.hadoop.hbase.wal; 019 020import java.io.IOException; 021import java.util.ArrayList; 022import java.util.List; 023import java.util.Map; 024import java.util.Set; 025import java.util.TreeSet; 026import org.apache.hadoop.hbase.Cell; 027import org.apache.hadoop.hbase.CellUtil; 028import org.apache.hadoop.hbase.ExtendedCell; 029import org.apache.hadoop.hbase.HBaseInterfaceAudience; 030import org.apache.hadoop.hbase.KeyValue; 031import org.apache.hadoop.hbase.PrivateCellUtil; 032import org.apache.hadoop.hbase.client.RegionInfo; 033import org.apache.hadoop.hbase.codec.Codec; 034import org.apache.hadoop.hbase.io.HeapSize; 035import org.apache.hadoop.hbase.util.Bytes; 036import org.apache.hadoop.hbase.util.ClassSize; 037import org.apache.hadoop.hbase.util.EnvironmentEdgeManager; 038import org.apache.yetus.audience.InterfaceAudience; 039 040import org.apache.hadoop.hbase.shaded.protobuf.generated.WALProtos; 041import org.apache.hadoop.hbase.shaded.protobuf.generated.WALProtos.CompactionDescriptor; 042import org.apache.hadoop.hbase.shaded.protobuf.generated.WALProtos.FlushDescriptor; 043import org.apache.hadoop.hbase.shaded.protobuf.generated.WALProtos.RegionEventDescriptor; 044 045/** 046 * Used in HBase's transaction log (WAL) to represent a collection of edits (Cell/KeyValue objects) 047 * that came in as a single transaction. All the edits for a given transaction are written out as a 048 * single record, in PB format, followed (optionally) by Cells written via the WALCellEncoder. 049 * <p> 050 * A particular WALEdit 'type' is the 'meta' type used to mark key operational events in the WAL 051 * such as compaction, flush, or region open. These meta types do not traverse hbase memstores. They 052 * are edits made by the hbase system rather than edit data submitted by clients. They only show in 053 * the WAL. These 'Meta' types have not been formally specified (or made into an explicit class 054 * type). They evolved organically. HBASE-8457 suggests codifying a WALEdit 'type' by adding a type 055 * field to WALEdit that gets serialized into the WAL. TODO. Would have to work on the 056 * consumption-side. Reading WALs on replay we seem to consume a Cell-at-a-time rather than by 057 * WALEdit. We are already in the below going out of our way to figure particular types -- e.g. if a 058 * compaction, replay, or close meta Marker -- during normal processing so would make sense to do 059 * this. Current system is an awkward marking of Cell columnfamily as {@link #METAFAMILY} and then 060 * setting qualifier based off meta edit type. For replay-time where we read Cell-at-a-time, there 061 * are utility methods below for figuring meta type. See also 062 * {@link #createBulkLoadEvent(RegionInfo, WALProtos.BulkLoadDescriptor)}, etc., for where we create 063 * meta WALEdit instances. 064 * </p> 065 * <p> 066 * WALEdit will accumulate a Set of all column family names referenced by the Cells 067 * {@link #add(Cell)}'d. This is an optimization. Usually when loading a WALEdit, we have the column 068 * family name to-hand.. just shove it into the WALEdit if available. Doing this, we can save on a 069 * parse of each Cell to figure column family down the line when we go to add the WALEdit to the WAL 070 * file. See the hand-off in FSWALEntry Constructor. 071 * @see WALKey 072 */ 073@InterfaceAudience.LimitedPrivate({ HBaseInterfaceAudience.REPLICATION, 074 HBaseInterfaceAudience.COPROC }) 075public class WALEdit implements HeapSize { 076 // Below defines are for writing WALEdit 'meta' Cells.. 077 // TODO: Get rid of this system of special 'meta' Cells. See HBASE-8457. It suggests 078 // adding a type to WALEdit itself for use denoting meta Edits and their types. 079 public static final byte[] METAFAMILY = Bytes.toBytes("METAFAMILY"); 080 081 /** 082 * @deprecated Since 2.3.0. Not used. 083 */ 084 @Deprecated 085 public static final byte[] METAROW = Bytes.toBytes("METAROW"); 086 087 /** 088 * @deprecated Since 2.3.0. Make it protected, internal-use only. Use 089 * {@link #isCompactionMarker(Cell)} 090 */ 091 @Deprecated 092 @InterfaceAudience.Private 093 public static final byte[] COMPACTION = Bytes.toBytes("HBASE::COMPACTION"); 094 095 /** 096 * @deprecated Since 2.3.0. Make it protected, internal-use only. 097 */ 098 @Deprecated 099 @InterfaceAudience.Private 100 public static final byte[] FLUSH = Bytes.toBytes("HBASE::FLUSH"); 101 102 /** 103 * Qualifier for region event meta 'Marker' WALEdits start with the {@link #REGION_EVENT_PREFIX} 104 * prefix ('HBASE::REGION_EVENT::'). After the prefix, we note the type of the event which we get 105 * from the RegionEventDescriptor protobuf instance type (A RegionEventDescriptor protobuf 106 * instance is written as the meta Marker Cell value). Adding a type suffix means we do not have 107 * to deserialize the protobuf to figure out what type of event this is.. .just read the qualifier 108 * suffix. For example, a close region event descriptor will have a qualifier of 109 * HBASE::REGION_EVENT::REGION_CLOSE. See WAL.proto and the EventType in RegionEventDescriptor 110 * protos for all possible event types. 111 */ 112 private static final String REGION_EVENT_STR = "HBASE::REGION_EVENT"; 113 private static final String REGION_EVENT_PREFIX_STR = REGION_EVENT_STR + "::"; 114 private static final byte[] REGION_EVENT_PREFIX = Bytes.toBytes(REGION_EVENT_PREFIX_STR); 115 116 /** 117 * @deprecated Since 2.3.0. Remove. Not for external use. Not used. 118 */ 119 @Deprecated 120 public static final byte[] REGION_EVENT = Bytes.toBytes(REGION_EVENT_STR); 121 122 /** 123 * We use this define figuring if we are carrying a close event. 124 */ 125 private static final byte[] REGION_EVENT_CLOSE = 126 createRegionEventDescriptorQualifier(RegionEventDescriptor.EventType.REGION_CLOSE); 127 128 @InterfaceAudience.Private 129 public static final byte[] BULK_LOAD = Bytes.toBytes("HBASE::BULK_LOAD"); 130 131 /** 132 * Periodically {@link org.apache.hadoop.hbase.replication.regionserver.ReplicationMarkerChore} 133 * will create marker edits with family as {@link WALEdit#METAFAMILY} and 134 * {@link WALEdit#REPLICATION_MARKER} as qualifier and an empty value. 135 * org.apache.hadoop.hbase.replication.regionserver.ReplicationSourceWALReader will populate the 136 * Replication Marker edit with region_server_name, wal_name and wal_offset encoded in 137 * {@link org.apache.hadoop.hbase.shaded.protobuf.generated.WALProtos.ReplicationMarkerDescriptor} 138 * object. {@link org.apache.hadoop.hbase.replication.regionserver.Replication} will change the 139 * REPLICATION_SCOPE for this edit to GLOBAL so that it can replicate. On the sink cluster, 140 * {@link org.apache.hadoop.hbase.replication.regionserver.ReplicationSink} will convert the 141 * ReplicationMarkerDescriptor into a Put mutation to REPLICATION_SINK_TRACKER_TABLE_NAME_STR 142 * table. 143 */ 144 @InterfaceAudience.Private 145 public static final byte[] REPLICATION_MARKER = Bytes.toBytes("HBASE::REPLICATION_MARKER"); 146 147 private final transient boolean replay; 148 149 private ArrayList<ExtendedCell> cells; 150 151 /** 152 * All the Cell families in <code>cells</code>. Updated by {@link #add(Cell)} and 153 * {@link #add(Map)}. This Set is passed to the FSWALEntry so it does not have to recalculate the 154 * Set of families in a transaction; makes for a bunch of CPU savings. 155 */ 156 private Set<byte[]> families = null; 157 158 public WALEdit() { 159 this(1, false); 160 } 161 162 /** 163 * @deprecated since 2.0.1 and will be removed in 4.0.0. Use {@link #WALEdit(int, boolean)} 164 * instead. 165 * @see #WALEdit(int, boolean) 166 * @see <a href="https://issues.apache.org/jira/browse/HBASE-20781">HBASE-20781</a> 167 */ 168 @Deprecated 169 public WALEdit(boolean replay) { 170 this(1, replay); 171 } 172 173 /** 174 * @deprecated since 2.0.1 and will be removed in 4.0.0. Use {@link #WALEdit(int, boolean)} 175 * instead. 176 * @see #WALEdit(int, boolean) 177 * @see <a href="https://issues.apache.org/jira/browse/HBASE-20781">HBASE-20781</a> 178 */ 179 @Deprecated 180 public WALEdit(int cellCount) { 181 this(cellCount, false); 182 } 183 184 /** 185 * @param cellCount Pass so can pre-size the WALEdit. Optimization. 186 */ 187 public WALEdit(int cellCount, boolean isReplay) { 188 this.replay = isReplay; 189 cells = new ArrayList<>(cellCount); 190 } 191 192 /** 193 * Create a new WALEdit from a existing {@link WALEdit}. 194 */ 195 public WALEdit(WALEdit walEdit) { 196 this.replay = walEdit.replay; 197 cells = new ArrayList<>(walEdit.cells); 198 if (walEdit.families != null) { 199 this.families = new TreeSet<>(Bytes.BYTES_COMPARATOR); 200 this.families.addAll(walEdit.families); 201 } 202 203 } 204 205 private Set<byte[]> getOrCreateFamilies() { 206 if (this.families == null) { 207 this.families = new TreeSet<>(Bytes.BYTES_COMPARATOR); 208 } 209 return this.families; 210 } 211 212 /** 213 * For use by FSWALEntry ONLY. An optimization. 214 * @return All families in {@link #getCells()}; may be null. 215 */ 216 public Set<byte[]> getFamilies() { 217 return this.families; 218 } 219 220 /** 221 * @return True is <code>f</code> is {@link #METAFAMILY} 222 * @deprecated Since 2.3.0. Do not expose. Make protected. 223 */ 224 @Deprecated 225 public static boolean isMetaEditFamily(final byte[] f) { 226 return Bytes.equals(METAFAMILY, f); 227 } 228 229 /** 230 * Replaying WALs can read Cell-at-a-time so need this method in those cases. 231 */ 232 public static boolean isMetaEditFamily(Cell cell) { 233 return CellUtil.matchingFamily(cell, METAFAMILY); 234 } 235 236 /** 237 * @return True if this is a meta edit; has one edit only and its columnfamily is 238 * {@link #METAFAMILY}. 239 */ 240 public boolean isMetaEdit() { 241 return this.families != null && this.families.size() == 1 && this.families.contains(METAFAMILY); 242 } 243 244 /** 245 * @return True when current WALEdit is created by log replay. Replication skips WALEdits from 246 * replay. 247 */ 248 public boolean isReplay() { 249 return this.replay; 250 } 251 252 public WALEdit add(Cell cell, byte[] family) { 253 return add(PrivateCellUtil.ensureExtendedCell(cell), family); 254 } 255 256 WALEdit add(ExtendedCell cell, byte[] family) { 257 getOrCreateFamilies().add(family); 258 return addCell(cell); 259 } 260 261 public WALEdit add(Cell cell) { 262 return add(PrivateCellUtil.ensureExtendedCell(cell)); 263 } 264 265 WALEdit add(ExtendedCell cell) { 266 // We clone Family each time we add a Cell. Expensive but safe. For CPU savings, use 267 // add(Map) or add(Cell, family). 268 return add(cell, CellUtil.cloneFamily(cell)); 269 } 270 271 WALEdit add(List<ExtendedCell> cells) { 272 if (cells == null || cells.isEmpty()) { 273 return this; 274 } 275 for (ExtendedCell cell : cells) { 276 add(cell); 277 } 278 return this; 279 } 280 281 public boolean isEmpty() { 282 return cells.isEmpty(); 283 } 284 285 public int size() { 286 return cells.size(); 287 } 288 289 public ArrayList<Cell> getCells() { 290 return (ArrayList) cells; 291 } 292 293 List<ExtendedCell> getExtendedCells() { 294 return cells; 295 } 296 297 /** 298 * This is just for keeping compatibility for CPs, in HBase you should call the below 299 * {@link #setExtendedCells(ArrayList)} directly to avoid casting. 300 */ 301 void setCells(ArrayList<Cell> cells) { 302 this.cells = new ArrayList<>((ArrayList) cells); 303 this.families = null; 304 } 305 306 /** 307 * This is not thread safe. This will change the WALEdit and shouldn't be used unless you are sure 308 * that nothing else depends on the contents being immutable. 309 * @param cells the list of cells that this WALEdit now contains. 310 */ 311 // Used by replay. 312 void setExtendedCells(ArrayList<ExtendedCell> cells) { 313 this.cells = cells; 314 this.families = null; 315 } 316 317 /** 318 * Reads WALEdit from cells. 319 * @param cellDecoder Cell decoder. 320 * @param expectedCount Expected cell count. 321 * @return Number of KVs read. 322 */ 323 public int readFromCells(Codec.Decoder cellDecoder, int expectedCount) throws IOException { 324 cells.clear(); 325 cells.ensureCapacity(expectedCount); 326 while (cells.size() < expectedCount && cellDecoder.advance()) { 327 add(cellDecoder.current()); 328 } 329 return cells.size(); 330 } 331 332 @Override 333 public long heapSize() { 334 long ret = ClassSize.ARRAYLIST; 335 for (Cell cell : cells) { 336 ret += cell.heapSize(); 337 } 338 return ret; 339 } 340 341 public long estimatedSerializedSizeOf() { 342 long ret = 0; 343 for (Cell cell : cells) { 344 ret += PrivateCellUtil.estimatedSerializedSizeOf(cell); 345 } 346 return ret; 347 } 348 349 @Override 350 public String toString() { 351 StringBuilder sb = new StringBuilder(); 352 353 sb.append("[#edits: ").append(cells.size()).append(" = <"); 354 for (Cell cell : cells) { 355 sb.append(cell); 356 sb.append("; "); 357 } 358 sb.append(">]"); 359 return sb.toString(); 360 } 361 362 public static WALEdit createFlushWALEdit(RegionInfo hri, FlushDescriptor f) { 363 KeyValue kv = new KeyValue(getRowForRegion(hri), METAFAMILY, FLUSH, 364 EnvironmentEdgeManager.currentTime(), f.toByteArray()); 365 return new WALEdit().add(kv, METAFAMILY); 366 } 367 368 public static FlushDescriptor getFlushDescriptor(Cell cell) throws IOException { 369 return CellUtil.matchingColumn(cell, METAFAMILY, FLUSH) 370 ? FlushDescriptor.parseFrom(CellUtil.cloneValue(cell)) 371 : null; 372 } 373 374 /** 375 * @return A meta Marker WALEdit that has a single Cell whose value is the passed in 376 * <code>regionEventDesc</code> serialized and whose row is this region, columnfamily is 377 * {@link #METAFAMILY} and qualifier is {@link #REGION_EVENT_PREFIX} + 378 * {@link RegionEventDescriptor#getEventType()}; for example 379 * HBASE::REGION_EVENT::REGION_CLOSE. 380 */ 381 public static WALEdit createRegionEventWALEdit(RegionInfo hri, 382 RegionEventDescriptor regionEventDesc) { 383 return createRegionEventWALEdit(getRowForRegion(hri), regionEventDesc); 384 } 385 386 @InterfaceAudience.Private 387 public static WALEdit createRegionEventWALEdit(byte[] rowForRegion, 388 RegionEventDescriptor regionEventDesc) { 389 KeyValue kv = new KeyValue(rowForRegion, METAFAMILY, 390 createRegionEventDescriptorQualifier(regionEventDesc.getEventType()), 391 EnvironmentEdgeManager.currentTime(), regionEventDesc.toByteArray()); 392 return new WALEdit().add(kv, METAFAMILY); 393 } 394 395 /** 396 * @return Cell qualifier for the passed in RegionEventDescriptor Type; e.g. we'll return 397 * something like a byte array with HBASE::REGION_EVENT::REGION_OPEN in it. 398 */ 399 @InterfaceAudience.Private 400 public static byte[] createRegionEventDescriptorQualifier(RegionEventDescriptor.EventType t) { 401 return Bytes.toBytes(REGION_EVENT_PREFIX_STR + t.toString()); 402 } 403 404 /** 405 * Public so can be accessed from regionserver.wal package. 406 * @return True if this is a Marker Edit and it is a RegionClose type. 407 */ 408 public boolean isRegionCloseMarker() { 409 return isMetaEdit() && PrivateCellUtil.matchingQualifier(this.cells.get(0), REGION_EVENT_CLOSE, 410 0, REGION_EVENT_CLOSE.length); 411 } 412 413 /** 414 * @return Returns a RegionEventDescriptor made by deserializing the content of the passed in 415 * <code>cell</code>, IFF the <code>cell</code> is a RegionEventDescriptor type WALEdit. 416 */ 417 public static RegionEventDescriptor getRegionEventDescriptor(Cell cell) throws IOException { 418 return CellUtil.matchingColumnFamilyAndQualifierPrefix(cell, METAFAMILY, REGION_EVENT_PREFIX) 419 ? RegionEventDescriptor.parseFrom(CellUtil.cloneValue(cell)) 420 : null; 421 } 422 423 /** Returns A Marker WALEdit that has <code>c</code> serialized as its value */ 424 public static WALEdit createCompaction(final RegionInfo hri, final CompactionDescriptor c) { 425 byte[] pbbytes = c.toByteArray(); 426 KeyValue kv = new KeyValue(getRowForRegion(hri), METAFAMILY, COMPACTION, 427 EnvironmentEdgeManager.currentTime(), pbbytes); 428 return new WALEdit().add(kv, METAFAMILY); // replication scope null so this won't be replicated 429 } 430 431 public static byte[] getRowForRegion(RegionInfo hri) { 432 byte[] startKey = hri.getStartKey(); 433 if (startKey.length == 0) { 434 // empty row key is not allowed in mutations because it is both the start key and the end key 435 // we return the smallest byte[] that is bigger (in lex comparison) than byte[0]. 436 return new byte[] { 0 }; 437 } 438 return startKey; 439 } 440 441 /** 442 * Deserialized and returns a CompactionDescriptor is the KeyValue contains one. 443 * @param kv the key value 444 * @return deserialized CompactionDescriptor or null. 445 */ 446 public static CompactionDescriptor getCompaction(Cell kv) throws IOException { 447 return isCompactionMarker(kv) ? CompactionDescriptor.parseFrom(CellUtil.cloneValue(kv)) : null; 448 } 449 450 /** 451 * Returns true if the given cell is a serialized {@link CompactionDescriptor} 452 * @see #getCompaction(Cell) 453 */ 454 public static boolean isCompactionMarker(Cell cell) { 455 return CellUtil.matchingColumn(cell, METAFAMILY, COMPACTION); 456 } 457 458 /** 459 * Create a bulk loader WALEdit 460 * @param hri The RegionInfo for the region in which we are bulk loading 461 * @param bulkLoadDescriptor The descriptor for the Bulk Loader 462 * @return The WALEdit for the BulkLoad 463 */ 464 public static WALEdit createBulkLoadEvent(RegionInfo hri, 465 WALProtos.BulkLoadDescriptor bulkLoadDescriptor) { 466 KeyValue kv = new KeyValue(getRowForRegion(hri), METAFAMILY, BULK_LOAD, 467 EnvironmentEdgeManager.currentTime(), bulkLoadDescriptor.toByteArray()); 468 return new WALEdit().add(kv, METAFAMILY); 469 } 470 471 /** 472 * Deserialized and returns a BulkLoadDescriptor from the passed in Cell 473 * @param cell the key value 474 * @return deserialized BulkLoadDescriptor or null. 475 */ 476 public static WALProtos.BulkLoadDescriptor getBulkLoadDescriptor(Cell cell) throws IOException { 477 return CellUtil.matchingColumn(cell, METAFAMILY, BULK_LOAD) 478 ? WALProtos.BulkLoadDescriptor.parseFrom(CellUtil.cloneValue(cell)) 479 : null; 480 } 481 482 /** 483 * This is just for keeping compatibility for CPs, in HBase you should call the below 484 * {@link #addMap(Map)} directly to avoid casting. 485 */ 486 public void add(Map<byte[], List<Cell>> familyMap) { 487 for (Map.Entry<byte[], List<Cell>> e : familyMap.entrySet()) { 488 // 'foreach' loop NOT used. See HBASE-12023 "...creates too many iterator objects." 489 int listSize = e.getValue().size(); 490 // Add all Cells first and then at end, add the family rather than call {@link #add(Cell)} 491 // and have it clone family each time. Optimization! 492 for (int i = 0; i < listSize; i++) { 493 addCell(PrivateCellUtil.ensureExtendedCell(e.getValue().get(i))); 494 } 495 addFamily(e.getKey()); 496 } 497 } 498 499 /** 500 * Append the given map of family-> edits to a WALEdit data structure. This does not write to 501 * the WAL itself. Note that as an optimization, we will stamp the Set of column families into the 502 * WALEdit to save on our having to calculate column families subsequently down in the actual WAL 503 * writing. 504 * @param familyMap map of family -> edits 505 */ 506 void addMap(Map<byte[], List<ExtendedCell>> familyMap) { 507 for (Map.Entry<byte[], List<ExtendedCell>> e : familyMap.entrySet()) { 508 // 'foreach' loop NOT used. See HBASE-12023 "...creates too many iterator objects." 509 int listSize = e.getValue().size(); 510 // Add all Cells first and then at end, add the family rather than call {@link #add(Cell)} 511 // and have it clone family each time. Optimization! 512 for (int i = 0; i < listSize; i++) { 513 addCell(e.getValue().get(i)); 514 } 515 addFamily(e.getKey()); 516 } 517 } 518 519 private void addFamily(byte[] family) { 520 getOrCreateFamilies().add(family); 521 } 522 523 private WALEdit addCell(ExtendedCell cell) { 524 this.cells.add(cell); 525 return this; 526 } 527 528 /** 529 * Creates a replication tracker edit with {@link #METAFAMILY} family and 530 * {@link #REPLICATION_MARKER} qualifier and has null value. 531 * @param rowKey rowkey 532 * @param timestamp timestamp 533 */ 534 public static WALEdit createReplicationMarkerEdit(byte[] rowKey, long timestamp) { 535 KeyValue kv = 536 new KeyValue(rowKey, METAFAMILY, REPLICATION_MARKER, timestamp, KeyValue.Type.Put); 537 return new WALEdit().add(kv); 538 } 539 540 /** 541 * Checks whether this edit is a replication marker edit. 542 * @param edit edit 543 * @return true if the cell within an edit has column = METAFAMILY and qualifier = 544 * REPLICATION_MARKER, false otherwise 545 */ 546 public static boolean isReplicationMarkerEdit(WALEdit edit) { 547 // Check just the first cell from the edit. ReplicationMarker edit will have only 1 cell. 548 return edit.getCells().size() == 1 549 && CellUtil.matchingColumn(edit.getCells().get(0), METAFAMILY, REPLICATION_MARKER); 550 } 551}