001/*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018package org.apache.hadoop.hbase.wal;
019
020import java.io.IOException;
021import java.util.ArrayList;
022import java.util.List;
023import java.util.Map;
024import java.util.Set;
025import java.util.TreeSet;
026import org.apache.hadoop.hbase.Cell;
027import org.apache.hadoop.hbase.CellUtil;
028import org.apache.hadoop.hbase.ExtendedCell;
029import org.apache.hadoop.hbase.HBaseInterfaceAudience;
030import org.apache.hadoop.hbase.KeyValue;
031import org.apache.hadoop.hbase.PrivateCellUtil;
032import org.apache.hadoop.hbase.client.RegionInfo;
033import org.apache.hadoop.hbase.codec.Codec;
034import org.apache.hadoop.hbase.io.HeapSize;
035import org.apache.hadoop.hbase.util.Bytes;
036import org.apache.hadoop.hbase.util.ClassSize;
037import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
038import org.apache.yetus.audience.InterfaceAudience;
039
040import org.apache.hadoop.hbase.shaded.protobuf.generated.WALProtos;
041import org.apache.hadoop.hbase.shaded.protobuf.generated.WALProtos.CompactionDescriptor;
042import org.apache.hadoop.hbase.shaded.protobuf.generated.WALProtos.FlushDescriptor;
043import org.apache.hadoop.hbase.shaded.protobuf.generated.WALProtos.RegionEventDescriptor;
044
045/**
046 * Used in HBase's transaction log (WAL) to represent a collection of edits (Cell/KeyValue objects)
047 * that came in as a single transaction. All the edits for a given transaction are written out as a
048 * single record, in PB format, followed (optionally) by Cells written via the WALCellEncoder.
049 * <p>
050 * A particular WALEdit 'type' is the 'meta' type used to mark key operational events in the WAL
051 * such as compaction, flush, or region open. These meta types do not traverse hbase memstores. They
052 * are edits made by the hbase system rather than edit data submitted by clients. They only show in
053 * the WAL. These 'Meta' types have not been formally specified (or made into an explicit class
054 * type). They evolved organically. HBASE-8457 suggests codifying a WALEdit 'type' by adding a type
055 * field to WALEdit that gets serialized into the WAL. TODO. Would have to work on the
056 * consumption-side. Reading WALs on replay we seem to consume a Cell-at-a-time rather than by
057 * WALEdit. We are already in the below going out of our way to figure particular types -- e.g. if a
058 * compaction, replay, or close meta Marker -- during normal processing so would make sense to do
059 * this. Current system is an awkward marking of Cell columnfamily as {@link #METAFAMILY} and then
060 * setting qualifier based off meta edit type. For replay-time where we read Cell-at-a-time, there
061 * are utility methods below for figuring meta type. See also
062 * {@link #createBulkLoadEvent(RegionInfo, WALProtos.BulkLoadDescriptor)}, etc., for where we create
063 * meta WALEdit instances.
064 * </p>
 * <p>
 * WALEdit will accumulate a Set of all column family names referenced by the Cells
 * {@link #add(Cell)}'d. This is an optimization. Usually when loading a WALEdit, we have the column
 * family name to hand, so pass it in when available (see {@link #add(Cell, byte[])} and
 * {@link #add(Map)}). Doing this, we can save on a parse of each Cell to figure column family down
 * the line when we go to add the WALEdit to the WAL file. See the hand-off in FSWALEntry
 * Constructor.
 * </p>
071 * @see WALKey
072 */
073@InterfaceAudience.LimitedPrivate({ HBaseInterfaceAudience.REPLICATION,
074  HBaseInterfaceAudience.COPROC })
075public class WALEdit implements HeapSize {
  // Below defines are for writing WALEdit 'meta' Cells..
  // TODO: Get rid of this system of special 'meta' Cells. See HBASE-8457. It suggests
  // adding a type to WALEdit itself for use denoting meta Edits and their types.
  /** Column family used by the marker Cells that the static create* factories of this class build. */
  public static final byte[] METAFAMILY = Bytes.toBytes("METAFAMILY");

  /**
   * @deprecated Since 2.3.0. Not used.
   */
  @Deprecated
  public static final byte[] METAROW = Bytes.toBytes("METAROW");

  /**
   * Qualifier of compaction marker Cells; written by
   * {@link #createCompaction(RegionInfo, CompactionDescriptor)} and matched by
   * {@link #isCompactionMarker(Cell)}.
   * @deprecated Since 2.3.0. Make it protected, internal-use only. Use
   *             {@link #isCompactionMarker(Cell)}
   */
  @Deprecated
  @InterfaceAudience.Private
  public static final byte[] COMPACTION = Bytes.toBytes("HBASE::COMPACTION");

  /**
   * Qualifier of flush marker Cells; written by
   * {@link #createFlushWALEdit(RegionInfo, FlushDescriptor)} and matched by
   * {@link #getFlushDescriptor(Cell)}.
   * @deprecated Since 2.3.0. Make it protected, internal-use only.
   */
  @Deprecated
  @InterfaceAudience.Private
  public static final byte[] FLUSH = Bytes.toBytes("HBASE::FLUSH");
101
  /**
   * Qualifier for region event meta 'Marker' WALEdits start with the {@link #REGION_EVENT_PREFIX}
   * prefix ('HBASE::REGION_EVENT::'). After the prefix, we note the type of the event which we get
   * from the RegionEventDescriptor protobuf instance type (A RegionEventDescriptor protobuf
   * instance is written as the meta Marker Cell value). Adding a type suffix means we do not have
   * to deserialize the protobuf to figure out what type of event this is; just read the qualifier
   * suffix. For example, a close region event descriptor will have a qualifier of
   * HBASE::REGION_EVENT::REGION_CLOSE. See WAL.proto and the EventType in RegionEventDescriptor
   * protos for all possible event types.
   */
  private static final String REGION_EVENT_STR = "HBASE::REGION_EVENT";
  /** {@link #REGION_EVENT_STR} followed by the "::" separator that precedes the event type. */
  private static final String REGION_EVENT_PREFIX_STR = REGION_EVENT_STR + "::";
  /** Byte form of {@link #REGION_EVENT_PREFIX_STR}; used for qualifier prefix matching. */
  private static final byte[] REGION_EVENT_PREFIX = Bytes.toBytes(REGION_EVENT_PREFIX_STR);

  /**
   * @deprecated Since 2.3.0. Remove. Not for external use. Not used.
   */
  @Deprecated
  public static final byte[] REGION_EVENT = Bytes.toBytes(REGION_EVENT_STR);

  /**
   * We use this define figuring if we are carrying a close event.
   */
  private static final byte[] REGION_EVENT_CLOSE =
    createRegionEventDescriptorQualifier(RegionEventDescriptor.EventType.REGION_CLOSE);

  /**
   * Qualifier of bulk load marker Cells; written by
   * {@link #createBulkLoadEvent(RegionInfo, WALProtos.BulkLoadDescriptor)} and matched by
   * {@link #getBulkLoadDescriptor(Cell)}.
   */
  @InterfaceAudience.Private
  public static final byte[] BULK_LOAD = Bytes.toBytes("HBASE::BULK_LOAD");

  /**
   * Periodically {@link org.apache.hadoop.hbase.replication.regionserver.ReplicationMarkerChore}
   * will create marker edits with family as {@link WALEdit#METAFAMILY} and
   * {@link WALEdit#REPLICATION_MARKER} as qualifier and an empty value.
   * org.apache.hadoop.hbase.replication.regionserver.ReplicationSourceWALReader will populate the
   * Replication Marker edit with region_server_name, wal_name and wal_offset encoded in
   * {@link org.apache.hadoop.hbase.shaded.protobuf.generated.WALProtos.ReplicationMarkerDescriptor}
   * object. {@link org.apache.hadoop.hbase.replication.regionserver.Replication} will change the
   * REPLICATION_SCOPE for this edit to GLOBAL so that it can replicate. On the sink cluster,
   * {@link org.apache.hadoop.hbase.replication.regionserver.ReplicationSink} will convert the
   * ReplicationMarkerDescriptor into a Put mutation to REPLICATION_SINK_TRACKER_TABLE_NAME_STR
   * table.
   */
  @InterfaceAudience.Private
  public static final byte[] REPLICATION_MARKER = Bytes.toBytes("HBASE::REPLICATION_MARKER");
146
  /** Replay flag fixed at construction time and reported by {@link #isReplay()}. */
  private final transient boolean replay;

  /** The cells carried by this edit, in the order they were added. */
  private ArrayList<ExtendedCell> cells;

  /**
   * All the Cell families in <code>cells</code>. Updated by {@link #add(Cell)} and
   * {@link #add(Map)}. This Set is passed to the FSWALEntry so it does not have to recalculate the
   * Set of families in a transaction; makes for a bunch of CPU savings. Stays null until the first
   * family is recorded, and is set back to null whenever the cell list is replaced through one of
   * the setters.
   */
  private Set<byte[]> families = null;
157
  /**
   * Creates an empty, non-replay WALEdit whose backing list is pre-sized for a single cell.
   */
  public WALEdit() {
    this(1, false);
  }
161
  /**
   * Creates an empty WALEdit, pre-sized for a single cell, carrying the given replay flag.
   * @param replay value later reported by {@link #isReplay()}
   * @deprecated since 2.0.1 and will be removed in 4.0.0. Use {@link #WALEdit(int, boolean)}
   *             instead.
   * @see #WALEdit(int, boolean)
   * @see <a href="https://issues.apache.org/jira/browse/HBASE-20781">HBASE-20781</a>
   */
  @Deprecated
  public WALEdit(boolean replay) {
    this(1, replay);
  }
172
  /**
   * Creates an empty, non-replay WALEdit pre-sized for <code>cellCount</code> cells.
   * @param cellCount initial capacity of the backing cell list
   * @deprecated since 2.0.1 and will be removed in 4.0.0. Use {@link #WALEdit(int, boolean)}
   *             instead.
   * @see #WALEdit(int, boolean)
   * @see <a href="https://issues.apache.org/jira/browse/HBASE-20781">HBASE-20781</a>
   */
  @Deprecated
  public WALEdit(int cellCount) {
    this(cellCount, false);
  }
183
184  /**
185   * @param cellCount Pass so can pre-size the WALEdit. Optimization.
186   */
187  public WALEdit(int cellCount, boolean isReplay) {
188    this.replay = isReplay;
189    cells = new ArrayList<>(cellCount);
190  }
191
192  /**
193   * Create a new WALEdit from a existing {@link WALEdit}.
194   */
195  public WALEdit(WALEdit walEdit) {
196    this.replay = walEdit.replay;
197    cells = new ArrayList<>(walEdit.cells);
198    if (walEdit.families != null) {
199      this.families = new TreeSet<>(Bytes.BYTES_COMPARATOR);
200      this.families.addAll(walEdit.families);
201    }
202
203  }
204
205  private Set<byte[]> getOrCreateFamilies() {
206    if (this.families == null) {
207      this.families = new TreeSet<>(Bytes.BYTES_COMPARATOR);
208    }
209    return this.families;
210  }
211
212  /**
213   * For use by FSWALEntry ONLY. An optimization.
214   * @return All families in {@link #getCells()}; may be null.
215   */
216  public Set<byte[]> getFamilies() {
217    return this.families;
218  }
219
220  /**
221   * @return True is <code>f</code> is {@link #METAFAMILY}
222   * @deprecated Since 2.3.0. Do not expose. Make protected.
223   */
224  @Deprecated
225  public static boolean isMetaEditFamily(final byte[] f) {
226    return Bytes.equals(METAFAMILY, f);
227  }
228
229  /**
230   * Replaying WALs can read Cell-at-a-time so need this method in those cases.
231   */
232  public static boolean isMetaEditFamily(Cell cell) {
233    return CellUtil.matchingFamily(cell, METAFAMILY);
234  }
235
236  /**
237   * @return True if this is a meta edit; has one edit only and its columnfamily is
238   *         {@link #METAFAMILY}.
239   */
240  public boolean isMetaEdit() {
241    return this.families != null && this.families.size() == 1 && this.families.contains(METAFAMILY);
242  }
243
244  /**
245   * @return True when current WALEdit is created by log replay. Replication skips WALEdits from
246   *         replay.
247   */
248  public boolean isReplay() {
249    return this.replay;
250  }
251
252  public WALEdit add(Cell cell, byte[] family) {
253    return add(PrivateCellUtil.ensureExtendedCell(cell), family);
254  }
255
256  WALEdit add(ExtendedCell cell, byte[] family) {
257    getOrCreateFamilies().add(family);
258    return addCell(cell);
259  }
260
261  public WALEdit add(Cell cell) {
262    return add(PrivateCellUtil.ensureExtendedCell(cell));
263  }
264
265  WALEdit add(ExtendedCell cell) {
266    // We clone Family each time we add a Cell. Expensive but safe. For CPU savings, use
267    // add(Map) or add(Cell, family).
268    return add(cell, CellUtil.cloneFamily(cell));
269  }
270
271  WALEdit add(List<ExtendedCell> cells) {
272    if (cells == null || cells.isEmpty()) {
273      return this;
274    }
275    for (ExtendedCell cell : cells) {
276      add(cell);
277    }
278    return this;
279  }
280
281  public boolean isEmpty() {
282    return cells.isEmpty();
283  }
284
285  public int size() {
286    return cells.size();
287  }
288
289  public ArrayList<Cell> getCells() {
290    return (ArrayList) cells;
291  }
292
293  List<ExtendedCell> getExtendedCells() {
294    return cells;
295  }
296
297  /**
298   * This is just for keeping compatibility for CPs, in HBase you should call the below
299   * {@link #setExtendedCells(ArrayList)} directly to avoid casting.
300   */
301  void setCells(ArrayList<Cell> cells) {
302    this.cells = new ArrayList<>((ArrayList) cells);
303    this.families = null;
304  }
305
306  /**
307   * This is not thread safe. This will change the WALEdit and shouldn't be used unless you are sure
308   * that nothing else depends on the contents being immutable.
309   * @param cells the list of cells that this WALEdit now contains.
310   */
311  // Used by replay.
312  void setExtendedCells(ArrayList<ExtendedCell> cells) {
313    this.cells = cells;
314    this.families = null;
315  }
316
317  /**
318   * Reads WALEdit from cells.
319   * @param cellDecoder   Cell decoder.
320   * @param expectedCount Expected cell count.
321   * @return Number of KVs read.
322   */
323  public int readFromCells(Codec.Decoder cellDecoder, int expectedCount) throws IOException {
324    cells.clear();
325    cells.ensureCapacity(expectedCount);
326    while (cells.size() < expectedCount && cellDecoder.advance()) {
327      add(cellDecoder.current());
328    }
329    return cells.size();
330  }
331
332  @Override
333  public long heapSize() {
334    long ret = ClassSize.ARRAYLIST;
335    for (Cell cell : cells) {
336      ret += cell.heapSize();
337    }
338    return ret;
339  }
340
341  public long estimatedSerializedSizeOf() {
342    long ret = 0;
343    for (Cell cell : cells) {
344      ret += PrivateCellUtil.estimatedSerializedSizeOf(cell);
345    }
346    return ret;
347  }
348
349  @Override
350  public String toString() {
351    StringBuilder sb = new StringBuilder();
352
353    sb.append("[#edits: ").append(cells.size()).append(" = <");
354    for (Cell cell : cells) {
355      sb.append(cell);
356      sb.append("; ");
357    }
358    sb.append(">]");
359    return sb.toString();
360  }
361
362  public static WALEdit createFlushWALEdit(RegionInfo hri, FlushDescriptor f) {
363    KeyValue kv = new KeyValue(getRowForRegion(hri), METAFAMILY, FLUSH,
364      EnvironmentEdgeManager.currentTime(), f.toByteArray());
365    return new WALEdit().add(kv, METAFAMILY);
366  }
367
368  public static FlushDescriptor getFlushDescriptor(Cell cell) throws IOException {
369    return CellUtil.matchingColumn(cell, METAFAMILY, FLUSH)
370      ? FlushDescriptor.parseFrom(CellUtil.cloneValue(cell))
371      : null;
372  }
373
374  /**
375   * @return A meta Marker WALEdit that has a single Cell whose value is the passed in
376   *         <code>regionEventDesc</code> serialized and whose row is this region, columnfamily is
377   *         {@link #METAFAMILY} and qualifier is {@link #REGION_EVENT_PREFIX} +
378   *         {@link RegionEventDescriptor#getEventType()}; for example
379   *         HBASE::REGION_EVENT::REGION_CLOSE.
380   */
381  public static WALEdit createRegionEventWALEdit(RegionInfo hri,
382    RegionEventDescriptor regionEventDesc) {
383    return createRegionEventWALEdit(getRowForRegion(hri), regionEventDesc);
384  }
385
386  @InterfaceAudience.Private
387  public static WALEdit createRegionEventWALEdit(byte[] rowForRegion,
388    RegionEventDescriptor regionEventDesc) {
389    KeyValue kv = new KeyValue(rowForRegion, METAFAMILY,
390      createRegionEventDescriptorQualifier(regionEventDesc.getEventType()),
391      EnvironmentEdgeManager.currentTime(), regionEventDesc.toByteArray());
392    return new WALEdit().add(kv, METAFAMILY);
393  }
394
395  /**
396   * @return Cell qualifier for the passed in RegionEventDescriptor Type; e.g. we'll return
397   *         something like a byte array with HBASE::REGION_EVENT::REGION_OPEN in it.
398   */
399  @InterfaceAudience.Private
400  public static byte[] createRegionEventDescriptorQualifier(RegionEventDescriptor.EventType t) {
401    return Bytes.toBytes(REGION_EVENT_PREFIX_STR + t.toString());
402  }
403
404  /**
405   * Public so can be accessed from regionserver.wal package.
406   * @return True if this is a Marker Edit and it is a RegionClose type.
407   */
408  public boolean isRegionCloseMarker() {
409    return isMetaEdit() && PrivateCellUtil.matchingQualifier(this.cells.get(0), REGION_EVENT_CLOSE,
410      0, REGION_EVENT_CLOSE.length);
411  }
412
413  /**
414   * @return Returns a RegionEventDescriptor made by deserializing the content of the passed in
415   *         <code>cell</code>, IFF the <code>cell</code> is a RegionEventDescriptor type WALEdit.
416   */
417  public static RegionEventDescriptor getRegionEventDescriptor(Cell cell) throws IOException {
418    return CellUtil.matchingColumnFamilyAndQualifierPrefix(cell, METAFAMILY, REGION_EVENT_PREFIX)
419      ? RegionEventDescriptor.parseFrom(CellUtil.cloneValue(cell))
420      : null;
421  }
422
423  /** Returns A Marker WALEdit that has <code>c</code> serialized as its value */
424  public static WALEdit createCompaction(final RegionInfo hri, final CompactionDescriptor c) {
425    byte[] pbbytes = c.toByteArray();
426    KeyValue kv = new KeyValue(getRowForRegion(hri), METAFAMILY, COMPACTION,
427      EnvironmentEdgeManager.currentTime(), pbbytes);
428    return new WALEdit().add(kv, METAFAMILY); // replication scope null so this won't be replicated
429  }
430
431  public static byte[] getRowForRegion(RegionInfo hri) {
432    byte[] startKey = hri.getStartKey();
433    if (startKey.length == 0) {
434      // empty row key is not allowed in mutations because it is both the start key and the end key
435      // we return the smallest byte[] that is bigger (in lex comparison) than byte[0].
436      return new byte[] { 0 };
437    }
438    return startKey;
439  }
440
441  /**
442   * Deserialized and returns a CompactionDescriptor is the KeyValue contains one.
443   * @param kv the key value
444   * @return deserialized CompactionDescriptor or null.
445   */
446  public static CompactionDescriptor getCompaction(Cell kv) throws IOException {
447    return isCompactionMarker(kv) ? CompactionDescriptor.parseFrom(CellUtil.cloneValue(kv)) : null;
448  }
449
450  /**
451   * Returns true if the given cell is a serialized {@link CompactionDescriptor}
452   * @see #getCompaction(Cell)
453   */
454  public static boolean isCompactionMarker(Cell cell) {
455    return CellUtil.matchingColumn(cell, METAFAMILY, COMPACTION);
456  }
457
458  /**
459   * Create a bulk loader WALEdit
460   * @param hri                The RegionInfo for the region in which we are bulk loading
461   * @param bulkLoadDescriptor The descriptor for the Bulk Loader
462   * @return The WALEdit for the BulkLoad
463   */
464  public static WALEdit createBulkLoadEvent(RegionInfo hri,
465    WALProtos.BulkLoadDescriptor bulkLoadDescriptor) {
466    KeyValue kv = new KeyValue(getRowForRegion(hri), METAFAMILY, BULK_LOAD,
467      EnvironmentEdgeManager.currentTime(), bulkLoadDescriptor.toByteArray());
468    return new WALEdit().add(kv, METAFAMILY);
469  }
470
471  /**
472   * Deserialized and returns a BulkLoadDescriptor from the passed in Cell
473   * @param cell the key value
474   * @return deserialized BulkLoadDescriptor or null.
475   */
476  public static WALProtos.BulkLoadDescriptor getBulkLoadDescriptor(Cell cell) throws IOException {
477    return CellUtil.matchingColumn(cell, METAFAMILY, BULK_LOAD)
478      ? WALProtos.BulkLoadDescriptor.parseFrom(CellUtil.cloneValue(cell))
479      : null;
480  }
481
482  /**
483   * This is just for keeping compatibility for CPs, in HBase you should call the below
484   * {@link #addMap(Map)} directly to avoid casting.
485   */
486  public void add(Map<byte[], List<Cell>> familyMap) {
487    for (Map.Entry<byte[], List<Cell>> e : familyMap.entrySet()) {
488      // 'foreach' loop NOT used. See HBASE-12023 "...creates too many iterator objects."
489      int listSize = e.getValue().size();
490      // Add all Cells first and then at end, add the family rather than call {@link #add(Cell)}
491      // and have it clone family each time. Optimization!
492      for (int i = 0; i < listSize; i++) {
493        addCell(PrivateCellUtil.ensureExtendedCell(e.getValue().get(i)));
494      }
495      addFamily(e.getKey());
496    }
497  }
498
499  /**
500   * Append the given map of family-&gt; edits to a WALEdit data structure. This does not write to
501   * the WAL itself. Note that as an optimization, we will stamp the Set of column families into the
502   * WALEdit to save on our having to calculate column families subsequently down in the actual WAL
503   * writing.
504   * @param familyMap map of family -&gt; edits
505   */
506  void addMap(Map<byte[], List<ExtendedCell>> familyMap) {
507    for (Map.Entry<byte[], List<ExtendedCell>> e : familyMap.entrySet()) {
508      // 'foreach' loop NOT used. See HBASE-12023 "...creates too many iterator objects."
509      int listSize = e.getValue().size();
510      // Add all Cells first and then at end, add the family rather than call {@link #add(Cell)}
511      // and have it clone family each time. Optimization!
512      for (int i = 0; i < listSize; i++) {
513        addCell(e.getValue().get(i));
514      }
515      addFamily(e.getKey());
516    }
517  }
518
519  private void addFamily(byte[] family) {
520    getOrCreateFamilies().add(family);
521  }
522
523  private WALEdit addCell(ExtendedCell cell) {
524    this.cells.add(cell);
525    return this;
526  }
527
528  /**
529   * Creates a replication tracker edit with {@link #METAFAMILY} family and
530   * {@link #REPLICATION_MARKER} qualifier and has null value.
531   * @param rowKey    rowkey
532   * @param timestamp timestamp
533   */
534  public static WALEdit createReplicationMarkerEdit(byte[] rowKey, long timestamp) {
535    KeyValue kv =
536      new KeyValue(rowKey, METAFAMILY, REPLICATION_MARKER, timestamp, KeyValue.Type.Put);
537    return new WALEdit().add(kv);
538  }
539
540  /**
541   * Checks whether this edit is a replication marker edit.
542   * @param edit edit
543   * @return true if the cell within an edit has column = METAFAMILY and qualifier =
544   *         REPLICATION_MARKER, false otherwise
545   */
546  public static boolean isReplicationMarkerEdit(WALEdit edit) {
547    // Check just the first cell from the edit. ReplicationMarker edit will have only 1 cell.
548    return edit.getCells().size() == 1
549      && CellUtil.matchingColumn(edit.getCells().get(0), METAFAMILY, REPLICATION_MARKER);
550  }
551}