/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
018package org.apache.hadoop.hbase.wal;
019
020import java.io.IOException;
021import java.util.ArrayList;
022import java.util.List;
023import java.util.Map;
024import java.util.Set;
025import java.util.TreeSet;
026import org.apache.hadoop.hbase.Cell;
027import org.apache.hadoop.hbase.CellUtil;
028import org.apache.hadoop.hbase.HBaseInterfaceAudience;
029import org.apache.hadoop.hbase.KeyValue;
030import org.apache.hadoop.hbase.PrivateCellUtil;
031import org.apache.hadoop.hbase.client.RegionInfo;
032import org.apache.hadoop.hbase.codec.Codec;
033import org.apache.hadoop.hbase.io.HeapSize;
034import org.apache.hadoop.hbase.util.Bytes;
035import org.apache.hadoop.hbase.util.ClassSize;
036import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
037import org.apache.yetus.audience.InterfaceAudience;
038
039import org.apache.hadoop.hbase.shaded.protobuf.generated.WALProtos;
040import org.apache.hadoop.hbase.shaded.protobuf.generated.WALProtos.CompactionDescriptor;
041import org.apache.hadoop.hbase.shaded.protobuf.generated.WALProtos.FlushDescriptor;
042import org.apache.hadoop.hbase.shaded.protobuf.generated.WALProtos.RegionEventDescriptor;
043
044/**
045 * Used in HBase's transaction log (WAL) to represent a collection of edits (Cell/KeyValue objects)
046 * that came in as a single transaction. All the edits for a given transaction are written out as a
047 * single record, in PB format, followed (optionally) by Cells written via the WALCellEncoder.
048 * <p>
049 * This class is LimitedPrivate for CPs to read-only. The {@link #add} methods are classified as
050 * private methods, not for use by CPs.
051 * </p>
052 * <p>
053 * A particular WALEdit 'type' is the 'meta' type used to mark key operational events in the WAL
054 * such as compaction, flush, or region open. These meta types do not traverse hbase memstores. They
055 * are edits made by the hbase system rather than edit data submitted by clients. They only show in
056 * the WAL. These 'Meta' types have not been formally specified (or made into an explicit class
057 * type). They evolved organically. HBASE-8457 suggests codifying a WALEdit 'type' by adding a type
058 * field to WALEdit that gets serialized into the WAL. TODO. Would have to work on the
059 * consumption-side. Reading WALs on replay we seem to consume a Cell-at-a-time rather than by
060 * WALEdit. We are already in the below going out of our way to figure particular types -- e.g. if a
061 * compaction, replay, or close meta Marker -- during normal processing so would make sense to do
062 * this. Current system is an awkward marking of Cell columnfamily as {@link #METAFAMILY} and then
063 * setting qualifier based off meta edit type. For replay-time where we read Cell-at-a-time, there
064 * are utility methods below for figuring meta type. See also
065 * {@link #createBulkLoadEvent(RegionInfo, WALProtos.BulkLoadDescriptor)}, etc., for where we create
066 * meta WALEdit instances.
067 * </p>
068 * <p>
069 * WALEdit will accumulate a Set of all column family names referenced by the Cells
070 * {@link #add(Cell)}'d. This is an optimization. Usually when loading a WALEdit, we have the column
071 * family name to-hand.. just shove it into the WALEdit if available. Doing this, we can save on a
072 * parse of each Cell to figure column family down the line when we go to add the WALEdit to the WAL
073 * file. See the hand-off in FSWALEntry Constructor.
074 * @see WALKey
075 */
076// TODO: Do not expose this class to Coprocessors. It has set methods. A CP might meddle.
077@InterfaceAudience.LimitedPrivate({ HBaseInterfaceAudience.REPLICATION,
078  HBaseInterfaceAudience.COPROC })
079public class WALEdit implements HeapSize {
080  // Below defines are for writing WALEdit 'meta' Cells..
081  // TODO: Get rid of this system of special 'meta' Cells. See HBASE-8457. It suggests
082  // adding a type to WALEdit itself for use denoting meta Edits and their types.
083  public static final byte[] METAFAMILY = Bytes.toBytes("METAFAMILY");
084
085  /**
086   * @deprecated Since 2.3.0. Not used.
087   */
088  @Deprecated
089  public static final byte[] METAROW = Bytes.toBytes("METAROW");
090
091  /**
092   * @deprecated Since 2.3.0. Make it protected, internal-use only. Use
093   *             {@link #isCompactionMarker(Cell)}
094   */
095  @Deprecated
096  @InterfaceAudience.Private
097  public static final byte[] COMPACTION = Bytes.toBytes("HBASE::COMPACTION");
098
099  /**
100   * @deprecated Since 2.3.0. Make it protected, internal-use only.
101   */
102  @Deprecated
103  @InterfaceAudience.Private
104  public static final byte[] FLUSH = Bytes.toBytes("HBASE::FLUSH");
105
106  /**
107   * Qualifier for region event meta 'Marker' WALEdits start with the {@link #REGION_EVENT_PREFIX}
108   * prefix ('HBASE::REGION_EVENT::'). After the prefix, we note the type of the event which we get
109   * from the RegionEventDescriptor protobuf instance type (A RegionEventDescriptor protobuf
110   * instance is written as the meta Marker Cell value). Adding a type suffix means we do not have
111   * to deserialize the protobuf to figure out what type of event this is.. .just read the qualifier
112   * suffix. For example, a close region event descriptor will have a qualifier of
113   * HBASE::REGION_EVENT::REGION_CLOSE. See WAL.proto and the EventType in RegionEventDescriptor
114   * protos for all possible event types.
115   */
116  private static final String REGION_EVENT_STR = "HBASE::REGION_EVENT";
117  private static final String REGION_EVENT_PREFIX_STR = REGION_EVENT_STR + "::";
118  private static final byte[] REGION_EVENT_PREFIX = Bytes.toBytes(REGION_EVENT_PREFIX_STR);
119
120  /**
121   * @deprecated Since 2.3.0. Remove. Not for external use. Not used.
122   */
123  @Deprecated
124  public static final byte[] REGION_EVENT = Bytes.toBytes(REGION_EVENT_STR);
125
126  /**
127   * We use this define figuring if we are carrying a close event.
128   */
129  private static final byte[] REGION_EVENT_CLOSE =
130    createRegionEventDescriptorQualifier(RegionEventDescriptor.EventType.REGION_CLOSE);
131
132  @InterfaceAudience.Private
133  public static final byte[] BULK_LOAD = Bytes.toBytes("HBASE::BULK_LOAD");
134
135  /**
136   * Periodically {@link org.apache.hadoop.hbase.replication.regionserver.ReplicationMarkerChore}
137   * will create marker edits with family as {@link WALEdit#METAFAMILY} and
138   * {@link WALEdit#REPLICATION_MARKER} as qualifier and an empty value.
139   * org.apache.hadoop.hbase.replication.regionserver.ReplicationSourceWALReader will populate the
140   * Replication Marker edit with region_server_name, wal_name and wal_offset encoded in
141   * {@link org.apache.hadoop.hbase.shaded.protobuf.generated.WALProtos.ReplicationMarkerDescriptor}
142   * object. {@link org.apache.hadoop.hbase.replication.regionserver.Replication} will change the
143   * REPLICATION_SCOPE for this edit to GLOBAL so that it can replicate. On the sink cluster,
144   * {@link org.apache.hadoop.hbase.replication.regionserver.ReplicationSink} will convert the
145   * ReplicationMarkerDescriptor into a Put mutation to REPLICATION_SINK_TRACKER_TABLE_NAME_STR
146   * table.
147   */
148  @InterfaceAudience.Private
149  public static final byte[] REPLICATION_MARKER = Bytes.toBytes("HBASE::REPLICATION_MARKER");
150
151  private final transient boolean replay;
152
153  private ArrayList<Cell> cells;
154
155  /**
156   * All the Cell families in <code>cells</code>. Updated by {@link #add(Cell)} and
157   * {@link #add(Map)}. This Set is passed to the FSWALEntry so it does not have to recalculate the
158   * Set of families in a transaction; makes for a bunch of CPU savings.
159   */
160  private Set<byte[]> families = null;
161
162  public WALEdit() {
163    this(1, false);
164  }
165
166  /**
167   * @deprecated since 2.0.1 and will be removed in 4.0.0. Use {@link #WALEdit(int, boolean)}
168   *             instead.
169   * @see #WALEdit(int, boolean)
170   * @see <a href="https://issues.apache.org/jira/browse/HBASE-20781">HBASE-20781</a>
171   */
172  @Deprecated
173  public WALEdit(boolean replay) {
174    this(1, replay);
175  }
176
177  /**
178   * @deprecated since 2.0.1 and will be removed in 4.0.0. Use {@link #WALEdit(int, boolean)}
179   *             instead.
180   * @see #WALEdit(int, boolean)
181   * @see <a href="https://issues.apache.org/jira/browse/HBASE-20781">HBASE-20781</a>
182   */
183  @Deprecated
184  public WALEdit(int cellCount) {
185    this(cellCount, false);
186  }
187
188  /**
189   * @param cellCount Pass so can pre-size the WALEdit. Optimization.
190   */
191  public WALEdit(int cellCount, boolean isReplay) {
192    this.replay = isReplay;
193    cells = new ArrayList<>(cellCount);
194  }
195
196  /**
197   * Create a new WALEdit from a existing {@link WALEdit}.
198   */
199  public WALEdit(WALEdit walEdit) {
200    this.replay = walEdit.replay;
201    cells = new ArrayList<>(walEdit.cells);
202    if (walEdit.families != null) {
203      this.families = new TreeSet<>(Bytes.BYTES_COMPARATOR);
204      this.families.addAll(walEdit.families);
205    }
206
207  }
208
209  private Set<byte[]> getOrCreateFamilies() {
210    if (this.families == null) {
211      this.families = new TreeSet<>(Bytes.BYTES_COMPARATOR);
212    }
213    return this.families;
214  }
215
216  /**
217   * For use by FSWALEntry ONLY. An optimization.
218   * @return All families in {@link #getCells()}; may be null.
219   */
220  public Set<byte[]> getFamilies() {
221    return this.families;
222  }
223
224  /**
225   * @return True is <code>f</code> is {@link #METAFAMILY}
226   * @deprecated Since 2.3.0. Do not expose. Make protected.
227   */
228  @Deprecated
229  public static boolean isMetaEditFamily(final byte[] f) {
230    return Bytes.equals(METAFAMILY, f);
231  }
232
233  /**
234   * Replaying WALs can read Cell-at-a-time so need this method in those cases.
235   */
236  public static boolean isMetaEditFamily(Cell cell) {
237    return CellUtil.matchingFamily(cell, METAFAMILY);
238  }
239
240  /**
241   * @return True if this is a meta edit; has one edit only and its columnfamily is
242   *         {@link #METAFAMILY}.
243   */
244  public boolean isMetaEdit() {
245    return this.families != null && this.families.size() == 1 && this.families.contains(METAFAMILY);
246  }
247
248  /**
249   * @return True when current WALEdit is created by log replay. Replication skips WALEdits from
250   *         replay.
251   */
252  public boolean isReplay() {
253    return this.replay;
254  }
255
256  @InterfaceAudience.Private
257  public WALEdit add(Cell cell, byte[] family) {
258    getOrCreateFamilies().add(family);
259    return addCell(cell);
260  }
261
262  @InterfaceAudience.Private
263  public WALEdit add(Cell cell) {
264    // We clone Family each time we add a Cell. Expensive but safe. For CPU savings, use
265    // add(Map) or add(Cell, family).
266    return add(cell, CellUtil.cloneFamily(cell));
267  }
268
269  @InterfaceAudience.Private
270  public WALEdit add(List<Cell> cells) {
271    if (cells == null || cells.isEmpty()) {
272      return this;
273    }
274    for (Cell cell : cells) {
275      add(cell);
276    }
277    return this;
278  }
279
280  public boolean isEmpty() {
281    return cells.isEmpty();
282  }
283
284  public int size() {
285    return cells.size();
286  }
287
288  public ArrayList<Cell> getCells() {
289    return cells;
290  }
291
292  /**
293   * This is not thread safe. This will change the WALEdit and shouldn't be used unless you are sure
294   * that nothing else depends on the contents being immutable.
295   * @param cells the list of cells that this WALEdit now contains.
296   */
297  @InterfaceAudience.Private
298  // Used by replay.
299  public void setCells(ArrayList<Cell> cells) {
300    this.cells = cells;
301    this.families = null;
302  }
303
304  /**
305   * Reads WALEdit from cells.
306   * @param cellDecoder   Cell decoder.
307   * @param expectedCount Expected cell count.
308   * @return Number of KVs read.
309   */
310  public int readFromCells(Codec.Decoder cellDecoder, int expectedCount) throws IOException {
311    cells.clear();
312    cells.ensureCapacity(expectedCount);
313    while (cells.size() < expectedCount && cellDecoder.advance()) {
314      add(cellDecoder.current());
315    }
316    return cells.size();
317  }
318
319  @Override
320  public long heapSize() {
321    long ret = ClassSize.ARRAYLIST;
322    for (Cell cell : cells) {
323      ret += cell.heapSize();
324    }
325    return ret;
326  }
327
328  public long estimatedSerializedSizeOf() {
329    long ret = 0;
330    for (Cell cell : cells) {
331      ret += PrivateCellUtil.estimatedSerializedSizeOf(cell);
332    }
333    return ret;
334  }
335
336  @Override
337  public String toString() {
338    StringBuilder sb = new StringBuilder();
339
340    sb.append("[#edits: ").append(cells.size()).append(" = <");
341    for (Cell cell : cells) {
342      sb.append(cell);
343      sb.append("; ");
344    }
345    sb.append(">]");
346    return sb.toString();
347  }
348
349  public static WALEdit createFlushWALEdit(RegionInfo hri, FlushDescriptor f) {
350    KeyValue kv = new KeyValue(getRowForRegion(hri), METAFAMILY, FLUSH,
351      EnvironmentEdgeManager.currentTime(), f.toByteArray());
352    return new WALEdit().add(kv, METAFAMILY);
353  }
354
355  public static FlushDescriptor getFlushDescriptor(Cell cell) throws IOException {
356    return CellUtil.matchingColumn(cell, METAFAMILY, FLUSH)
357      ? FlushDescriptor.parseFrom(CellUtil.cloneValue(cell))
358      : null;
359  }
360
361  /**
362   * @return A meta Marker WALEdit that has a single Cell whose value is the passed in
363   *         <code>regionEventDesc</code> serialized and whose row is this region, columnfamily is
364   *         {@link #METAFAMILY} and qualifier is {@link #REGION_EVENT_PREFIX} +
365   *         {@link RegionEventDescriptor#getEventType()}; for example
366   *         HBASE::REGION_EVENT::REGION_CLOSE.
367   */
368  public static WALEdit createRegionEventWALEdit(RegionInfo hri,
369    RegionEventDescriptor regionEventDesc) {
370    return createRegionEventWALEdit(getRowForRegion(hri), regionEventDesc);
371  }
372
373  @InterfaceAudience.Private
374  public static WALEdit createRegionEventWALEdit(byte[] rowForRegion,
375    RegionEventDescriptor regionEventDesc) {
376    KeyValue kv = new KeyValue(rowForRegion, METAFAMILY,
377      createRegionEventDescriptorQualifier(regionEventDesc.getEventType()),
378      EnvironmentEdgeManager.currentTime(), regionEventDesc.toByteArray());
379    return new WALEdit().add(kv, METAFAMILY);
380  }
381
382  /**
383   * @return Cell qualifier for the passed in RegionEventDescriptor Type; e.g. we'll return
384   *         something like a byte array with HBASE::REGION_EVENT::REGION_OPEN in it.
385   */
386  @InterfaceAudience.Private
387  public static byte[] createRegionEventDescriptorQualifier(RegionEventDescriptor.EventType t) {
388    return Bytes.toBytes(REGION_EVENT_PREFIX_STR + t.toString());
389  }
390
391  /**
392   * Public so can be accessed from regionserver.wal package.
393   * @return True if this is a Marker Edit and it is a RegionClose type.
394   */
395  public boolean isRegionCloseMarker() {
396    return isMetaEdit() && PrivateCellUtil.matchingQualifier(this.cells.get(0), REGION_EVENT_CLOSE,
397      0, REGION_EVENT_CLOSE.length);
398  }
399
400  /**
401   * @return Returns a RegionEventDescriptor made by deserializing the content of the passed in
402   *         <code>cell</code>, IFF the <code>cell</code> is a RegionEventDescriptor type WALEdit.
403   */
404  public static RegionEventDescriptor getRegionEventDescriptor(Cell cell) throws IOException {
405    return CellUtil.matchingColumnFamilyAndQualifierPrefix(cell, METAFAMILY, REGION_EVENT_PREFIX)
406      ? RegionEventDescriptor.parseFrom(CellUtil.cloneValue(cell))
407      : null;
408  }
409
410  /** Returns A Marker WALEdit that has <code>c</code> serialized as its value */
411  public static WALEdit createCompaction(final RegionInfo hri, final CompactionDescriptor c) {
412    byte[] pbbytes = c.toByteArray();
413    KeyValue kv = new KeyValue(getRowForRegion(hri), METAFAMILY, COMPACTION,
414      EnvironmentEdgeManager.currentTime(), pbbytes);
415    return new WALEdit().add(kv, METAFAMILY); // replication scope null so this won't be replicated
416  }
417
418  public static byte[] getRowForRegion(RegionInfo hri) {
419    byte[] startKey = hri.getStartKey();
420    if (startKey.length == 0) {
421      // empty row key is not allowed in mutations because it is both the start key and the end key
422      // we return the smallest byte[] that is bigger (in lex comparison) than byte[0].
423      return new byte[] { 0 };
424    }
425    return startKey;
426  }
427
428  /**
429   * Deserialized and returns a CompactionDescriptor is the KeyValue contains one.
430   * @param kv the key value
431   * @return deserialized CompactionDescriptor or null.
432   */
433  public static CompactionDescriptor getCompaction(Cell kv) throws IOException {
434    return isCompactionMarker(kv) ? CompactionDescriptor.parseFrom(CellUtil.cloneValue(kv)) : null;
435  }
436
437  /**
438   * Returns true if the given cell is a serialized {@link CompactionDescriptor}
439   * @see #getCompaction(Cell)
440   */
441  public static boolean isCompactionMarker(Cell cell) {
442    return CellUtil.matchingColumn(cell, METAFAMILY, COMPACTION);
443  }
444
445  /**
446   * Create a bulk loader WALEdit
447   * @param hri                The RegionInfo for the region in which we are bulk loading
448   * @param bulkLoadDescriptor The descriptor for the Bulk Loader
449   * @return The WALEdit for the BulkLoad
450   */
451  public static WALEdit createBulkLoadEvent(RegionInfo hri,
452    WALProtos.BulkLoadDescriptor bulkLoadDescriptor) {
453    KeyValue kv = new KeyValue(getRowForRegion(hri), METAFAMILY, BULK_LOAD,
454      EnvironmentEdgeManager.currentTime(), bulkLoadDescriptor.toByteArray());
455    return new WALEdit().add(kv, METAFAMILY);
456  }
457
458  /**
459   * Deserialized and returns a BulkLoadDescriptor from the passed in Cell
460   * @param cell the key value
461   * @return deserialized BulkLoadDescriptor or null.
462   */
463  public static WALProtos.BulkLoadDescriptor getBulkLoadDescriptor(Cell cell) throws IOException {
464    return CellUtil.matchingColumn(cell, METAFAMILY, BULK_LOAD)
465      ? WALProtos.BulkLoadDescriptor.parseFrom(CellUtil.cloneValue(cell))
466      : null;
467  }
468
469  /**
470   * Append the given map of family->edits to a WALEdit data structure. This does not write to the
471   * WAL itself. Note that as an optimization, we will stamp the Set of column families into the
472   * WALEdit to save on our having to calculate column families subsequently down in the actual WAL
473   * writing.
474   * @param familyMap map of family->edits
475   */
476  public void add(Map<byte[], List<Cell>> familyMap) {
477    for (Map.Entry<byte[], List<Cell>> e : familyMap.entrySet()) {
478      // 'foreach' loop NOT used. See HBASE-12023 "...creates too many iterator objects."
479      int listSize = e.getValue().size();
480      // Add all Cells first and then at end, add the family rather than call {@link #add(Cell)}
481      // and have it clone family each time. Optimization!
482      for (int i = 0; i < listSize; i++) {
483        addCell(e.getValue().get(i));
484      }
485      addFamily(e.getKey());
486    }
487  }
488
489  private void addFamily(byte[] family) {
490    getOrCreateFamilies().add(family);
491  }
492
493  private WALEdit addCell(Cell cell) {
494    this.cells.add(cell);
495    return this;
496  }
497
498  /**
499   * Creates a replication tracker edit with {@link #METAFAMILY} family and
500   * {@link #REPLICATION_MARKER} qualifier and has null value.
501   * @param rowKey    rowkey
502   * @param timestamp timestamp
503   */
504  public static WALEdit createReplicationMarkerEdit(byte[] rowKey, long timestamp) {
505    KeyValue kv =
506      new KeyValue(rowKey, METAFAMILY, REPLICATION_MARKER, timestamp, KeyValue.Type.Put);
507    return new WALEdit().add(kv);
508  }
509
510  /**
511   * Checks whether this edit is a replication marker edit.
512   * @param edit edit
513   * @return true if the cell within an edit has column = METAFAMILY and qualifier =
514   *         REPLICATION_MARKER, false otherwise
515   */
516  public static boolean isReplicationMarkerEdit(WALEdit edit) {
517    // Check just the first cell from the edit. ReplicationMarker edit will have only 1 cell.
518    return edit.getCells().size() == 1
519      && CellUtil.matchingColumn(edit.getCells().get(0), METAFAMILY, REPLICATION_MARKER);
520  }
521}