001/*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018package org.apache.hadoop.hbase.wal;
019
020import java.io.IOException;
021import java.util.ArrayList;
022import java.util.List;
023import java.util.Map;
024import java.util.Set;
025import java.util.TreeSet;
026
027import org.apache.hadoop.hbase.Cell;
028import org.apache.hadoop.hbase.CellUtil;
029import org.apache.hadoop.hbase.HBaseInterfaceAudience;
030import org.apache.hadoop.hbase.PrivateCellUtil;
031import org.apache.hadoop.hbase.KeyValue;
032import org.apache.hadoop.hbase.client.RegionInfo;
033import org.apache.hadoop.hbase.codec.Codec;
034import org.apache.hadoop.hbase.io.HeapSize;
035import org.apache.hadoop.hbase.util.Bytes;
036import org.apache.hadoop.hbase.util.ClassSize;
037import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
038import org.apache.yetus.audience.InterfaceAudience;
039import org.apache.hbase.thirdparty.com.google.common.annotations.VisibleForTesting;
040import org.apache.hadoop.hbase.shaded.protobuf.generated.WALProtos;
041import org.apache.hadoop.hbase.shaded.protobuf.generated.WALProtos.CompactionDescriptor;
042import org.apache.hadoop.hbase.shaded.protobuf.generated.WALProtos.FlushDescriptor;
043import org.apache.hadoop.hbase.shaded.protobuf.generated.WALProtos.RegionEventDescriptor;
044
045
046/**
047 * Used in HBase's transaction log (WAL) to represent a collection of edits (Cell/KeyValue objects)
048 * that came in as a single transaction. All the edits for a given transaction are written out as a
049 * single record, in PB format, followed (optionally) by Cells written via the WALCellEncoder.
050 * <p>This class is LimitedPrivate for CPs to read-only. The {@link #add} methods are
051 * classified as private methods, not for use by CPs.</p>
052 *
053 * <p>A particular WALEdit 'type' is the 'meta' type used to mark key operational
054 * events in the WAL such as compaction, flush, or region open. These meta types do not traverse
055 * hbase memstores. They are edits made by the hbase system rather than edit data submitted by
056 * clients. They only show in the WAL. These 'Meta' types have not been formally specified
057 * (or made into an explicit class type). They evolved organically. HBASE-8457 suggests codifying
058 * a WALEdit 'type' by adding a type field to WALEdit that gets serialized into the WAL. TODO.
059 * Would have to work on the consumption-side. Reading WALs on replay we seem to consume
060 * a Cell-at-a-time rather than by WALEdit. We are already in the below going out of our
061 * way to figure particular types --  e.g. if a compaction, replay, or close meta Marker -- during
062 * normal processing so would make sense to do this. Current system is an awkward marking of Cell
063 * columnfamily as {@link #METAFAMILY} and then setting qualifier based off meta edit type. For
064 * replay-time where we read Cell-at-a-time, there are utility methods below for figuring
065 * meta type. See also
066 * {@link #createBulkLoadEvent(RegionInfo, WALProtos.BulkLoadDescriptor)}, etc., for where we
067 * create meta WALEdit instances.</p>
068 *
069 * <p>WALEdit will accumulate a Set of all column family names referenced by the Cells
070 * {@link #add(Cell)}'d. This is an optimization. Usually when loading a WALEdit, we have the
071 * column family name to-hand.. just shove it into the WALEdit if available. Doing this, we can
072 * save on a parse of each Cell to figure column family down the line when we go to add the
073 * WALEdit to the WAL file. See the hand-off in FSWALEntry Constructor.
074 * @see WALKey
075 */
076// TODO: Do not expose this class to Coprocessors. It has set methods. A CP might meddle.
077@InterfaceAudience.LimitedPrivate({ HBaseInterfaceAudience.REPLICATION,
078    HBaseInterfaceAudience.COPROC })
079public class WALEdit implements HeapSize {
080  // Below defines are for writing WALEdit 'meta' Cells..
081  // TODO: Get rid of this system of special 'meta' Cells. See HBASE-8457. It suggests
082  // adding a type to WALEdit itself for use denoting meta Edits and their types.
083  public static final byte [] METAFAMILY = Bytes.toBytes("METAFAMILY");
084
085  /**
086   * @deprecated Since 2.3.0. Not used.
087   */
088  @Deprecated
089  public static final byte [] METAROW = Bytes.toBytes("METAROW");
090
091  /**
092   * @deprecated Since 2.3.0. Make it protected, internal-use only. Use
093   *   {@link #isCompactionMarker(Cell)}
094   */
095  @Deprecated
096  @VisibleForTesting
097  public static final byte[] COMPACTION = Bytes.toBytes("HBASE::COMPACTION");
098
099  /**
100   * @deprecated Since 2.3.0. Make it protected, internal-use only.
101   */
102  @Deprecated
103  @VisibleForTesting
104  public static final byte [] FLUSH = Bytes.toBytes("HBASE::FLUSH");
105
106  /**
107   * Qualifier for region event meta 'Marker' WALEdits start with the
108   * {@link #REGION_EVENT_PREFIX} prefix ('HBASE::REGION_EVENT::'). After the prefix,
109   * we note the type of the event which we get from the RegionEventDescriptor protobuf
110   * instance type (A RegionEventDescriptor protobuf instance is written as the meta Marker
111   * Cell value). Adding a type suffix means we do not have to deserialize the protobuf to
112   * figure out what type of event this is.. .just read the qualifier suffix. For example,
113   * a close region event descriptor will have a qualifier of HBASE::REGION_EVENT::REGION_CLOSE.
114   * See WAL.proto and the EventType in RegionEventDescriptor protos for all possible
115   * event types.
116   */
117  private static final String REGION_EVENT_STR = "HBASE::REGION_EVENT";
118  private static final String REGION_EVENT_PREFIX_STR = REGION_EVENT_STR + "::";
119  private static final byte [] REGION_EVENT_PREFIX = Bytes.toBytes(REGION_EVENT_PREFIX_STR);
120
121  /**
122   * @deprecated Since 2.3.0. Remove. Not for external use. Not used.
123   */
124  @Deprecated
125  public static final byte [] REGION_EVENT = Bytes.toBytes(REGION_EVENT_STR);
126
127  /**
128   * We use this define figuring if we are carrying a close event.
129   */
130  private static final byte [] REGION_EVENT_CLOSE =
131      createRegionEventDescriptorQualifier(RegionEventDescriptor.EventType.REGION_CLOSE);
132
133  @VisibleForTesting
134  public static final byte [] BULK_LOAD = Bytes.toBytes("HBASE::BULK_LOAD");
135
136  private final transient boolean replay;
137
138  private ArrayList<Cell> cells;
139
140  /**
141   * All the Cell families in <code>cells</code>. Updated by {@link #add(Cell)} and
142   * {@link #add(Map)}. This Set is passed to the FSWALEntry so it does not have
143   * to recalculate the Set of families in a transaction; makes for a bunch of CPU savings.
144   */
145  private Set<byte []> families = null;
146
147  public WALEdit() {
148    this(1, false);
149  }
150
151  /**
152   * @deprecated since 2.0.1 and will be removed in 4.0.0. Use {@link #WALEdit(int, boolean)}
153   *   instead.
154   * @see #WALEdit(int, boolean)
155   * @see <a href="https://issues.apache.org/jira/browse/HBASE-20781">HBASE-20781</a>
156   */
157  @Deprecated
158  public WALEdit(boolean replay) {
159    this(1, replay);
160  }
161
162  /**
163   * @deprecated since 2.0.1 and will be removed in 4.0.0. Use {@link #WALEdit(int, boolean)}
164   *   instead.
165   * @see #WALEdit(int, boolean)
166   * @see <a href="https://issues.apache.org/jira/browse/HBASE-20781">HBASE-20781</a>
167   */
168  @Deprecated
169  public WALEdit(int cellCount) {
170    this(cellCount, false);
171  }
172
173  /**
174   * @param cellCount Pass so can pre-size the WALEdit. Optimization.
175   */
176  public WALEdit(int cellCount, boolean isReplay) {
177    this.replay = isReplay;
178    cells = new ArrayList<>(cellCount);
179  }
180
181  private Set<byte[]> getOrCreateFamilies() {
182    if (this.families == null) {
183      this.families = new TreeSet<>(Bytes.BYTES_COMPARATOR);
184    }
185    return this.families;
186  }
187
188  /**
189   * For use by FSWALEntry ONLY. An optimization.
190   * @return All families in {@link #getCells()}; may be null.
191   */
192  public Set<byte []> getFamilies() {
193    return this.families;
194  }
195
196  /**
197   * @return True is <code>f</code> is {@link #METAFAMILY}
198   * @deprecated Since 2.3.0. Do not expose. Make protected.
199   */
200  @Deprecated
201  public static boolean isMetaEditFamily(final byte [] f) {
202    return Bytes.equals(METAFAMILY, f);
203  }
204
205  /**
206   * Replaying WALs can read Cell-at-a-time so need this method in those cases.
207   */
208  public static boolean isMetaEditFamily(Cell cell) {
209    return CellUtil.matchingFamily(cell, METAFAMILY);
210  }
211
212  /**
213   * @return True if this is a meta edit; has one edit only and its columnfamily
214   *   is {@link #METAFAMILY}.
215   */
216  public boolean isMetaEdit() {
217    return this.families != null && this.families.size() == 1 && this.families.contains(METAFAMILY);
218  }
219
220  /**
221   * @return True when current WALEdit is created by log replay. Replication skips WALEdits from
222   *         replay.
223   */
224  public boolean isReplay() {
225    return this.replay;
226  }
227
228  @InterfaceAudience.Private
229  public WALEdit add(Cell cell, byte [] family) {
230    getOrCreateFamilies().add(family);
231    return addCell(cell);
232  }
233
234  @InterfaceAudience.Private
235  public WALEdit add(Cell cell) {
236    // We clone Family each time we add a Cell. Expensive but safe. For CPU savings, use
237    // add(Map) or add(Cell, family).
238    return add(cell, CellUtil.cloneFamily(cell));
239  }
240
241  public boolean isEmpty() {
242    return cells.isEmpty();
243  }
244
245  public int size() {
246    return cells.size();
247  }
248
249  public ArrayList<Cell> getCells() {
250    return cells;
251  }
252
253  /**
254   * This is not thread safe.
255   * This will change the WALEdit and shouldn't be used unless you are sure that nothing
256   * else depends on the contents being immutable.
257   *
258   * @param cells the list of cells that this WALEdit now contains.
259   */
260  @InterfaceAudience.Private
261  // Used by replay.
262  public void setCells(ArrayList<Cell> cells) {
263    this.cells = cells;
264    this.families = null;
265  }
266
267  /**
268   * Reads WALEdit from cells.
269   * @param cellDecoder Cell decoder.
270   * @param expectedCount Expected cell count.
271   * @return Number of KVs read.
272   */
273  public int readFromCells(Codec.Decoder cellDecoder, int expectedCount) throws IOException {
274    cells.clear();
275    cells.ensureCapacity(expectedCount);
276    while (cells.size() < expectedCount && cellDecoder.advance()) {
277      add(cellDecoder.current());
278    }
279    return cells.size();
280  }
281
282  @Override
283  public long heapSize() {
284    long ret = ClassSize.ARRAYLIST;
285    for (Cell cell : cells) {
286      ret += cell.heapSize();
287    }
288    return ret;
289  }
290
291  public long estimatedSerializedSizeOf() {
292    long ret = 0;
293    for (Cell cell: cells) {
294      ret += PrivateCellUtil.estimatedSerializedSizeOf(cell);
295    }
296    return ret;
297  }
298
299  @Override
300  public String toString() {
301    StringBuilder sb = new StringBuilder();
302
303    sb.append("[#edits: ").append(cells.size()).append(" = <");
304    for (Cell cell : cells) {
305      sb.append(cell);
306      sb.append("; ");
307    }
308    sb.append(">]");
309    return sb.toString();
310  }
311
312  public static WALEdit createFlushWALEdit(RegionInfo hri, FlushDescriptor f) {
313    KeyValue kv = new KeyValue(getRowForRegion(hri), METAFAMILY, FLUSH,
314      EnvironmentEdgeManager.currentTime(), f.toByteArray());
315    return new WALEdit().add(kv, METAFAMILY);
316  }
317
318  public static FlushDescriptor getFlushDescriptor(Cell cell) throws IOException {
319    return CellUtil.matchingColumn(cell, METAFAMILY, FLUSH)?
320        FlushDescriptor.parseFrom(CellUtil.cloneValue(cell)): null;
321  }
322
323  /**
324   * @return A meta Marker WALEdit that has a single Cell whose value is the passed in
325   *   <code>regionEventDesc</code> serialized and whose row is this region,
326   *   columnfamily is {@link #METAFAMILY} and qualifier is
327   *   {@link #REGION_EVENT_PREFIX} + {@link RegionEventDescriptor#getEventType()};
328   *   for example HBASE::REGION_EVENT::REGION_CLOSE.
329   */
330  public static WALEdit createRegionEventWALEdit(RegionInfo hri,
331      RegionEventDescriptor regionEventDesc) {
332    return createRegionEventWALEdit(getRowForRegion(hri), regionEventDesc);
333  }
334
335  @VisibleForTesting
336  public static WALEdit createRegionEventWALEdit(byte [] rowForRegion,
337      RegionEventDescriptor regionEventDesc) {
338    KeyValue kv = new KeyValue(rowForRegion, METAFAMILY,
339        createRegionEventDescriptorQualifier(regionEventDesc.getEventType()),
340        EnvironmentEdgeManager.currentTime(), regionEventDesc.toByteArray());
341    return new WALEdit().add(kv, METAFAMILY);
342  }
343
344  /**
345   * @return Cell qualifier for the passed in RegionEventDescriptor Type; e.g. we'll
346   *   return something like a byte array with HBASE::REGION_EVENT::REGION_OPEN in it.
347   */
348  @VisibleForTesting
349  public static byte [] createRegionEventDescriptorQualifier(RegionEventDescriptor.EventType t) {
350    return Bytes.toBytes(REGION_EVENT_PREFIX_STR + t.toString());
351  }
352
353  /**
354   * Public so can be accessed from regionserver.wal package.
355   * @return True if this is a Marker Edit and it is a RegionClose type.
356   */
357  public boolean isRegionCloseMarker() {
358    return isMetaEdit() && PrivateCellUtil.matchingQualifier(this.cells.get(0),
359        REGION_EVENT_CLOSE, 0, REGION_EVENT_CLOSE.length);
360  }
361
362  /**
363   * @return Returns a RegionEventDescriptor made by deserializing the content of the
364   *   passed in <code>cell</code>, IFF the <code>cell</code> is a RegionEventDescriptor
365   *   type WALEdit.
366   */
367  public static RegionEventDescriptor getRegionEventDescriptor(Cell cell) throws IOException {
368    return CellUtil.matchingColumnFamilyAndQualifierPrefix(cell, METAFAMILY, REGION_EVENT_PREFIX)?
369      RegionEventDescriptor.parseFrom(CellUtil.cloneValue(cell)): null;
370  }
371
372  /**
373   * @return A Marker WALEdit that has <code>c</code> serialized as its value
374   */
375  public static WALEdit createCompaction(final RegionInfo hri, final CompactionDescriptor c) {
376    byte [] pbbytes = c.toByteArray();
377    KeyValue kv = new KeyValue(getRowForRegion(hri), METAFAMILY, COMPACTION,
378      EnvironmentEdgeManager.currentTime(), pbbytes);
379    return new WALEdit().add(kv, METAFAMILY); //replication scope null so this won't be replicated
380  }
381
382  public static byte[] getRowForRegion(RegionInfo hri) {
383    byte[] startKey = hri.getStartKey();
384    if (startKey.length == 0) {
385      // empty row key is not allowed in mutations because it is both the start key and the end key
386      // we return the smallest byte[] that is bigger (in lex comparison) than byte[0].
387      return new byte[] {0};
388    }
389    return startKey;
390  }
391
392  /**
393   * Deserialized and returns a CompactionDescriptor is the KeyValue contains one.
394   * @param kv the key value
395   * @return deserialized CompactionDescriptor or null.
396   */
397  public static CompactionDescriptor getCompaction(Cell kv) throws IOException {
398    return isCompactionMarker(kv)? CompactionDescriptor.parseFrom(CellUtil.cloneValue(kv)): null;
399  }
400
401  /**
402   * Returns true if the given cell is a serialized {@link CompactionDescriptor}
403   *
404   * @see #getCompaction(Cell)
405   */
406  public static boolean isCompactionMarker(Cell cell) {
407    return CellUtil.matchingColumn(cell, METAFAMILY, COMPACTION);
408  }
409
410  /**
411   * Create a bulk loader WALEdit
412   *
413   * @param hri                The RegionInfo for the region in which we are bulk loading
414   * @param bulkLoadDescriptor The descriptor for the Bulk Loader
415   * @return The WALEdit for the BulkLoad
416   */
417  public static WALEdit createBulkLoadEvent(RegionInfo hri,
418      WALProtos.BulkLoadDescriptor bulkLoadDescriptor) {
419    KeyValue kv = new KeyValue(getRowForRegion(hri), METAFAMILY, BULK_LOAD,
420        EnvironmentEdgeManager.currentTime(), bulkLoadDescriptor.toByteArray());
421    return new WALEdit().add(kv, METAFAMILY);
422  }
423
424  /**
425   * Deserialized and returns a BulkLoadDescriptor from the passed in Cell
426   * @param cell the key value
427   * @return deserialized BulkLoadDescriptor or null.
428   */
429  public static WALProtos.BulkLoadDescriptor getBulkLoadDescriptor(Cell cell) throws IOException {
430    return CellUtil.matchingColumn(cell, METAFAMILY, BULK_LOAD)?
431      WALProtos.BulkLoadDescriptor.parseFrom(CellUtil.cloneValue(cell)):  null;
432  }
433
434  /**
435   * Append the given map of family->edits to a WALEdit data structure.
436   * This does not write to the WAL itself.
437   * Note that as an optimization, we will stamp the Set of column families into the WALEdit
438   * to save on our having to calculate column families subsequently down in the actual WAL
439   * writing.
440   *
441   * @param familyMap map of family->edits
442   */
443  public void add(Map<byte[], List<Cell>> familyMap) {
444    for (Map.Entry<byte [], List<Cell>> e: familyMap.entrySet()) {
445      // 'foreach' loop NOT used. See HBASE-12023 "...creates too many iterator objects."
446      int listSize = e.getValue().size();
447      // Add all Cells first and then at end, add the family rather than call {@link #add(Cell)}
448      // and have it clone family each time. Optimization!
449      for (int i = 0; i < listSize; i++) {
450        addCell(e.getValue().get(i));
451      }
452      addFamily(e.getKey());
453    }
454  }
455
456  private void addFamily(byte [] family) {
457    getOrCreateFamilies().add(family);
458  }
459
460  private WALEdit addCell(Cell cell) {
461    this.cells.add(cell);
462    return this;
463  }
464}