001/* 002 * Licensed to the Apache Software Foundation (ASF) under one 003 * or more contributor license agreements. See the NOTICE file 004 * distributed with this work for additional information 005 * regarding copyright ownership. The ASF licenses this file 006 * to you under the Apache License, Version 2.0 (the 007 * "License"); you may not use this file except in compliance 008 * with the License. You may obtain a copy of the License at 009 * 010 * http://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, software 013 * distributed under the License is distributed on an "AS IS" BASIS, 014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 015 * See the License for the specific language governing permissions and 016 * limitations under the License. 017 */ 018package org.apache.hadoop.hbase.wal; 019 020import java.io.IOException; 021import java.util.ArrayList; 022import java.util.List; 023import java.util.Map; 024import java.util.Set; 025import java.util.TreeSet; 026 027import org.apache.hadoop.hbase.Cell; 028import org.apache.hadoop.hbase.CellUtil; 029import org.apache.hadoop.hbase.HBaseInterfaceAudience; 030import org.apache.hadoop.hbase.PrivateCellUtil; 031import org.apache.hadoop.hbase.KeyValue; 032import org.apache.hadoop.hbase.client.RegionInfo; 033import org.apache.hadoop.hbase.codec.Codec; 034import org.apache.hadoop.hbase.io.HeapSize; 035import org.apache.hadoop.hbase.util.Bytes; 036import org.apache.hadoop.hbase.util.ClassSize; 037import org.apache.hadoop.hbase.util.EnvironmentEdgeManager; 038import org.apache.yetus.audience.InterfaceAudience; 039import org.apache.hbase.thirdparty.com.google.common.annotations.VisibleForTesting; 040import org.apache.hadoop.hbase.shaded.protobuf.generated.WALProtos; 041import org.apache.hadoop.hbase.shaded.protobuf.generated.WALProtos.CompactionDescriptor; 042import org.apache.hadoop.hbase.shaded.protobuf.generated.WALProtos.FlushDescriptor; 043import org.apache.hadoop.hbase.shaded.protobuf.generated.WALProtos.RegionEventDescriptor; 044 045 046/** 047 * Used in HBase's transaction log (WAL) to represent a collection of edits (Cell/KeyValue objects) 048 * that came in as a single transaction. All the edits for a given transaction are written out as a 049 * single record, in PB format, followed (optionally) by Cells written via the WALCellEncoder. 050 * <p>This class is LimitedPrivate for CPs to read-only. The {@link #add} methods are 051 * classified as private methods, not for use by CPs.</p> 052 * 053 * <p>A particular WALEdit 'type' is the 'meta' type used to mark key operational 054 * events in the WAL such as compaction, flush, or region open. These meta types do not traverse 055 * hbase memstores. They are edits made by the hbase system rather than edit data submitted by 056 * clients. They only show in the WAL. These 'Meta' types have not been formally specified 057 * (or made into an explicit class type). They evolved organically. HBASE-8457 suggests codifying 058 * a WALEdit 'type' by adding a type field to WALEdit that gets serialized into the WAL. TODO. 059 * Would have to work on the consumption-side. Reading WALs on replay we seem to consume 060 * a Cell-at-a-time rather than by WALEdit. We are already in the below going out of our 061 * way to figure particular types -- e.g. if a compaction, replay, or close meta Marker -- during 062 * normal processing so would make sense to do this. Current system is an awkward marking of Cell 063 * columnfamily as {@link #METAFAMILY} and then setting qualifier based off meta edit type. For 064 * replay-time where we read Cell-at-a-time, there are utility methods below for figuring 065 * meta type. See also 066 * {@link #createBulkLoadEvent(RegionInfo, WALProtos.BulkLoadDescriptor)}, etc., for where we 067 * create meta WALEdit instances.</p> 068 * 069 * <p>WALEdit will accumulate a Set of all column family names referenced by the Cells 070 * {@link #add(Cell)}'d. This is an optimization. Usually when loading a WALEdit, we have the 071 * column family name to-hand.. just shove it into the WALEdit if available. Doing this, we can 072 * save on a parse of each Cell to figure column family down the line when we go to add the 073 * WALEdit to the WAL file. See the hand-off in FSWALEntry Constructor. 074 * @see WALKey 075 */ 076// TODO: Do not expose this class to Coprocessors. It has set methods. A CP might meddle. 077@InterfaceAudience.LimitedPrivate({ HBaseInterfaceAudience.REPLICATION, 078 HBaseInterfaceAudience.COPROC }) 079public class WALEdit implements HeapSize { 080 // Below defines are for writing WALEdit 'meta' Cells.. 081 // TODO: Get rid of this system of special 'meta' Cells. See HBASE-8457. It suggests 082 // adding a type to WALEdit itself for use denoting meta Edits and their types. 083 public static final byte [] METAFAMILY = Bytes.toBytes("METAFAMILY"); 084 085 /** 086 * @deprecated Since 2.3.0. Not used. 087 */ 088 @Deprecated 089 public static final byte [] METAROW = Bytes.toBytes("METAROW"); 090 091 /** 092 * @deprecated Since 2.3.0. Make it protected, internal-use only. Use 093 * {@link #isCompactionMarker(Cell)} 094 */ 095 @Deprecated 096 @VisibleForTesting 097 public static final byte[] COMPACTION = Bytes.toBytes("HBASE::COMPACTION"); 098 099 /** 100 * @deprecated Since 2.3.0. Make it protected, internal-use only. 101 */ 102 @Deprecated 103 @VisibleForTesting 104 public static final byte [] FLUSH = Bytes.toBytes("HBASE::FLUSH"); 105 106 /** 107 * Qualifier for region event meta 'Marker' WALEdits start with the 108 * {@link #REGION_EVENT_PREFIX} prefix ('HBASE::REGION_EVENT::'). After the prefix, 109 * we note the type of the event which we get from the RegionEventDescriptor protobuf 110 * instance type (A RegionEventDescriptor protobuf instance is written as the meta Marker 111 * Cell value). Adding a type suffix means we do not have to deserialize the protobuf to 112 * figure out what type of event this is.. .just read the qualifier suffix. For example, 113 * a close region event descriptor will have a qualifier of HBASE::REGION_EVENT::REGION_CLOSE. 114 * See WAL.proto and the EventType in RegionEventDescriptor protos for all possible 115 * event types. 116 */ 117 private static final String REGION_EVENT_STR = "HBASE::REGION_EVENT"; 118 private static final String REGION_EVENT_PREFIX_STR = REGION_EVENT_STR + "::"; 119 private static final byte [] REGION_EVENT_PREFIX = Bytes.toBytes(REGION_EVENT_PREFIX_STR); 120 121 /** 122 * @deprecated Since 2.3.0. Remove. Not for external use. Not used. 123 */ 124 @Deprecated 125 public static final byte [] REGION_EVENT = Bytes.toBytes(REGION_EVENT_STR); 126 127 /** 128 * We use this define figuring if we are carrying a close event. 129 */ 130 private static final byte [] REGION_EVENT_CLOSE = 131 createRegionEventDescriptorQualifier(RegionEventDescriptor.EventType.REGION_CLOSE); 132 133 @VisibleForTesting 134 public static final byte [] BULK_LOAD = Bytes.toBytes("HBASE::BULK_LOAD"); 135 136 private final transient boolean replay; 137 138 private ArrayList<Cell> cells; 139 140 /** 141 * All the Cell families in <code>cells</code>. Updated by {@link #add(Cell)} and 142 * {@link #add(Map)}. This Set is passed to the FSWALEntry so it does not have 143 * to recalculate the Set of families in a transaction; makes for a bunch of CPU savings. 144 */ 145 private Set<byte []> families = null; 146 147 public WALEdit() { 148 this(1, false); 149 } 150 151 /** 152 * @deprecated since 2.0.1 and will be removed in 4.0.0. Use {@link #WALEdit(int, boolean)} 153 * instead. 154 * @see #WALEdit(int, boolean) 155 * @see <a href="https://issues.apache.org/jira/browse/HBASE-20781">HBASE-20781</a> 156 */ 157 @Deprecated 158 public WALEdit(boolean replay) { 159 this(1, replay); 160 } 161 162 /** 163 * @deprecated since 2.0.1 and will be removed in 4.0.0. Use {@link #WALEdit(int, boolean)} 164 * instead. 165 * @see #WALEdit(int, boolean) 166 * @see <a href="https://issues.apache.org/jira/browse/HBASE-20781">HBASE-20781</a> 167 */ 168 @Deprecated 169 public WALEdit(int cellCount) { 170 this(cellCount, false); 171 } 172 173 /** 174 * @param cellCount Pass so can pre-size the WALEdit. Optimization. 175 */ 176 public WALEdit(int cellCount, boolean isReplay) { 177 this.replay = isReplay; 178 cells = new ArrayList<>(cellCount); 179 } 180 181 private Set<byte[]> getOrCreateFamilies() { 182 if (this.families == null) { 183 this.families = new TreeSet<>(Bytes.BYTES_COMPARATOR); 184 } 185 return this.families; 186 } 187 188 /** 189 * For use by FSWALEntry ONLY. An optimization. 190 * @return All families in {@link #getCells()}; may be null. 191 */ 192 public Set<byte []> getFamilies() { 193 return this.families; 194 } 195 196 /** 197 * @return True is <code>f</code> is {@link #METAFAMILY} 198 * @deprecated Since 2.3.0. Do not expose. Make protected. 199 */ 200 @Deprecated 201 public static boolean isMetaEditFamily(final byte [] f) { 202 return Bytes.equals(METAFAMILY, f); 203 } 204 205 /** 206 * Replaying WALs can read Cell-at-a-time so need this method in those cases. 207 */ 208 public static boolean isMetaEditFamily(Cell cell) { 209 return CellUtil.matchingFamily(cell, METAFAMILY); 210 } 211 212 /** 213 * @return True if this is a meta edit; has one edit only and its columnfamily 214 * is {@link #METAFAMILY}. 215 */ 216 public boolean isMetaEdit() { 217 return this.families != null && this.families.size() == 1 && this.families.contains(METAFAMILY); 218 } 219 220 /** 221 * @return True when current WALEdit is created by log replay. Replication skips WALEdits from 222 * replay. 223 */ 224 public boolean isReplay() { 225 return this.replay; 226 } 227 228 @InterfaceAudience.Private 229 public WALEdit add(Cell cell, byte [] family) { 230 getOrCreateFamilies().add(family); 231 return addCell(cell); 232 } 233 234 @InterfaceAudience.Private 235 public WALEdit add(Cell cell) { 236 // We clone Family each time we add a Cell. Expensive but safe. For CPU savings, use 237 // add(Map) or add(Cell, family). 238 return add(cell, CellUtil.cloneFamily(cell)); 239 } 240 241 public boolean isEmpty() { 242 return cells.isEmpty(); 243 } 244 245 public int size() { 246 return cells.size(); 247 } 248 249 public ArrayList<Cell> getCells() { 250 return cells; 251 } 252 253 /** 254 * This is not thread safe. 255 * This will change the WALEdit and shouldn't be used unless you are sure that nothing 256 * else depends on the contents being immutable. 257 * 258 * @param cells the list of cells that this WALEdit now contains. 259 */ 260 @InterfaceAudience.Private 261 // Used by replay. 262 public void setCells(ArrayList<Cell> cells) { 263 this.cells = cells; 264 this.families = null; 265 } 266 267 /** 268 * Reads WALEdit from cells. 269 * @param cellDecoder Cell decoder. 270 * @param expectedCount Expected cell count. 271 * @return Number of KVs read. 272 */ 273 public int readFromCells(Codec.Decoder cellDecoder, int expectedCount) throws IOException { 274 cells.clear(); 275 cells.ensureCapacity(expectedCount); 276 while (cells.size() < expectedCount && cellDecoder.advance()) { 277 add(cellDecoder.current()); 278 } 279 return cells.size(); 280 } 281 282 @Override 283 public long heapSize() { 284 long ret = ClassSize.ARRAYLIST; 285 for (Cell cell : cells) { 286 ret += PrivateCellUtil.estimatedSizeOfCell(cell); 287 } 288 return ret; 289 } 290 291 public long estimatedSerializedSizeOf() { 292 long ret = 0; 293 for (Cell cell: cells) { 294 ret += PrivateCellUtil.estimatedSerializedSizeOf(cell); 295 } 296 return ret; 297 } 298 299 @Override 300 public String toString() { 301 StringBuilder sb = new StringBuilder(); 302 303 sb.append("[#edits: ").append(cells.size()).append(" = <"); 304 for (Cell cell : cells) { 305 sb.append(cell); 306 sb.append("; "); 307 } 308 sb.append(">]"); 309 return sb.toString(); 310 } 311 312 public static WALEdit createFlushWALEdit(RegionInfo hri, FlushDescriptor f) { 313 KeyValue kv = new KeyValue(getRowForRegion(hri), METAFAMILY, FLUSH, 314 EnvironmentEdgeManager.currentTime(), f.toByteArray()); 315 return new WALEdit().add(kv, METAFAMILY); 316 } 317 318 public static FlushDescriptor getFlushDescriptor(Cell cell) throws IOException { 319 return CellUtil.matchingColumn(cell, METAFAMILY, FLUSH)? 320 FlushDescriptor.parseFrom(CellUtil.cloneValue(cell)): null; 321 } 322 323 /** 324 * @return A meta Marker WALEdit that has a single Cell whose value is the passed in 325 * <code>regionEventDesc</code> serialized and whose row is this region, 326 * columnfamily is {@link #METAFAMILY} and qualifier is 327 * {@link #REGION_EVENT_PREFIX} + {@link RegionEventDescriptor#getEventType()}; 328 * for example HBASE::REGION_EVENT::REGION_CLOSE. 329 */ 330 public static WALEdit createRegionEventWALEdit(RegionInfo hri, 331 RegionEventDescriptor regionEventDesc) { 332 return createRegionEventWALEdit(getRowForRegion(hri), regionEventDesc); 333 } 334 335 @VisibleForTesting 336 public static WALEdit createRegionEventWALEdit(byte [] rowForRegion, 337 RegionEventDescriptor regionEventDesc) { 338 KeyValue kv = new KeyValue(rowForRegion, METAFAMILY, 339 createRegionEventDescriptorQualifier(regionEventDesc.getEventType()), 340 EnvironmentEdgeManager.currentTime(), regionEventDesc.toByteArray()); 341 return new WALEdit().add(kv, METAFAMILY); 342 } 343 344 /** 345 * @return Cell qualifier for the passed in RegionEventDescriptor Type; e.g. we'll 346 * return something like a byte array with HBASE::REGION_EVENT::REGION_OPEN in it. 347 */ 348 @VisibleForTesting 349 public static byte [] createRegionEventDescriptorQualifier(RegionEventDescriptor.EventType t) { 350 return Bytes.toBytes(REGION_EVENT_PREFIX_STR + t.toString()); 351 } 352 353 /** 354 * Public so can be accessed from regionserver.wal package. 355 * @return True if this is a Marker Edit and it is a RegionClose type. 356 */ 357 public boolean isRegionCloseMarker() { 358 return isMetaEdit() && PrivateCellUtil.matchingQualifier(this.cells.get(0), 359 REGION_EVENT_CLOSE, 0, REGION_EVENT_CLOSE.length); 360 } 361 362 /** 363 * @return Returns a RegionEventDescriptor made by deserializing the content of the 364 * passed in <code>cell</code>, IFF the <code>cell</code> is a RegionEventDescriptor 365 * type WALEdit. 366 */ 367 public static RegionEventDescriptor getRegionEventDescriptor(Cell cell) throws IOException { 368 return CellUtil.matchingColumnFamilyAndQualifierPrefix(cell, METAFAMILY, REGION_EVENT_PREFIX)? 369 RegionEventDescriptor.parseFrom(CellUtil.cloneValue(cell)): null; 370 } 371 372 /** 373 * @return A Marker WALEdit that has <code>c</code> serialized as its value 374 */ 375 public static WALEdit createCompaction(final RegionInfo hri, final CompactionDescriptor c) { 376 byte [] pbbytes = c.toByteArray(); 377 KeyValue kv = new KeyValue(getRowForRegion(hri), METAFAMILY, COMPACTION, 378 EnvironmentEdgeManager.currentTime(), pbbytes); 379 return new WALEdit().add(kv, METAFAMILY); //replication scope null so this won't be replicated 380 } 381 382 public static byte[] getRowForRegion(RegionInfo hri) { 383 byte[] startKey = hri.getStartKey(); 384 if (startKey.length == 0) { 385 // empty row key is not allowed in mutations because it is both the start key and the end key 386 // we return the smallest byte[] that is bigger (in lex comparison) than byte[0]. 387 return new byte[] {0}; 388 } 389 return startKey; 390 } 391 392 /** 393 * Deserialized and returns a CompactionDescriptor is the KeyValue contains one. 394 * @param kv the key value 395 * @return deserialized CompactionDescriptor or null. 396 */ 397 public static CompactionDescriptor getCompaction(Cell kv) throws IOException { 398 return isCompactionMarker(kv)? CompactionDescriptor.parseFrom(CellUtil.cloneValue(kv)): null; 399 } 400 401 /** 402 * Returns true if the given cell is a serialized {@link CompactionDescriptor} 403 * 404 * @see #getCompaction(Cell) 405 */ 406 public static boolean isCompactionMarker(Cell cell) { 407 return CellUtil.matchingColumn(cell, METAFAMILY, COMPACTION); 408 } 409 410 /** 411 * Create a bulk loader WALEdit 412 * 413 * @param hri The RegionInfo for the region in which we are bulk loading 414 * @param bulkLoadDescriptor The descriptor for the Bulk Loader 415 * @return The WALEdit for the BulkLoad 416 */ 417 public static WALEdit createBulkLoadEvent(RegionInfo hri, 418 WALProtos.BulkLoadDescriptor bulkLoadDescriptor) { 419 KeyValue kv = new KeyValue(getRowForRegion(hri), METAFAMILY, BULK_LOAD, 420 EnvironmentEdgeManager.currentTime(), bulkLoadDescriptor.toByteArray()); 421 return new WALEdit().add(kv, METAFAMILY); 422 } 423 424 /** 425 * Deserialized and returns a BulkLoadDescriptor from the passed in Cell 426 * @param cell the key value 427 * @return deserialized BulkLoadDescriptor or null. 428 */ 429 public static WALProtos.BulkLoadDescriptor getBulkLoadDescriptor(Cell cell) throws IOException { 430 return CellUtil.matchingColumn(cell, METAFAMILY, BULK_LOAD)? 431 WALProtos.BulkLoadDescriptor.parseFrom(CellUtil.cloneValue(cell)): null; 432 } 433 434 /** 435 * Append the given map of family->edits to a WALEdit data structure. 436 * This does not write to the WAL itself. 437 * Note that as an optimization, we will stamp the Set of column families into the WALEdit 438 * to save on our having to calculate column families subsequently down in the actual WAL 439 * writing. 440 * 441 * @param familyMap map of family->edits 442 */ 443 public void add(Map<byte[], List<Cell>> familyMap) { 444 for (Map.Entry<byte [], List<Cell>> e: familyMap.entrySet()) { 445 // 'foreach' loop NOT used. See HBASE-12023 "...creates too many iterator objects." 446 int listSize = e.getValue().size(); 447 // Add all Cells first and then at end, add the family rather than call {@link #add(Cell)} 448 // and have it clone family each time. Optimization! 449 for (int i = 0; i < listSize; i++) { 450 addCell(e.getValue().get(i)); 451 } 452 addFamily(e.getKey()); 453 } 454 } 455 456 private void addFamily(byte [] family) { 457 getOrCreateFamilies().add(family); 458 } 459 460 private WALEdit addCell(Cell cell) { 461 this.cells.add(cell); 462 return this; 463 } 464}