
/**
 *
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.regionserver.wal;

import java.io.DataInput;
import java.io.DataOutput;
import java.io.EOFException;
import java.io.IOException;
import java.io.InterruptedIOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.NavigableMap;
import java.util.TreeMap;
import java.util.UUID;
import java.util.concurrent.CountDownLatch;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.HRegionInfo;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.protobuf.generated.HBaseProtos;
import org.apache.hadoop.hbase.protobuf.generated.WALProtos.FamilyScope;
import org.apache.hadoop.hbase.protobuf.generated.WALProtos.ScopeType;
import org.apache.hadoop.hbase.protobuf.generated.WALProtos.WALKey;
import org.apache.hadoop.hbase.regionserver.SequenceNumber;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.io.WritableUtils;

import com.google.common.annotations.VisibleForTesting;
import com.google.protobuf.ByteString;
import com.google.protobuf.HBaseZeroCopyByteString;

/**
 * A Key for an entry in the change log.
 *
 * The log intermingles edits to many tables and rows, so each log entry
 * identifies the appropriate table and row.  Within a table and row, they're
 * also sorted.
 *
 * <p>Some Transactional edits (START, COMMIT, ABORT) will not have an
 * associated row.
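 *
 * <p>A minimal construction sketch (hypothetical values; a real encoded
 * region name comes from <code>HRegionInfo#getEncodedNameAsBytes()</code>):
 * <pre>
 *   byte[] encodedRegionName = Bytes.toBytes("1588230740");
 *   HLogKey key = new HLogKey(encodedRegionName, TableName.valueOf("t1"),
 *       System.currentTimeMillis());
 * </pre>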
 */
// TODO: Key and WALEdit are never used separately, or in one-to-many relation, for practical
//       purposes. They need to be merged into HLogEntry.
@InterfaceAudience.Private
public class HLogKey implements WritableComparable<HLogKey>, SequenceNumber {
  public static final Log LOG = LogFactory.getLog(HLogKey.class);

  // Version codes should be < 0 (@see #readFields(DataInput)).
  // Version -2 (COMPRESSED) supports HLog compression.
  enum Version {
    UNVERSIONED(0),
    // Initial number we put on HLogKey when we introduced versioning.
    INITIAL(-1),
    // Version -2 introduced a dictionary compression facility.  Only this
    // dictionary-based compression is available in version -2.
    COMPRESSED(-2);

    final int code;
    static final Version[] byCode;
    static {
      byCode = Version.values();
      for (int i = 0; i < byCode.length; i++) {
        if (byCode[i].code != -1 * i) {
          throw new AssertionError("Values in this enum should be descending by one");
        }
      }
    }

    Version(int code) {
      this.code = code;
    }

    boolean atLeast(Version other) {
      return code <= other.code;
    }

    static Version fromCode(int code) {
      return byCode[code * -1];
    }
  }
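
  // Worked example of the encoding above: fromCode(-2) negates the code to
  // index byCode, returning byCode[2] == COMPRESSED, and
  // COMPRESSED.atLeast(INITIAL) holds because the codes descend (-2 <= -1).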

  /*
   * This is used for reading log entries created by previous releases
   * (0.94.11), which wrote the cluster information into the scopes of the
   * WALEdit.
   */
  private static final String PREFIX_CLUSTER_KEY = ".";

  private static final Version VERSION = Version.COMPRESSED;

  // The encoded region name.
  private byte [] encodedRegionName;
  private TableName tablename;
  private long logSeqNum;
  private CountDownLatch seqNumAssignedLatch = new CountDownLatch(1);
  // Time at which this edit was written.
  private long writeTime;

  // The first element in the list is the cluster id on which the change has originated
  private List<UUID> clusterIds;

  private NavigableMap<byte[], Integer> scopes;

  private long nonceGroup = HConstants.NO_NONCE;
  private long nonce = HConstants.NO_NONCE;
  static final List<UUID> EMPTY_UUIDS = Collections.unmodifiableList(new ArrayList<UUID>());

  private CompressionContext compressionContext;

  public HLogKey() {
    init(null, null, 0L, HConstants.LATEST_TIMESTAMP,
        new ArrayList<UUID>(), HConstants.NO_NONCE, HConstants.NO_NONCE);
  }

  @VisibleForTesting
  public HLogKey(final byte[] encodedRegionName, final TableName tablename, long logSeqNum,
      final long now, UUID clusterId) {
    List<UUID> clusterIds = new ArrayList<UUID>();
    clusterIds.add(clusterId);
    init(encodedRegionName, tablename, logSeqNum, now, clusterIds,
        HConstants.NO_NONCE, HConstants.NO_NONCE);
  }

  public HLogKey(final byte[] encodedRegionName, final TableName tablename) {
    this(encodedRegionName, tablename, System.currentTimeMillis());
  }

  public HLogKey(final byte[] encodedRegionName, final TableName tablename, final long now) {
    init(encodedRegionName, tablename, HLog.NO_SEQUENCE_ID, now,
        EMPTY_UUIDS, HConstants.NO_NONCE, HConstants.NO_NONCE);
  }

  /**
   * Create the log key for writing to the WAL.
   * We maintain the tablename mainly for debugging purposes.
   * A region name is always a sub-table object.
   * <p>Used by log splitting and snapshots.
   *
   * @param encodedRegionName Encoded name of the region as returned by
   * <code>HRegionInfo#getEncodedNameAsBytes()</code>.
   * @param tablename name of the table
   * @param logSeqNum log sequence number
   * @param now Time at which this edit was written.
   * @param clusterIds the clusters that have consumed the change (used in replication)
   * @param nonceGroup the nonce group of the operation
   * @param nonce the nonce of the operation
   */
  public HLogKey(final byte [] encodedRegionName, final TableName tablename,
      long logSeqNum, final long now, List<UUID> clusterIds, long nonceGroup, long nonce) {
    init(encodedRegionName, tablename, logSeqNum, now, clusterIds, nonceGroup, nonce);
  }

  /**
   * Create the log key for writing to the WAL.
   * We maintain the tablename mainly for debugging purposes.
   * A region name is always a sub-table object.
   *
   * @param encodedRegionName Encoded name of the region as returned by
   * <code>HRegionInfo#getEncodedNameAsBytes()</code>.
   * @param tablename name of the table
   * @param now Time at which this edit was written.
   * @param clusterIds the clusters that have consumed the change (used in replication)
   * @param nonceGroup the nonce group of the operation
   * @param nonce the nonce of the operation
   */
  public HLogKey(final byte [] encodedRegionName, final TableName tablename,
      final long now, List<UUID> clusterIds, long nonceGroup, long nonce) {
    init(encodedRegionName, tablename, HLog.NO_SEQUENCE_ID, now, clusterIds,
        nonceGroup, nonce);
  }

  /**
   * Create the log key for writing to the WAL.
   * We maintain the tablename mainly for debugging purposes.
   * A region name is always a sub-table object.
   *
   * @param encodedRegionName Encoded name of the region as returned by
   * <code>HRegionInfo#getEncodedNameAsBytes()</code>.
   * @param tablename name of the table
   * @param logSeqNum log sequence number
   * @param nonceGroup the nonce group of the operation
   * @param nonce the nonce of the operation
   */
  public HLogKey(final byte [] encodedRegionName, final TableName tablename, long logSeqNum,
      long nonceGroup, long nonce) {
    init(encodedRegionName, tablename, logSeqNum, EnvironmentEdgeManager.currentTimeMillis(),
        EMPTY_UUIDS, nonceGroup, nonce);
  }

  protected void init(final byte [] encodedRegionName, final TableName tablename,
      long logSeqNum, final long now, List<UUID> clusterIds, long nonceGroup, long nonce) {
    this.logSeqNum = logSeqNum;
    this.writeTime = now;
    this.clusterIds = clusterIds;
    this.encodedRegionName = encodedRegionName;
    this.tablename = tablename;
    this.nonceGroup = nonceGroup;
    this.nonce = nonce;
  }

  /**
   * @param compressionContext Compression context to use
   */
  public void setCompressionContext(CompressionContext compressionContext) {
    this.compressionContext = compressionContext;
  }

  /** @return encoded region name */
  public byte [] getEncodedRegionName() {
    return encodedRegionName;
  }

  /** @return table name */
  public TableName getTablename() {
    return tablename;
  }

  /** @return log sequence number */
  public long getLogSeqNum() {
    return this.logSeqNum;
  }

  /**
   * Allow the log sequence id to be set post-construction and release all waiters on the
   * assigned sequence number.
   * @param sequence the assigned sequence number
   */
  void setLogSeqNum(final long sequence) {
    this.logSeqNum = sequence;
    this.seqNumAssignedLatch.countDown();
  }

  /**
   * Wait until the sequence number is assigned and return the assigned value.
   * @return the newly assigned sequence number
   * @throws IOException if the wait is interrupted, rethrown as an InterruptedIOException
   */
  public long getSequenceNumber() throws IOException {
    try {
      this.seqNumAssignedLatch.await();
    } catch (InterruptedException ie) {
      LOG.warn("Thread interrupted waiting for next log sequence number");
      InterruptedIOException iie = new InterruptedIOException();
      iie.initCause(ie);
      throw iie;
    }
    return this.logSeqNum;
  }

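  // Handshake sketch (assumed from the latch usage above): the thread that
  // assigns the sequence number calls setLogSeqNum(n), counting the latch
  // down and unblocking any caller parked in getSequenceNumber().
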
  /**
   * @return the write time
   */
  public long getWriteTime() {
    return this.writeTime;
  }

  public NavigableMap<byte[], Integer> getScopes() {
    return scopes;
  }

  /** @return The nonce group */
  public long getNonceGroup() {
    return nonceGroup;
  }

  /** @return The nonce */
  public long getNonce() {
    return nonce;
  }

  public void setScopes(NavigableMap<byte[], Integer> scopes) {
    this.scopes = scopes;
  }

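  // The scope map associates each column family with a replication scope: 0
  // (HConstants.REPLICATION_SCOPE_LOCAL) means not replicated, while 1
  // (HConstants.REPLICATION_SCOPE_GLOBAL) means replicated to peer clusters.
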
  public void readOlderScopes(NavigableMap<byte[], Integer> scopes) {
    if (scopes != null) {
      Iterator<Map.Entry<byte[], Integer>> iterator = scopes.entrySet()
          .iterator();
      while (iterator.hasNext()) {
        Map.Entry<byte[], Integer> scope = iterator.next();
        String key = Bytes.toString(scope.getKey());
        if (key.startsWith(PREFIX_CLUSTER_KEY)) {
          addClusterId(UUID.fromString(key.substring(PREFIX_CLUSTER_KEY
              .length())));
          iterator.remove();
        }
      }
      if (scopes.size() > 0) {
        this.scopes = scopes;
      }
    }
  }

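  // Example of the legacy form handled above (hypothetical UUID): a 0.94.x
  // scope entry keyed ".0f1a2b3c-4d5e-6f70-8192-a3b4c5d6e7f8" is detected by
  // its "." prefix, recorded via addClusterId(), and dropped from the map.
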
  /**
   * Marks that the cluster with the given clusterId has consumed the change.
   */
  public void addClusterId(UUID clusterId) {
    if (!clusterIds.contains(clusterId)) {
      clusterIds.add(clusterId);
    }
  }

  /**
   * @return the list of cluster ids that have consumed the change
   */
  public List<UUID> getClusterIds() {
    return clusterIds;
  }

  /**
   * @return the cluster id on which the change has originated. If there is no such cluster, it
   *         returns DEFAULT_CLUSTER_ID (cases where replication is not enabled)
   */
  public UUID getOriginatingClusterId() {
    return clusterIds.isEmpty() ? HConstants.DEFAULT_CLUSTER_ID : clusterIds.get(0);
  }

  @Override
  public String toString() {
    return tablename + "/" + Bytes.toString(encodedRegionName) + "/" +
      logSeqNum;
  }

  /**
   * Produces a string map for this key. Useful for programmatic use and
   * manipulation of the data stored in an HLogKey, for example, printing
   * as JSON.
   *
   * @return a Map containing data from this key
   */
  public Map<String, Object> toStringMap() {
    Map<String, Object> stringMap = new HashMap<String, Object>();
    stringMap.put("table", tablename);
    stringMap.put("region", Bytes.toStringBinary(encodedRegionName));
    stringMap.put("sequence", logSeqNum);
    return stringMap;
  }

  @Override
  public boolean equals(Object obj) {
    if (this == obj) {
      return true;
    }
    if (obj == null || getClass() != obj.getClass()) {
      return false;
    }
    return compareTo((HLogKey)obj) == 0;
  }

  @Override
  public int hashCode() {
    int result = Bytes.hashCode(this.encodedRegionName);
    result ^= this.logSeqNum;
    result ^= this.writeTime;
    return result;
  }

  @Override
  public int compareTo(HLogKey o) {
    int result = Bytes.compareTo(this.encodedRegionName, o.encodedRegionName);
    if (result == 0) {
      if (this.logSeqNum < o.logSeqNum) {
        result = -1;
      } else if (this.logSeqNum > o.logSeqNum) {
        result = 1;
      }
      if (result == 0) {
        if (this.writeTime < o.writeTime) {
          result = -1;
        } else if (this.writeTime > o.writeTime) {
          result = 1;
        }
      }
    }
    // why isn't cluster id accounted for?
    return result;
  }

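  // Ordering summary for compareTo above: keys sort by the byte order of the
  // encoded region name, then by logSeqNum, then by writeTime; cluster ids
  // and nonces do not participate in the ordering.
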
  /**
   * Drop this instance's tablename byte array and instead
   * hold a reference to the provided tablename. This is not
   * meant to be a general purpose setter - it's only used
   * to collapse references to conserve memory.
   */
  void internTableName(TableName tablename) {
    // We should not use this as a setter - only to swap
    // in a new reference to the same table name.
    assert tablename.equals(this.tablename);
    this.tablename = tablename;
  }

  /**
   * Drop this instance's region name byte array and instead
   * hold a reference to the provided region name. This is not
   * meant to be a general purpose setter - it's only used
   * to collapse references to conserve memory.
   */
  void internEncodedRegionName(byte [] encodedRegionName) {
    // We should not use this as a setter - only to swap
    // in a new reference to the same encoded region name.
    assert Bytes.equals(this.encodedRegionName, encodedRegionName);
    this.encodedRegionName = encodedRegionName;
  }

  @Override
  @Deprecated
  public void write(DataOutput out) throws IOException {
    LOG.warn("HLogKey is being serialized to writable - only expected in test code");
    WritableUtils.writeVInt(out, VERSION.code);
    if (compressionContext == null) {
      Bytes.writeByteArray(out, this.encodedRegionName);
      Bytes.writeByteArray(out, this.tablename.getName());
    } else {
      Compressor.writeCompressed(this.encodedRegionName, 0,
          this.encodedRegionName.length, out,
          compressionContext.regionDict);
      Compressor.writeCompressed(this.tablename.getName(), 0, this.tablename.getName().length, out,
          compressionContext.tableDict);
    }
    out.writeLong(this.logSeqNum);
    out.writeLong(this.writeTime);
    // Don't need to write the clusters information as we are using protobufs from 0.95
    // Writing only the first clusterId for testing the legacy read
    Iterator<UUID> iterator = clusterIds.iterator();
    if (iterator.hasNext()) {
      out.writeBoolean(true);
      UUID clusterId = iterator.next();
      out.writeLong(clusterId.getMostSignificantBits());
      out.writeLong(clusterId.getLeastSignificantBits());
    } else {
      out.writeBoolean(false);
    }
  }

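  // Legacy (pre-protobuf) writable layout produced by write() above (the
  // version vint is absent in unversioned keys handled by readFields):
  //   vint  version code (negative; see Version)
  //   bytes encodedRegionName, then tablename (raw or dictionary-compressed)
  //   long  logSeqNum
  //   long  writeTime
  //   bool  cluster id present, followed by two longs (msb, lsb) when true
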
  @Override
  public void readFields(DataInput in) throws IOException {
    Version version = Version.UNVERSIONED;
    // HLogKey was not versioned in the beginning.
    // In order to introduce it now, we make use of the fact
    // that encodedRegionName was written with Bytes.writeByteArray,
    // which encodes the array length as a vint which is >= 0.
    // Hence if the vint is >= 0 we have an old version and the vint
    // encodes the length of encodedRegionName.
    // If < 0 we just read the version and the next vint is the length.
    // @see Bytes#readByteArray(DataInput)
    this.scopes = null; // writable HLogKey does not contain scopes
    int len = WritableUtils.readVInt(in);
    byte[] tablenameBytes = null;
    if (len < 0) {
      // what we just read was the version
      version = Version.fromCode(len);
      // We only compress V2 of HLogKey.
      // If compression is on, the length is handled by the dictionary
      if (compressionContext == null || !version.atLeast(Version.COMPRESSED)) {
        len = WritableUtils.readVInt(in);
      }
    }
    if (compressionContext == null || !version.atLeast(Version.COMPRESSED)) {
      this.encodedRegionName = new byte[len];
      in.readFully(this.encodedRegionName);
      tablenameBytes = Bytes.readByteArray(in);
    } else {
      this.encodedRegionName = Compressor.readCompressed(in, compressionContext.regionDict);
      tablenameBytes = Compressor.readCompressed(in, compressionContext.tableDict);
    }

    this.logSeqNum = in.readLong();
    this.writeTime = in.readLong();

    this.clusterIds.clear();
    if (version.atLeast(Version.INITIAL)) {
      if (in.readBoolean()) {
        // read the older log
        // Definitely is the originating cluster
        clusterIds.add(new UUID(in.readLong(), in.readLong()));
      }
    } else {
      try {
        // dummy read (former byte cluster id)
        in.readByte();
      } catch (EOFException e) {
        // Means it's a very old key, just continue
      }
    }
    try {
      this.tablename = TableName.valueOf(tablenameBytes);
    } catch (IllegalArgumentException iae) {
      if (Bytes.toString(tablenameBytes).equals(TableName.OLD_META_STR)) {
        // It is a pre-namespace meta table edit, continue with new format.
        LOG.info("Got an old .META. edit, continuing with new format");
        this.tablename = TableName.META_TABLE_NAME;
        this.encodedRegionName = HRegionInfo.FIRST_META_REGIONINFO.getEncodedNameAsBytes();
      } else if (Bytes.toString(tablenameBytes).equals(TableName.OLD_ROOT_STR)) {
        this.tablename = TableName.OLD_ROOT_TABLE_NAME;
        throw iae;
      } else {
        throw iae;
      }
    }
    // Do not need to read the clusters information as we are using protobufs from 0.95
  }

  public WALKey.Builder getBuilder(WALCellCodec.ByteStringCompressor compressor)
      throws IOException {
    WALKey.Builder builder = WALKey.newBuilder();
    if (compressionContext == null) {
      builder.setEncodedRegionName(HBaseZeroCopyByteString.wrap(this.encodedRegionName));
      builder.setTableName(HBaseZeroCopyByteString.wrap(this.tablename.getName()));
    } else {
      builder.setEncodedRegionName(compressor.compress(this.encodedRegionName,
        compressionContext.regionDict));
      builder.setTableName(compressor.compress(this.tablename.getName(),
        compressionContext.tableDict));
    }
    builder.setLogSequenceNumber(this.logSeqNum);
    builder.setWriteTime(writeTime);
    if (this.nonce != HConstants.NO_NONCE) {
      builder.setNonce(nonce);
    }
    if (this.nonceGroup != HConstants.NO_NONCE) {
      builder.setNonceGroup(nonceGroup);
    }
    HBaseProtos.UUID.Builder uuidBuilder = HBaseProtos.UUID.newBuilder();
    for (UUID clusterId : clusterIds) {
      uuidBuilder.setLeastSigBits(clusterId.getLeastSignificantBits());
      uuidBuilder.setMostSigBits(clusterId.getMostSignificantBits());
      builder.addClusterIds(uuidBuilder.build());
    }
    if (scopes != null) {
      for (Map.Entry<byte[], Integer> e : scopes.entrySet()) {
        ByteString family = (compressionContext == null) ?
            HBaseZeroCopyByteString.wrap(e.getKey())
            : compressor.compress(e.getKey(), compressionContext.familyDict);
        builder.addScopes(FamilyScope.newBuilder()
            .setFamily(family).setScopeType(ScopeType.valueOf(e.getValue())));
      }
    }
    return builder;
  }

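  // Round-trip sketch (assumed symmetric use of the two protobuf methods):
  // a key serialized via getBuilder(compressor).build() is restored with
  // readFieldsFromPb(walKey, uncompressor), provided both sides share the
  // same CompressionContext dictionaries.
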
  public void readFieldsFromPb(
      WALKey walKey, WALCellCodec.ByteStringUncompressor uncompressor) throws IOException {
    if (this.compressionContext != null) {
      this.encodedRegionName = uncompressor.uncompress(
          walKey.getEncodedRegionName(), compressionContext.regionDict);
      byte[] tablenameBytes = uncompressor.uncompress(
          walKey.getTableName(), compressionContext.tableDict);
      this.tablename = TableName.valueOf(tablenameBytes);
    } else {
      this.encodedRegionName = walKey.getEncodedRegionName().toByteArray();
      this.tablename = TableName.valueOf(walKey.getTableName().toByteArray());
    }
    clusterIds.clear();
    if (walKey.hasClusterId()) {
      // When we are reading an older log (0.95.1 release),
      // this is definitely the originating cluster.
      clusterIds.add(new UUID(walKey.getClusterId().getMostSigBits(), walKey.getClusterId()
          .getLeastSigBits()));
    }
    for (HBaseProtos.UUID clusterId : walKey.getClusterIdsList()) {
      clusterIds.add(new UUID(clusterId.getMostSigBits(), clusterId.getLeastSigBits()));
    }
    if (walKey.hasNonceGroup()) {
      this.nonceGroup = walKey.getNonceGroup();
    }
    if (walKey.hasNonce()) {
      this.nonce = walKey.getNonce();
    }
    this.scopes = null;
    if (walKey.getScopesCount() > 0) {
      this.scopes = new TreeMap<byte[], Integer>(Bytes.BYTES_COMPARATOR);
      for (FamilyScope scope : walKey.getScopesList()) {
        byte[] family = (compressionContext == null) ? scope.getFamily().toByteArray() :
          uncompressor.uncompress(scope.getFamily(), compressionContext.familyDict);
        this.scopes.put(family, scope.getScopeType().getNumber());
      }
    }
    this.logSeqNum = walKey.getLogSequenceNumber();
    this.writeTime = walKey.getWriteTime();
  }
}