View Javadoc

1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one
3    * or more contributor license agreements.  See the NOTICE file
4    * distributed with this work for additional information
5    * regarding copyright ownership.  The ASF licenses this file
6    * to you under the Apache License, Version 2.0 (the
7    * "License"); you may not use this file except in compliance
8    * with the License.  You may obtain a copy of the License at
9    *
10   *     http://www.apache.org/licenses/LICENSE-2.0
11   *
12   * Unless required by applicable law or agreed to in writing, software
13   * distributed under the License is distributed on an "AS IS" BASIS,
14   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15   * See the License for the specific language governing permissions and
16   * limitations under the License.
17   */
18  
19  package org.apache.hadoop.hbase;
20  
21  import java.io.DataInput;
22  import java.io.DataOutput;
23  import java.io.IOException;
24  import java.io.InputStream;
25  import java.io.OutputStream;
26  import java.nio.ByteBuffer;
27  import java.util.ArrayList;
28  import java.util.List;
29  
30  import org.apache.hadoop.hbase.KeyValue.Type;
31  import org.apache.hadoop.hbase.classification.InterfaceAudience;
32  import org.apache.hadoop.hbase.io.util.StreamUtils;
33  import org.apache.hadoop.hbase.util.ByteBufferUtils;
34  import org.apache.hadoop.hbase.util.Bytes;
35  import org.apache.hadoop.hbase.util.IterableUtils;
36  import org.apache.hadoop.io.IOUtils;
37  import org.apache.hadoop.io.WritableUtils;
38  
39  import com.google.common.base.Function;
40  import com.google.common.collect.Lists;
41  
42  /**
43   * static convenience methods for dealing with KeyValues and collections of KeyValues
44   */
45  @InterfaceAudience.Private
46  public class KeyValueUtil {
47  
48    /**************** length *********************/
49  
50    /**
51     * Returns number of bytes this cell would have been used if serialized as in {@link KeyValue}
52     * @param cell
53     * @return the length
54     */
55    public static int length(final Cell cell) {
56      return length(cell.getRowLength(), cell.getFamilyLength(), cell.getQualifierLength(),
57          cell.getValueLength(), cell.getTagsLength(), true);
58    }
59  
60    public static int length(short rlen, byte flen, int qlen, int vlen, int tlen, boolean withTags) {
61      if (withTags) {
62        return (int) (KeyValue.getKeyValueDataStructureSize(rlen, flen, qlen, vlen, tlen));
63      }
64      return (int) (KeyValue.getKeyValueDataStructureSize(rlen, flen, qlen, vlen));
65    }
66  
67    /**
68     * Returns number of bytes this cell's key part would have been used if serialized as in
69     * {@link KeyValue}. Key includes rowkey, family, qualifier, timestamp and type.
70     * @param cell
71     * @return the key length
72     */
73    public static int keyLength(final Cell cell) {
74      return keyLength(cell.getRowLength(), cell.getFamilyLength(), cell.getQualifierLength());
75    }
76  
77    private static int keyLength(short rlen, byte flen, int qlen) {
78      return (int) KeyValue.getKeyDataStructureSize(rlen, flen, qlen);
79    }
80  
81    public static int lengthWithMvccVersion(final KeyValue kv, final boolean includeMvccVersion) {
82      int length = kv.getLength();
83      if (includeMvccVersion) {
84        length += WritableUtils.getVIntSize(kv.getSequenceId());
85      }
86      return length;
87    }
88  
89    public static int totalLengthWithMvccVersion(final Iterable<? extends KeyValue> kvs,
90        final boolean includeMvccVersion) {
91      int length = 0;
92      for (KeyValue kv : IterableUtils.nullSafe(kvs)) {
93        length += lengthWithMvccVersion(kv, includeMvccVersion);
94      }
95      return length;
96    }
97  
98  
99    /**************** copy key only *********************/
100 
101   public static KeyValue copyToNewKeyValue(final Cell cell) {
102     byte[] bytes = copyToNewByteArray(cell);
103     KeyValue kvCell = new KeyValue(bytes, 0, bytes.length);
104     kvCell.setSequenceId(cell.getSequenceId());
105     return kvCell;
106   }
107 
108   /**
109    * The position will be set to the beginning of the new ByteBuffer
110    * @param cell
111    * @return the Bytebuffer containing the key part of the cell
112    */
113   public static ByteBuffer copyKeyToNewByteBuffer(final Cell cell) {
114     byte[] bytes = new byte[keyLength(cell)];
115     appendKeyTo(cell, bytes, 0);
116     ByteBuffer buffer = ByteBuffer.wrap(bytes);
117     return buffer;
118   }
119 
120   public static byte[] copyToNewByteArray(final Cell cell) {
121     int v1Length = length(cell);
122     byte[] backingBytes = new byte[v1Length];
123     appendToByteArray(cell, backingBytes, 0);
124     return backingBytes;
125   }
126 
127   public static int appendKeyTo(final Cell cell, final byte[] output,
128       final int offset) {
129     int nextOffset = offset;
130     nextOffset = Bytes.putShort(output, nextOffset, cell.getRowLength());
131     nextOffset = CellUtil.copyRowTo(cell, output, nextOffset);
132     nextOffset = Bytes.putByte(output, nextOffset, cell.getFamilyLength());
133     nextOffset = CellUtil.copyFamilyTo(cell, output, nextOffset);
134     nextOffset = CellUtil.copyQualifierTo(cell, output, nextOffset);
135     nextOffset = Bytes.putLong(output, nextOffset, cell.getTimestamp());
136     nextOffset = Bytes.putByte(output, nextOffset, cell.getTypeByte());
137     return nextOffset;
138   }
139 
140 
141   /**************** copy key and value *********************/
142 
143   public static int appendToByteArray(final Cell cell, final byte[] output, final int offset) {
144     // TODO when cell instance of KV we can bypass all steps and just do backing single array
145     // copy(?)
146     int pos = offset;
147     pos = Bytes.putInt(output, pos, keyLength(cell));
148     pos = Bytes.putInt(output, pos, cell.getValueLength());
149     pos = appendKeyTo(cell, output, pos);
150     pos = CellUtil.copyValueTo(cell, output, pos);
151     if ((cell.getTagsLength() > 0)) {
152       pos = Bytes.putAsShort(output, pos, cell.getTagsLength());
153       pos = CellUtil.copyTagTo(cell, output, pos);
154     }
155     return pos;
156   }
157 
158   /**
159    * The position will be set to the beginning of the new ByteBuffer
160    * @param cell
161    * @return the ByteBuffer containing the cell
162    */
163   public static ByteBuffer copyToNewByteBuffer(final Cell cell) {
164     byte[] bytes = new byte[length(cell)];
165     appendToByteArray(cell, bytes, 0);
166     ByteBuffer buffer = ByteBuffer.wrap(bytes);
167     return buffer;
168   }
169 
170   public static void appendToByteBuffer(final ByteBuffer bb, final KeyValue kv,
171       final boolean includeMvccVersion) {
172     // keep pushing the limit out. assume enough capacity
173     bb.limit(bb.position() + kv.getLength());
174     bb.put(kv.getBuffer(), kv.getOffset(), kv.getLength());
175     if (includeMvccVersion) {
176       int numMvccVersionBytes = WritableUtils.getVIntSize(kv.getSequenceId());
177       ByteBufferUtils.extendLimit(bb, numMvccVersionBytes);
178       ByteBufferUtils.writeVLong(bb, kv.getSequenceId());
179     }
180   }
181 
182 
183   /**************** iterating *******************************/
184 
185   /**
186    * Creates a new KeyValue object positioned in the supplied ByteBuffer and sets the ByteBuffer's
187    * position to the start of the next KeyValue. Does not allocate a new array or copy data.
188    * @param bb
189    * @param includesMvccVersion
190    * @param includesTags 
191    */
192   public static KeyValue nextShallowCopy(final ByteBuffer bb, final boolean includesMvccVersion,
193       boolean includesTags) {
194     if (bb.isDirect()) {
195       throw new IllegalArgumentException("only supports heap buffers");
196     }
197     if (bb.remaining() < 1) {
198       return null;
199     }
200     KeyValue keyValue = null;
201     int underlyingArrayOffset = bb.arrayOffset() + bb.position();
202     int keyLength = bb.getInt();
203     int valueLength = bb.getInt();
204     ByteBufferUtils.skip(bb, keyLength + valueLength);
205     int tagsLength = 0;
206     if (includesTags) {
207       // Read short as unsigned, high byte first
208       tagsLength = ((bb.get() & 0xff) << 8) ^ (bb.get() & 0xff);
209       ByteBufferUtils.skip(bb, tagsLength);
210     }
211     int kvLength = (int) KeyValue.getKeyValueDataStructureSize(keyLength, valueLength, tagsLength);
212     keyValue = new KeyValue(bb.array(), underlyingArrayOffset, kvLength);
213     if (includesMvccVersion) {
214       long mvccVersion = ByteBufferUtils.readVLong(bb);
215       keyValue.setSequenceId(mvccVersion);
216     }
217     return keyValue;
218   }
219 
220 
221   /*************** next/previous **********************************/
222 
223   /**
224    * Decrement the timestamp.  For tests (currently wasteful)
225    *
226    * Remember timestamps are sorted reverse chronologically.
227    * @param in
228    * @return previous key
229    */
230   public static KeyValue previousKey(final KeyValue in) {
231     return createFirstOnRow(CellUtil.cloneRow(in), CellUtil.cloneFamily(in),
232       CellUtil.cloneQualifier(in), in.getTimestamp() - 1);
233   }
234   
235 
236   /**
237    * Create a KeyValue for the specified row, family and qualifier that would be
238    * larger than or equal to all other possible KeyValues that have the same
239    * row, family, qualifier. Used for reseeking.
240    *
241    * @param row
242    *          row key
243    * @param roffset
244    *         row offset
245    * @param rlength
246    *         row length
247    * @param family
248    *         family name
249    * @param foffset
250    *         family offset
251    * @param flength
252    *         family length
253    * @param qualifier
254    *        column qualifier
255    * @param qoffset
256    *        qualifier offset
257    * @param qlength
258    *        qualifier length
259    * @return Last possible key on passed row, family, qualifier.
260    */
261   public static KeyValue createLastOnRow(final byte[] row, final int roffset, final int rlength,
262       final byte[] family, final int foffset, final int flength, final byte[] qualifier,
263       final int qoffset, final int qlength) {
264     return new KeyValue(row, roffset, rlength, family, foffset, flength, qualifier, qoffset,
265         qlength, HConstants.OLDEST_TIMESTAMP, Type.Minimum, null, 0, 0);
266   }
267 
268   /**
269    * Create a KeyValue that is smaller than all other possible KeyValues
270    * for the given row. That is any (valid) KeyValue on 'row' would sort
271    * _after_ the result.
272    *
273    * @param row - row key (arbitrary byte array)
274    * @return First possible KeyValue on passed <code>row</code>
275    */
276   public static KeyValue createFirstOnRow(final byte [] row, int roffset, short rlength) {
277     return new KeyValue(row, roffset, rlength,
278         null, 0, 0, null, 0, 0, HConstants.LATEST_TIMESTAMP, Type.Maximum, null, 0, 0);
279   }
280 
281   /**
282    * Creates a KeyValue that is last on the specified row id. That is,
283    * every other possible KeyValue for the given row would compareTo()
284    * less than the result of this call.
285    * @param row row key
286    * @return Last possible KeyValue on passed <code>row</code>
287    */
288   public static KeyValue createLastOnRow(final byte[] row) {
289     return new KeyValue(row, null, null, HConstants.LATEST_TIMESTAMP, Type.Minimum);
290   }
291 
292   /**
293    * Create a KeyValue that is smaller than all other possible KeyValues
294    * for the given row. That is any (valid) KeyValue on 'row' would sort
295    * _after_ the result.
296    *
297    * @param row - row key (arbitrary byte array)
298    * @return First possible KeyValue on passed <code>row</code>
299    */
300   public static KeyValue createFirstOnRow(final byte [] row) {
301     return createFirstOnRow(row, HConstants.LATEST_TIMESTAMP);
302   }
303 
304   /**
305    * Creates a KeyValue that is smaller than all other KeyValues that
306    * are older than the passed timestamp.
307    * @param row - row key (arbitrary byte array)
308    * @param ts - timestamp
309    * @return First possible key on passed <code>row</code> and timestamp.
310    */
311   public static KeyValue createFirstOnRow(final byte [] row,
312       final long ts) {
313     return new KeyValue(row, null, null, ts, Type.Maximum);
314   }
315 
316   /**
317    * Create a KeyValue for the specified row, family and qualifier that would be
318    * smaller than all other possible KeyValues that have the same row,family,qualifier.
319    * Used for seeking.
320    * @param row - row key (arbitrary byte array)
321    * @param family - family name
322    * @param qualifier - column qualifier
323    * @return First possible key on passed <code>row</code>, and column.
324    */
325   public static KeyValue createFirstOnRow(final byte [] row, final byte [] family,
326       final byte [] qualifier) {
327     return new KeyValue(row, family, qualifier, HConstants.LATEST_TIMESTAMP, Type.Maximum);
328   }
329 
330   /**
331    * @param row - row key (arbitrary byte array)
332    * @param f - family name
333    * @param q - column qualifier
334    * @param ts - timestamp
335    * @return First possible key on passed <code>row</code>, column and timestamp
336    */
337   public static KeyValue createFirstOnRow(final byte [] row, final byte [] f,
338       final byte [] q, final long ts) {
339     return new KeyValue(row, f, q, ts, Type.Maximum);
340   }
341 
342   /**
343    * Create a KeyValue for the specified row, family and qualifier that would be
344    * smaller than all other possible KeyValues that have the same row,
345    * family, qualifier.
346    * Used for seeking.
347    * @param row row key
348    * @param roffset row offset
349    * @param rlength row length
350    * @param family family name
351    * @param foffset family offset
352    * @param flength family length
353    * @param qualifier column qualifier
354    * @param qoffset qualifier offset
355    * @param qlength qualifier length
356    * @return First possible key on passed Row, Family, Qualifier.
357    */
358   public static KeyValue createFirstOnRow(final byte [] row,
359       final int roffset, final int rlength, final byte [] family,
360       final int foffset, final int flength, final byte [] qualifier,
361       final int qoffset, final int qlength) {
362     return new KeyValue(row, roffset, rlength, family,
363         foffset, flength, qualifier, qoffset, qlength,
364         HConstants.LATEST_TIMESTAMP, Type.Maximum, null, 0, 0);
365   }
366 
367   /**
368    * Create a KeyValue for the specified row, family and qualifier that would be
369    * smaller than all other possible KeyValues that have the same row,
370    * family, qualifier.
371    * Used for seeking.
372    *
373    * @param buffer the buffer to use for the new <code>KeyValue</code> object
374    * @param row the value key
375    * @param family family name
376    * @param qualifier column qualifier
377    *
378    * @return First possible key on passed Row, Family, Qualifier.
379    *
380    * @throws IllegalArgumentException The resulting <code>KeyValue</code> object would be larger
381    * than the provided buffer or than <code>Integer.MAX_VALUE</code>
382    */
383   public static KeyValue createFirstOnRow(byte [] buffer, final byte [] row,
384       final byte [] family, final byte [] qualifier)
385           throws IllegalArgumentException {
386     return createFirstOnRow(buffer, 0, row, 0, row.length,
387         family, 0, family.length,
388         qualifier, 0, qualifier.length);
389   }
390 
391   /**
392    * Create a KeyValue for the specified row, family and qualifier that would be
393    * smaller than all other possible KeyValues that have the same row,
394    * family, qualifier.
395    * Used for seeking.
396    *
397    * @param buffer the buffer to use for the new <code>KeyValue</code> object
398    * @param boffset buffer offset
399    * @param row the value key
400    * @param roffset row offset
401    * @param rlength row length
402    * @param family family name
403    * @param foffset family offset
404    * @param flength family length
405    * @param qualifier column qualifier
406    * @param qoffset qualifier offset
407    * @param qlength qualifier length
408    *
409    * @return First possible key on passed Row, Family, Qualifier.
410    *
411    * @throws IllegalArgumentException The resulting <code>KeyValue</code> object would be larger
412    * than the provided buffer or than <code>Integer.MAX_VALUE</code>
413    */
414   public static KeyValue createFirstOnRow(byte[] buffer, final int boffset, final byte[] row,
415       final int roffset, final int rlength, final byte[] family, final int foffset,
416       final int flength, final byte[] qualifier, final int qoffset, final int qlength)
417       throws IllegalArgumentException {
418 
419     long lLength = KeyValue.getKeyValueDataStructureSize(rlength, flength, qlength, 0);
420 
421     if (lLength > Integer.MAX_VALUE) {
422       throw new IllegalArgumentException("KeyValue length " + lLength + " > " + Integer.MAX_VALUE);
423     }
424     int iLength = (int) lLength;
425     if (buffer.length - boffset < iLength) {
426       throw new IllegalArgumentException("Buffer size " + (buffer.length - boffset) + " < "
427           + iLength);
428     }
429 
430     int len = KeyValue.writeByteArray(buffer, boffset, row, roffset, rlength, family, foffset,
431         flength, qualifier, qoffset, qlength, HConstants.LATEST_TIMESTAMP, KeyValue.Type.Maximum,
432         null, 0, 0, null);
433     return new KeyValue(buffer, boffset, len);
434   }
435 
436   /*************** misc **********************************/
437   /**
438    * @param cell
439    * @return <code>cell</code> if it is an instance of {@link KeyValue} else we will return a
440    * new {@link KeyValue} instance made from <code>cell</code>
441    * @deprecated without any replacement.
442    */
443   @Deprecated
444   public static KeyValue ensureKeyValue(final Cell cell) {
445     if (cell == null) return null;
446     return cell instanceof KeyValue? (KeyValue)cell: copyToNewKeyValue(cell);
447   }
448 
449   @Deprecated
450   public static List<KeyValue> ensureKeyValues(List<Cell> cells) {
451     List<KeyValue> lazyList = Lists.transform(cells, new Function<Cell, KeyValue>() {
452       public KeyValue apply(Cell arg0) {
453         return KeyValueUtil.ensureKeyValue(arg0);
454       }
455     });
456     return new ArrayList<KeyValue>(lazyList);
457   }
458   /**
459    * Write out a KeyValue in the manner in which we used to when KeyValue was a
460    * Writable.
461    *
462    * @param kv
463    * @param out
464    * @return Length written on stream
465    * @throws IOException
466    * @see #create(DataInput) for the inverse function
467    */
468   public static long write(final KeyValue kv, final DataOutput out) throws IOException {
469     // This is how the old Writables write used to serialize KVs. Need to figure
470     // way to make it
471     // work for all implementations.
472     int length = kv.getLength();
473     out.writeInt(length);
474     out.write(kv.getBuffer(), kv.getOffset(), length);
475     return length + Bytes.SIZEOF_INT;
476   }
477 
478   /**
479    * Create a KeyValue reading from the raw InputStream. Named
480    * <code>iscreate</code> so doesn't clash with {@link #create(DataInput)}
481    *
482    * @param in
483    * @param withTags whether the keyvalue should include tags are not
484    * @return Created KeyValue OR if we find a length of zero, we will return
485    *         null which can be useful marking a stream as done.
486    * @throws IOException
487    */
488   public static KeyValue iscreate(final InputStream in, boolean withTags) throws IOException {
489     byte[] intBytes = new byte[Bytes.SIZEOF_INT];
490     int bytesRead = 0;
491     while (bytesRead < intBytes.length) {
492       int n = in.read(intBytes, bytesRead, intBytes.length - bytesRead);
493       if (n < 0) {
494         if (bytesRead == 0)
495           return null; // EOF at start is ok
496         throw new IOException("Failed read of int, read " + bytesRead + " bytes");
497       }
498       bytesRead += n;
499     }
500     // TODO: perhaps some sanity check is needed here.
501     byte[] bytes = new byte[Bytes.toInt(intBytes)];
502     IOUtils.readFully(in, bytes, 0, bytes.length);
503     if (withTags) {
504       return new KeyValue(bytes, 0, bytes.length);
505     } else {
506       return new NoTagsKeyValue(bytes, 0, bytes.length);
507     }
508   }
509 
510   /**
511    * @param b
512    * @return A KeyValue made of a byte array that holds the key-only part.
513    *         Needed to convert hfile index members to KeyValues.
514    */
515   public static KeyValue createKeyValueFromKey(final byte[] b) {
516     return createKeyValueFromKey(b, 0, b.length);
517   }
518 
519   /**
520    * @param bb
521    * @return A KeyValue made of a byte buffer that holds the key-only part.
522    *         Needed to convert hfile index members to KeyValues.
523    */
524   public static KeyValue createKeyValueFromKey(final ByteBuffer bb) {
525     return createKeyValueFromKey(bb.array(), bb.arrayOffset(), bb.limit());
526   }
527 
528   /**
529    * @param b
530    * @param o
531    * @param l
532    * @return A KeyValue made of a byte array that holds the key-only part.
533    *         Needed to convert hfile index members to KeyValues.
534    */
535   public static KeyValue createKeyValueFromKey(final byte[] b, final int o, final int l) {
536     byte[] newb = new byte[l + KeyValue.ROW_OFFSET];
537     System.arraycopy(b, o, newb, KeyValue.ROW_OFFSET, l);
538     Bytes.putInt(newb, 0, l);
539     Bytes.putInt(newb, Bytes.SIZEOF_INT, 0);
540     return new KeyValue(newb);
541   }
542 
543   /**
544    * @param in
545    *          Where to read bytes from. Creates a byte array to hold the
546    *          KeyValue backing bytes copied from the steam.
547    * @return KeyValue created by deserializing from <code>in</code> OR if we
548    *         find a length of zero, we will return null which can be useful
549    *         marking a stream as done.
550    * @throws IOException
551    */
552   public static KeyValue create(final DataInput in) throws IOException {
553     return create(in.readInt(), in);
554   }
555 
556   /**
557    * Create a KeyValue reading <code>length</code> from <code>in</code>
558    * 
559    * @param length
560    * @param in
561    * @return Created KeyValue OR if we find a length of zero, we will return
562    *         null which can be useful marking a stream as done.
563    * @throws IOException
564    */
565   public static KeyValue create(int length, final DataInput in) throws IOException {
566 
567     if (length <= 0) {
568       if (length == 0)
569         return null;
570       throw new IOException("Failed read " + length + " bytes, stream corrupt?");
571     }
572 
573     // This is how the old Writables.readFrom used to deserialize. Didn't even
574     // vint.
575     byte[] bytes = new byte[length];
576     in.readFully(bytes);
577     return new KeyValue(bytes, 0, length);
578   }
579 
580   public static void oswrite(final Cell cell, final OutputStream out, final boolean withTags)
581       throws IOException {
582     if (cell instanceof Streamable) {
583       ((Streamable)cell).write(out, withTags);
584     } else {
585       short rlen = cell.getRowLength();
586       byte flen = cell.getFamilyLength();
587       int qlen = cell.getQualifierLength();
588       int vlen = cell.getValueLength();
589       int tlen = cell.getTagsLength();
590 
591       // write total length
592       KeyValue.writeInt(out, length(rlen, flen, qlen, vlen, tlen, withTags));
593       // write key length
594       KeyValue.writeInt(out, keyLength(rlen, flen, qlen));
595       // write value length
596       KeyValue.writeInt(out, vlen);
597       // Write rowkey - 2 bytes rk length followed by rowkey bytes
598       StreamUtils.writeShort(out, rlen);
599       out.write(cell.getRowArray(), cell.getRowOffset(), rlen);
600       // Write cf - 1 byte of cf length followed by the family bytes
601       out.write(flen);
602       out.write(cell.getFamilyArray(), cell.getFamilyOffset(), flen);
603       // write qualifier
604       out.write(cell.getQualifierArray(), cell.getQualifierOffset(), qlen);
605       // write timestamp
606       StreamUtils.writeLong(out, cell.getTimestamp());
607       // write the type
608       out.write(cell.getTypeByte());
609       // write value
610       out.write(cell.getValueArray(), cell.getValueOffset(), vlen);
611       // write tags if we have to
612       if (withTags && tlen > 0) {
613         // 2 bytes tags length followed by tags bytes
614         // tags length is serialized with 2 bytes only(short way) even if the
615         // type is int. As this
616         // is non -ve numbers, we save the sign bit. See HBASE-11437
617         out.write((byte) (0xff & (tlen >> 8)));
618         out.write((byte) (0xff & tlen));
619         out.write(cell.getTagsArray(), cell.getTagsOffset(), tlen);
620       }
621     }
622   }
623 }