View Javadoc

1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one
3    * or more contributor license agreements.  See the NOTICE file
4    * distributed with this work for additional information
5    * regarding copyright ownership.  The ASF licenses this file
6    * to you under the Apache License, Version 2.0 (the
7    * "License"); you may not use this file except in compliance
8    * with the License.  You may obtain a copy of the License at
9    *
10   *     http://www.apache.org/licenses/LICENSE-2.0
11   *
12   * Unless required by applicable law or agreed to in writing, software
13   * distributed under the License is distributed on an "AS IS" BASIS,
14   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15   * See the License for the specific language governing permissions and
16   * limitations under the License.
17   */
18  
19  package org.apache.hadoop.hbase;
20  
21  import java.io.IOException;
22  import java.io.OutputStream;
23  import java.nio.ByteBuffer;
24  import java.util.ArrayList;
25  import java.util.List;
26  
27  import org.apache.hadoop.hbase.classification.InterfaceAudience;
28  import org.apache.hadoop.hbase.KeyValue.Type;
29  import org.apache.hadoop.hbase.io.util.StreamUtils;
30  import org.apache.hadoop.hbase.util.ByteBufferUtils;
31  import org.apache.hadoop.hbase.util.Bytes;
32  import org.apache.hadoop.hbase.util.IterableUtils;
33  import org.apache.hadoop.hbase.util.SimpleMutableByteRange;
34  import org.apache.hadoop.io.WritableUtils;
35  
36  import com.google.common.base.Function;
37  import com.google.common.collect.Lists;
38  
39  /**
40   * static convenience methods for dealing with KeyValues and collections of KeyValues
41   */
42  @InterfaceAudience.Private
43  public class KeyValueUtil {
44  
45    /**************** length *********************/
46  
47    /**
48     * Returns number of bytes this cell would have been used if serialized as in {@link KeyValue}
49     * @param cell
50     * @return the length
51     */
52    public static int length(final Cell cell) {
53      return length(cell.getRowLength(), cell.getFamilyLength(), cell.getQualifierLength(),
54          cell.getValueLength(), cell.getTagsLength(), true);
55    }
56  
57    private static int length(short rlen, byte flen, int qlen, int vlen, int tlen, boolean withTags) {
58      if (withTags) {
59        return (int) (KeyValue.getKeyValueDataStructureSize(rlen, flen, qlen, vlen, tlen));
60      }
61      return (int) (KeyValue.getKeyValueDataStructureSize(rlen, flen, qlen, vlen));
62    }
63  
64    /**
65     * Returns number of bytes this cell's key part would have been used if serialized as in
66     * {@link KeyValue}. Key includes rowkey, family, qualifier, timestamp and type.
67     * @param cell
68     * @return the key length
69     */
70    public static int keyLength(final Cell cell) {
71      return keyLength(cell.getRowLength(), cell.getFamilyLength(), cell.getQualifierLength());
72    }
73  
74    private static int keyLength(short rlen, byte flen, int qlen) {
75      return (int) KeyValue.getKeyDataStructureSize(rlen, flen, qlen);
76    }
77  
78    public static int lengthWithMvccVersion(final KeyValue kv, final boolean includeMvccVersion) {
79      int length = kv.getLength();
80      if (includeMvccVersion) {
81        length += WritableUtils.getVIntSize(kv.getMvccVersion());
82      }
83      return length;
84    }
85  
86    public static int totalLengthWithMvccVersion(final Iterable<? extends KeyValue> kvs,
87        final boolean includeMvccVersion) {
88      int length = 0;
89      for (KeyValue kv : IterableUtils.nullSafe(kvs)) {
90        length += lengthWithMvccVersion(kv, includeMvccVersion);
91      }
92      return length;
93    }
94  
95  
96    /**************** copy key only *********************/
97  
98    public static KeyValue copyToNewKeyValue(final Cell cell) {
99      byte[] bytes = copyToNewByteArray(cell);
100     KeyValue kvCell = new KeyValue(bytes, 0, bytes.length);
101     kvCell.setSequenceId(cell.getMvccVersion());
102     return kvCell;
103   }
104 
105   public static ByteBuffer copyKeyToNewByteBuffer(final Cell cell) {
106     byte[] bytes = new byte[keyLength(cell)];
107     appendKeyTo(cell, bytes, 0);
108     ByteBuffer buffer = ByteBuffer.wrap(bytes);
109     buffer.position(buffer.limit());//make it look as if each field were appended
110     return buffer;
111   }
112 
113   public static byte[] copyToNewByteArray(final Cell cell) {
114     int v1Length = length(cell);
115     byte[] backingBytes = new byte[v1Length];
116     appendToByteArray(cell, backingBytes, 0);
117     return backingBytes;
118   }
119 
120   public static int appendKeyTo(final Cell cell, final byte[] output,
121       final int offset) {
122     int nextOffset = offset;
123     nextOffset = Bytes.putShort(output, nextOffset, cell.getRowLength());
124     nextOffset = CellUtil.copyRowTo(cell, output, nextOffset);
125     nextOffset = Bytes.putByte(output, nextOffset, cell.getFamilyLength());
126     nextOffset = CellUtil.copyFamilyTo(cell, output, nextOffset);
127     nextOffset = CellUtil.copyQualifierTo(cell, output, nextOffset);
128     nextOffset = Bytes.putLong(output, nextOffset, cell.getTimestamp());
129     nextOffset = Bytes.putByte(output, nextOffset, cell.getTypeByte());
130     return nextOffset;
131   }
132 
133 
134   /**************** copy key and value *********************/
135 
136   public static int appendToByteArray(final Cell cell, final byte[] output, final int offset) {
137     // TODO when cell instance of KV we can bypass all steps and just do backing single array
138     // copy(?)
139     int pos = offset;
140     pos = Bytes.putInt(output, pos, keyLength(cell));
141     pos = Bytes.putInt(output, pos, cell.getValueLength());
142     pos = appendKeyTo(cell, output, pos);
143     pos = CellUtil.copyValueTo(cell, output, pos);
144     if ((cell.getTagsLength() > 0)) {
145       pos = Bytes.putAsShort(output, pos, cell.getTagsLength());
146       pos = CellUtil.copyTagTo(cell, output, pos);
147     }
148     return pos;
149   }
150 
151   public static ByteBuffer copyToNewByteBuffer(final Cell cell) {
152     byte[] bytes = new byte[length(cell)];
153     appendToByteArray(cell, bytes, 0);
154     ByteBuffer buffer = ByteBuffer.wrap(bytes);
155     buffer.position(buffer.limit());//make it look as if each field were appended
156     return buffer;
157   }
158 
159   public static void appendToByteBuffer(final ByteBuffer bb, final KeyValue kv,
160       final boolean includeMvccVersion) {
161     // keep pushing the limit out. assume enough capacity
162     bb.limit(bb.position() + kv.getLength());
163     bb.put(kv.getBuffer(), kv.getOffset(), kv.getLength());
164     if (includeMvccVersion) {
165       int numMvccVersionBytes = WritableUtils.getVIntSize(kv.getMvccVersion());
166       ByteBufferUtils.extendLimit(bb, numMvccVersionBytes);
167       ByteBufferUtils.writeVLong(bb, kv.getMvccVersion());
168     }
169   }
170 
171 
172   /**************** iterating *******************************/
173 
174   /**
175    * Creates a new KeyValue object positioned in the supplied ByteBuffer and sets the ByteBuffer's
176    * position to the start of the next KeyValue. Does not allocate a new array or copy data.
177    * @param bb
178    * @param includesMvccVersion
179    * @param includesTags 
180    */
181   public static KeyValue nextShallowCopy(final ByteBuffer bb, final boolean includesMvccVersion,
182       boolean includesTags) {
183     if (bb.isDirect()) {
184       throw new IllegalArgumentException("only supports heap buffers");
185     }
186     if (bb.remaining() < 1) {
187       return null;
188     }
189     KeyValue keyValue = null;
190     int underlyingArrayOffset = bb.arrayOffset() + bb.position();
191     int keyLength = bb.getInt();
192     int valueLength = bb.getInt();
193     ByteBufferUtils.skip(bb, keyLength + valueLength);
194     int tagsLength = 0;
195     if (includesTags) {
196       // Read short as unsigned, high byte first
197       tagsLength = ((bb.get() & 0xff) << 8) ^ (bb.get() & 0xff);
198       ByteBufferUtils.skip(bb, tagsLength);
199     }
200     int kvLength = (int) KeyValue.getKeyValueDataStructureSize(keyLength, valueLength, tagsLength);
201     keyValue = new KeyValue(bb.array(), underlyingArrayOffset, kvLength);
202     if (includesMvccVersion) {
203       long mvccVersion = ByteBufferUtils.readVLong(bb);
204       keyValue.setSequenceId(mvccVersion);
205     }
206     return keyValue;
207   }
208 
209 
210   /*************** next/previous **********************************/
211 
212   /**
213    * Append single byte 0x00 to the end of the input row key
214    */
215   public static KeyValue createFirstKeyInNextRow(final Cell in){
216     byte[] nextRow = new byte[in.getRowLength() + 1];
217     System.arraycopy(in.getRowArray(), in.getRowOffset(), nextRow, 0, in.getRowLength());
218     nextRow[nextRow.length - 1] = 0;//maybe not necessary
219     return createFirstOnRow(nextRow);
220   }
221 
222   /**
223    * Increment the row bytes and clear the other fields
224    */
225   public static KeyValue createFirstKeyInIncrementedRow(final Cell in){
226     byte[] thisRow = new SimpleMutableByteRange(in.getRowArray(), in.getRowOffset(),
227         in.getRowLength()).deepCopyToNewArray();
228     byte[] nextRow = Bytes.unsignedCopyAndIncrement(thisRow);
229     return createFirstOnRow(nextRow);
230   }
231 
232   /**
233    * Decrement the timestamp.  For tests (currently wasteful)
234    *
235    * Remember timestamps are sorted reverse chronologically.
236    * @param in
237    * @return previous key
238    */
239   public static KeyValue previousKey(final KeyValue in) {
240     return createFirstOnRow(CellUtil.cloneRow(in), CellUtil.cloneFamily(in),
241       CellUtil.cloneQualifier(in), in.getTimestamp() - 1);
242   }
243   
244 
245   /**
246    * Create a KeyValue for the specified row, family and qualifier that would be
247    * larger than or equal to all other possible KeyValues that have the same
248    * row, family, qualifier. Used for reseeking.
249    *
250    * @param row
251    *          row key
252    * @param roffset
253    *         row offset
254    * @param rlength
255    *         row length
256    * @param family
257    *         family name
258    * @param foffset
259    *         family offset
260    * @param flength
261    *         family length
262    * @param qualifier
263    *        column qualifier
264    * @param qoffset
265    *        qualifier offset
266    * @param qlength
267    *        qualifier length
268    * @return Last possible key on passed row, family, qualifier.
269    */
270   public static KeyValue createLastOnRow(final byte[] row, final int roffset, final int rlength,
271       final byte[] family, final int foffset, final int flength, final byte[] qualifier,
272       final int qoffset, final int qlength) {
273     return new KeyValue(row, roffset, rlength, family, foffset, flength, qualifier, qoffset,
274         qlength, HConstants.OLDEST_TIMESTAMP, Type.Minimum, null, 0, 0);
275   }
276   
277   /**
278    * Creates a keyValue for the specified keyvalue larger than or equal to all other possible
279    * KeyValues that have the same row, family, qualifer.  Used for reseeking
280    * @param kv
281    * @return KeyValue
282    */
283   public static KeyValue createLastOnRow(Cell kv) {
284     return createLastOnRow(kv.getRowArray(), kv.getRowOffset(), kv.getRowLength(), null, 0, 0,
285         null, 0, 0);
286   }
287 
288   /**
289    * Similar to
290    * {@link #createLastOnRow(byte[], int, int, byte[], int, int, byte[], int, int)}
291    * but creates the last key on the row/column of this KV (the value part of
292    * the returned KV is always empty). Used in creating "fake keys" for the
293    * multi-column Bloom filter optimization to skip the row/column we already
294    * know is not in the file.
295    * 
296    * @param kv - cell
297    * @return the last key on the row/column of the given key-value pair
298    */
299   public static KeyValue createLastOnRowCol(Cell kv) {
300     return new KeyValue(kv.getRowArray(), kv.getRowOffset(), kv.getRowLength(),
301         kv.getFamilyArray(), kv.getFamilyOffset(), kv.getFamilyLength(), kv.getQualifierArray(),
302         kv.getQualifierOffset(), kv.getQualifierLength(), HConstants.OLDEST_TIMESTAMP,
303         Type.Minimum, null, 0, 0);
304   }
305 
306   /**
307    * Creates the first KV with the row/family/qualifier of this KV and the given
308    * timestamp. Uses the "maximum" KV type that guarantees that the new KV is
309    * the lowest possible for this combination of row, family, qualifier, and
310    * timestamp. This KV's own timestamp is ignored. While this function copies
311    * the value from this KV, it is normally used on key-only KVs.
312    * 
313    * @param kv - cell
314    * @param ts
315    */
316   public static KeyValue createFirstOnRowColTS(Cell kv, long ts) {
317     return new KeyValue(kv.getRowArray(), kv.getRowOffset(), kv.getRowLength(),
318         kv.getFamilyArray(), kv.getFamilyOffset(), kv.getFamilyLength(), kv.getQualifierArray(),
319         kv.getQualifierOffset(), kv.getQualifierLength(), ts, Type.Maximum, kv.getValueArray(),
320         kv.getValueOffset(), kv.getValueLength());
321   }
322   
323   /**
324    * Create a KeyValue that is smaller than all other possible KeyValues
325    * for the given row. That is any (valid) KeyValue on 'row' would sort
326    * _after_ the result.
327    *
328    * @param row - row key (arbitrary byte array)
329    * @return First possible KeyValue on passed <code>row</code>
330    */
331   public static KeyValue createFirstOnRow(final byte [] row, int roffset, short rlength) {
332     return new KeyValue(row, roffset, rlength,
333         null, 0, 0, null, 0, 0, HConstants.LATEST_TIMESTAMP, Type.Maximum, null, 0, 0);
334   }
335   
336 
337   /**
338    * Creates a KeyValue that is last on the specified row id. That is,
339    * every other possible KeyValue for the given row would compareTo()
340    * less than the result of this call.
341    * @param row row key
342    * @return Last possible KeyValue on passed <code>row</code>
343    */
344   public static KeyValue createLastOnRow(final byte[] row) {
345     return new KeyValue(row, null, null, HConstants.LATEST_TIMESTAMP, Type.Minimum);
346   }
347 
348   /**
349    * Create a KeyValue that is smaller than all other possible KeyValues
350    * for the given row. That is any (valid) KeyValue on 'row' would sort
351    * _after_ the result.
352    *
353    * @param row - row key (arbitrary byte array)
354    * @return First possible KeyValue on passed <code>row</code>
355    */
356   public static KeyValue createFirstOnRow(final byte [] row) {
357     return createFirstOnRow(row, HConstants.LATEST_TIMESTAMP);
358   }
359 
360   /**
361    * Creates a KeyValue that is smaller than all other KeyValues that
362    * are older than the passed timestamp.
363    * @param row - row key (arbitrary byte array)
364    * @param ts - timestamp
365    * @return First possible key on passed <code>row</code> and timestamp.
366    */
367   public static KeyValue createFirstOnRow(final byte [] row,
368       final long ts) {
369     return new KeyValue(row, null, null, ts, Type.Maximum);
370   }
371 
372   /**
373    * Create a KeyValue for the specified row, family and qualifier that would be
374    * smaller than all other possible KeyValues that have the same row,family,qualifier.
375    * Used for seeking.
376    * @param row - row key (arbitrary byte array)
377    * @param family - family name
378    * @param qualifier - column qualifier
379    * @return First possible key on passed <code>row</code>, and column.
380    */
381   public static KeyValue createFirstOnRow(final byte [] row, final byte [] family,
382       final byte [] qualifier) {
383     return new KeyValue(row, family, qualifier, HConstants.LATEST_TIMESTAMP, Type.Maximum);
384   }
385 
386   /**
387    * Create a Delete Family KeyValue for the specified row and family that would
388    * be smaller than all other possible Delete Family KeyValues that have the
389    * same row and family.
390    * Used for seeking.
391    * @param row - row key (arbitrary byte array)
392    * @param family - family name
393    * @return First Delete Family possible key on passed <code>row</code>.
394    */
395   public static KeyValue createFirstDeleteFamilyOnRow(final byte [] row,
396       final byte [] family) {
397     return new KeyValue(row, family, null, HConstants.LATEST_TIMESTAMP,
398         Type.DeleteFamily);
399   }
400 
401   /**
402    * @param row - row key (arbitrary byte array)
403    * @param f - family name
404    * @param q - column qualifier
405    * @param ts - timestamp
406    * @return First possible key on passed <code>row</code>, column and timestamp
407    */
408   public static KeyValue createFirstOnRow(final byte [] row, final byte [] f,
409       final byte [] q, final long ts) {
410     return new KeyValue(row, f, q, ts, Type.Maximum);
411   }
412 
413   /**
414    * Create a KeyValue for the specified row, family and qualifier that would be
415    * smaller than all other possible KeyValues that have the same row,
416    * family, qualifier.
417    * Used for seeking.
418    * @param row row key
419    * @param roffset row offset
420    * @param rlength row length
421    * @param family family name
422    * @param foffset family offset
423    * @param flength family length
424    * @param qualifier column qualifier
425    * @param qoffset qualifier offset
426    * @param qlength qualifier length
427    * @return First possible key on passed Row, Family, Qualifier.
428    */
429   public static KeyValue createFirstOnRow(final byte [] row,
430       final int roffset, final int rlength, final byte [] family,
431       final int foffset, final int flength, final byte [] qualifier,
432       final int qoffset, final int qlength) {
433     return new KeyValue(row, roffset, rlength, family,
434         foffset, flength, qualifier, qoffset, qlength,
435         HConstants.LATEST_TIMESTAMP, Type.Maximum, null, 0, 0);
436   }
437 
438   /**
439    * Create a KeyValue for the specified row, family and qualifier that would be
440    * smaller than all other possible KeyValues that have the same row,
441    * family, qualifier.
442    * Used for seeking.
443    *
444    * @param buffer the buffer to use for the new <code>KeyValue</code> object
445    * @param row the value key
446    * @param family family name
447    * @param qualifier column qualifier
448    *
449    * @return First possible key on passed Row, Family, Qualifier.
450    *
451    * @throws IllegalArgumentException The resulting <code>KeyValue</code> object would be larger
452    * than the provided buffer or than <code>Integer.MAX_VALUE</code>
453    */
454   public static KeyValue createFirstOnRow(byte [] buffer, final byte [] row,
455       final byte [] family, final byte [] qualifier)
456           throws IllegalArgumentException {
457     return createFirstOnRow(buffer, 0, row, 0, row.length,
458         family, 0, family.length,
459         qualifier, 0, qualifier.length);
460   }
461 
462   /**
463    * Create a KeyValue for the specified row, family and qualifier that would be
464    * smaller than all other possible KeyValues that have the same row,
465    * family, qualifier.
466    * Used for seeking.
467    *
468    * @param buffer the buffer to use for the new <code>KeyValue</code> object
469    * @param boffset buffer offset
470    * @param row the value key
471    * @param roffset row offset
472    * @param rlength row length
473    * @param family family name
474    * @param foffset family offset
475    * @param flength family length
476    * @param qualifier column qualifier
477    * @param qoffset qualifier offset
478    * @param qlength qualifier length
479    *
480    * @return First possible key on passed Row, Family, Qualifier.
481    *
482    * @throws IllegalArgumentException The resulting <code>KeyValue</code> object would be larger
483    * than the provided buffer or than <code>Integer.MAX_VALUE</code>
484    */
485   public static KeyValue createFirstOnRow(byte[] buffer, final int boffset, final byte[] row,
486       final int roffset, final int rlength, final byte[] family, final int foffset,
487       final int flength, final byte[] qualifier, final int qoffset, final int qlength)
488       throws IllegalArgumentException {
489 
490     long lLength = KeyValue.getKeyValueDataStructureSize(rlength, flength, qlength, 0);
491 
492     if (lLength > Integer.MAX_VALUE) {
493       throw new IllegalArgumentException("KeyValue length " + lLength + " > " + Integer.MAX_VALUE);
494     }
495     int iLength = (int) lLength;
496     if (buffer.length - boffset < iLength) {
497       throw new IllegalArgumentException("Buffer size " + (buffer.length - boffset) + " < "
498           + iLength);
499     }
500 
501     int len = KeyValue.writeByteArray(buffer, boffset, row, roffset, rlength, family, foffset,
502         flength, qualifier, qoffset, qlength, HConstants.LATEST_TIMESTAMP, KeyValue.Type.Maximum,
503         null, 0, 0, null);
504     return new KeyValue(buffer, boffset, len);
505   }
506 
507   /**
508    * Creates the first KV with the row/family/qualifier of this KV and the
509    * given timestamp. Uses the "maximum" KV type that guarantees that the new
510    * KV is the lowest possible for this combination of row, family, qualifier,
511    * and timestamp. This KV's own timestamp is ignored. While this function
512    * copies the value from this KV, it is normally used on key-only KVs.
513    */
514   public static KeyValue createFirstOnRowColTS(KeyValue kv, long ts) {
515     return new KeyValue(
516         kv.getRowArray(), kv.getRowOffset(), kv.getRowLength(),
517         kv.getFamilyArray(), kv.getFamilyOffset(), kv.getFamilyLength(),
518         kv.getQualifierArray(), kv.getQualifierOffset(), kv.getQualifierLength(),
519         ts, Type.Maximum, kv.getValueArray(), kv.getValueOffset(), kv.getValueLength());
520   }
521 
522   /*************** misc **********************************/
523   /**
524    * @param cell
525    * @return <code>cell<code> if it is an instance of {@link KeyValue} else we will return a
526    * new {@link KeyValue} instance made from <code>cell</code>
527    * @deprecated without any replacement.
528    */
529   @Deprecated
530   public static KeyValue ensureKeyValue(final Cell cell) {
531     if (cell == null) return null;
532     return cell instanceof KeyValue? (KeyValue)cell: copyToNewKeyValue(cell);
533   }
534 
535   @Deprecated
536   public static List<KeyValue> ensureKeyValues(List<Cell> cells) {
537     List<KeyValue> lazyList = Lists.transform(cells, new Function<Cell, KeyValue>() {
538       public KeyValue apply(Cell arg0) {
539         return KeyValueUtil.ensureKeyValue(arg0);
540       }
541     });
542     return new ArrayList<KeyValue>(lazyList);
543   }
544 
545   public static void oswrite(final Cell cell, final OutputStream out, final boolean withTags)
546       throws IOException {
547     if (cell instanceof KeyValue) {
548       KeyValue.oswrite((KeyValue) cell, out, withTags);
549     } else {
550       short rlen = cell.getRowLength();
551       byte flen = cell.getFamilyLength();
552       int qlen = cell.getQualifierLength();
553       int vlen = cell.getValueLength();
554       int tlen = cell.getTagsLength();
555 
556       // write total length
557       StreamUtils.writeInt(out, length(rlen, flen, qlen, vlen, tlen, withTags));
558       // write key length
559       StreamUtils.writeInt(out, keyLength(rlen, flen, qlen));
560       // write value length
561       StreamUtils.writeInt(out, vlen);
562       // Write rowkey - 2 bytes rk length followed by rowkey bytes
563       StreamUtils.writeShort(out, rlen);
564       out.write(cell.getRowArray(), cell.getRowOffset(), rlen);
565       // Write cf - 1 byte of cf length followed by the family bytes
566       out.write(flen);
567       out.write(cell.getFamilyArray(), cell.getFamilyOffset(), flen);
568       // write qualifier
569       out.write(cell.getQualifierArray(), cell.getQualifierOffset(), qlen);
570       // write timestamp
571       StreamUtils.writeLong(out, cell.getTimestamp());
572       // write the type
573       out.write(cell.getTypeByte());
574       // write value
575       out.write(cell.getValueArray(), cell.getValueOffset(), vlen);
576       // write tags if we have to
577       if (withTags) {
578         // 2 bytes tags length followed by tags bytes
579         // tags length is serialized with 2 bytes only(short way) even if the type is int. As this
580         // is non -ve numbers, we save the sign bit. See HBASE-11437
581         out.write((byte) (0xff & (tlen >> 8)));
582         out.write((byte) (0xff & tlen));
583         out.write(cell.getTagsArray(), cell.getTagsOffset(), tlen);
584       }
585     }
586   }
587 }