View Javadoc

1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one
3    * or more contributor license agreements.  See the NOTICE file
4    * distributed with this work for additional information
5    * regarding copyright ownership.  The ASF licenses this file
6    * to you under the Apache License, Version 2.0 (the
7    * "License"); you may not use this file except in compliance
8    * with the License.  You may obtain a copy of the License at
9    *
10   *     http://www.apache.org/licenses/LICENSE-2.0
11   *
12   * Unless required by applicable law or agreed to in writing, software
13   * distributed under the License is distributed on an "AS IS" BASIS,
14   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15   * See the License for the specific language governing permissions and
16   * limitations under the License.
17   */
18  
19  package org.apache.hadoop.hbase;
20  
21  import java.nio.ByteBuffer;
22  import java.util.ArrayList;
23  import java.util.List;
24  
25  import org.apache.hadoop.classification.InterfaceAudience;
26  import org.apache.hadoop.hbase.KeyValue.Type;
27  import org.apache.hadoop.hbase.util.ByteBufferUtils;
28  import org.apache.hadoop.hbase.util.Bytes;
29  import org.apache.hadoop.hbase.util.IterableUtils;
30  import org.apache.hadoop.hbase.util.SimpleMutableByteRange;
31  import org.apache.hadoop.io.WritableUtils;
32  
33  import com.google.common.base.Function;
34  import com.google.common.collect.Lists;
35  
36  /**
 * Static convenience methods for dealing with KeyValues and collections of KeyValues.
38   */
39  @InterfaceAudience.Private
40  public class KeyValueUtil {
41  
42    /**************** length *********************/
43  
44    public static int length(final Cell cell) {
45      return (int) (KeyValue.getKeyValueDataStructureSize(cell.getRowLength(),
46          cell.getFamilyLength(), cell.getQualifierLength(), cell.getValueLength(),
47          cell.getTagsLength()));
48    }
49  
50    protected static int keyLength(final Cell cell) {
51      return (int)KeyValue.getKeyDataStructureSize(cell.getRowLength(), cell.getFamilyLength(),
52        cell.getQualifierLength());
53    }
54  
55    public static int lengthWithMvccVersion(final KeyValue kv, final boolean includeMvccVersion) {
56      int length = kv.getLength();
57      if (includeMvccVersion) {
58        length += WritableUtils.getVIntSize(kv.getMvccVersion());
59      }
60      return length;
61    }
62  
63    public static int totalLengthWithMvccVersion(final Iterable<? extends KeyValue> kvs,
64        final boolean includeMvccVersion) {
65      int length = 0;
66      for (KeyValue kv : IterableUtils.nullSafe(kvs)) {
67        length += lengthWithMvccVersion(kv, includeMvccVersion);
68      }
69      return length;
70    }
71  
72  
73    /**************** copy key only *********************/
74  
75    public static KeyValue copyToNewKeyValue(final Cell cell) {
76      byte[] bytes = copyToNewByteArray(cell);
77      KeyValue kvCell = new KeyValue(bytes, 0, bytes.length);
78      kvCell.setSequenceId(cell.getMvccVersion());
79      return kvCell;
80    }
81  
82    public static ByteBuffer copyKeyToNewByteBuffer(final Cell cell) {
83      byte[] bytes = new byte[keyLength(cell)];
84      appendKeyToByteArrayWithoutValue(cell, bytes, 0);
85      ByteBuffer buffer = ByteBuffer.wrap(bytes);
86      buffer.position(buffer.limit());//make it look as if each field were appended
87      return buffer;
88    }
89  
90    public static byte[] copyToNewByteArray(final Cell cell) {
91      int v1Length = length(cell);
92      byte[] backingBytes = new byte[v1Length];
93      appendToByteArray(cell, backingBytes, 0);
94      return backingBytes;
95    }
96  
97    protected static int appendKeyToByteArrayWithoutValue(final Cell cell, final byte[] output,
98        final int offset) {
99      int nextOffset = offset;
100     nextOffset = Bytes.putShort(output, nextOffset, cell.getRowLength());
101     nextOffset = CellUtil.copyRowTo(cell, output, nextOffset);
102     nextOffset = Bytes.putByte(output, nextOffset, cell.getFamilyLength());
103     nextOffset = CellUtil.copyFamilyTo(cell, output, nextOffset);
104     nextOffset = CellUtil.copyQualifierTo(cell, output, nextOffset);
105     nextOffset = Bytes.putLong(output, nextOffset, cell.getTimestamp());
106     nextOffset = Bytes.putByte(output, nextOffset, cell.getTypeByte());
107     return nextOffset;
108   }
109 
110 
111   /**************** copy key and value *********************/
112 
113   public static int appendToByteArray(final Cell cell, final byte[] output, final int offset) {
114     int pos = offset;
115     pos = Bytes.putInt(output, pos, keyLength(cell));
116     pos = Bytes.putInt(output, pos, cell.getValueLength());
117     pos = appendKeyToByteArrayWithoutValue(cell, output, pos);
118     pos = CellUtil.copyValueTo(cell, output, pos);
119     if ((cell.getTagsLength() > 0)) {
120       pos = Bytes.putAsShort(output, pos, cell.getTagsLength());
121       pos = CellUtil.copyTagTo(cell, output, pos);
122     }
123     return pos;
124   }
125 
126   public static ByteBuffer copyToNewByteBuffer(final Cell cell) {
127     byte[] bytes = new byte[length(cell)];
128     appendToByteArray(cell, bytes, 0);
129     ByteBuffer buffer = ByteBuffer.wrap(bytes);
130     buffer.position(buffer.limit());//make it look as if each field were appended
131     return buffer;
132   }
133 
134   public static void appendToByteBuffer(final ByteBuffer bb, final KeyValue kv,
135       final boolean includeMvccVersion) {
136     // keep pushing the limit out. assume enough capacity
137     bb.limit(bb.position() + kv.getLength());
138     bb.put(kv.getBuffer(), kv.getOffset(), kv.getLength());
139     if (includeMvccVersion) {
140       int numMvccVersionBytes = WritableUtils.getVIntSize(kv.getMvccVersion());
141       ByteBufferUtils.extendLimit(bb, numMvccVersionBytes);
142       ByteBufferUtils.writeVLong(bb, kv.getMvccVersion());
143     }
144   }
145 
146 
147   /**************** iterating *******************************/
148 
149   /**
150    * Creates a new KeyValue object positioned in the supplied ByteBuffer and sets the ByteBuffer's
151    * position to the start of the next KeyValue. Does not allocate a new array or copy data.
152    * @param bb
153    * @param includesMvccVersion
154    * @param includesTags 
155    */
156   public static KeyValue nextShallowCopy(final ByteBuffer bb, final boolean includesMvccVersion,
157       boolean includesTags) {
158     if (bb.isDirect()) {
159       throw new IllegalArgumentException("only supports heap buffers");
160     }
161     if (bb.remaining() < 1) {
162       return null;
163     }
164     KeyValue keyValue = null;
165     int underlyingArrayOffset = bb.arrayOffset() + bb.position();
166     int keyLength = bb.getInt();
167     int valueLength = bb.getInt();
168     ByteBufferUtils.skip(bb, keyLength + valueLength);
169     int tagsLength = 0;
170     if (includesTags) {
171       // Read short as unsigned, high byte first
172       tagsLength = ((bb.get() & 0xff) << 8) ^ (bb.get() & 0xff);
173       ByteBufferUtils.skip(bb, tagsLength);
174     }
175     int kvLength = (int) KeyValue.getKeyValueDataStructureSize(keyLength, valueLength, tagsLength);
176     keyValue = new KeyValue(bb.array(), underlyingArrayOffset, kvLength);
177     if (includesMvccVersion) {
178       long mvccVersion = ByteBufferUtils.readVLong(bb);
179       keyValue.setSequenceId(mvccVersion);
180     }
181     return keyValue;
182   }
183 
184 
185   /*************** next/previous **********************************/
186 
187   /**
188    * Append single byte 0x00 to the end of the input row key
189    */
190   public static KeyValue createFirstKeyInNextRow(final Cell in){
191     byte[] nextRow = new byte[in.getRowLength() + 1];
192     System.arraycopy(in.getRowArray(), in.getRowOffset(), nextRow, 0, in.getRowLength());
193     nextRow[nextRow.length - 1] = 0;//maybe not necessary
194     return createFirstOnRow(nextRow);
195   }
196 
197   /**
198    * Increment the row bytes and clear the other fields
199    */
200   public static KeyValue createFirstKeyInIncrementedRow(final Cell in){
201     byte[] thisRow = new SimpleMutableByteRange(in.getRowArray(), in.getRowOffset(),
202         in.getRowLength()).deepCopyToNewArray();
203     byte[] nextRow = Bytes.unsignedCopyAndIncrement(thisRow);
204     return createFirstOnRow(nextRow);
205   }
206 
207   /**
208    * Decrement the timestamp.  For tests (currently wasteful)
209    *
210    * Remember timestamps are sorted reverse chronologically.
211    * @param in
212    * @return previous key
213    */
214   public static KeyValue previousKey(final KeyValue in) {
215     return createFirstOnRow(CellUtil.cloneRow(in), CellUtil.cloneFamily(in),
216       CellUtil.cloneQualifier(in), in.getTimestamp() - 1);
217   }
218   
219 
220   /**
221    * Create a KeyValue for the specified row, family and qualifier that would be
222    * larger than or equal to all other possible KeyValues that have the same
223    * row, family, qualifier. Used for reseeking.
224    *
225    * @param row
226    *          row key
227    * @param roffset
228    *         row offset
229    * @param rlength
230    *         row length
231    * @param family
232    *         family name
233    * @param foffset
234    *         family offset
235    * @param flength
236    *         family length
237    * @param qualifier
238    *        column qualifier
239    * @param qoffset
240    *        qualifier offset
241    * @param qlength
242    *        qualifier length
243    * @return Last possible key on passed row, family, qualifier.
244    */
245   public static KeyValue createLastOnRow(final byte[] row, final int roffset, final int rlength,
246       final byte[] family, final int foffset, final int flength, final byte[] qualifier,
247       final int qoffset, final int qlength) {
248     return new KeyValue(row, roffset, rlength, family, foffset, flength, qualifier, qoffset,
249         qlength, HConstants.OLDEST_TIMESTAMP, Type.Minimum, null, 0, 0);
250   }
251   
252   /**
253    * Creates a keyValue for the specified keyvalue larger than or equal to all other possible
254    * KeyValues that have the same row, family, qualifer.  Used for reseeking
255    * @param kv
256    * @return KeyValue
257    */
258   public static KeyValue createLastOnRow(Cell kv) {
259     return createLastOnRow(kv.getRowArray(), kv.getRowOffset(), kv.getRowLength(), null, 0, 0,
260         null, 0, 0);
261   }
262 
263   /**
264    * Similar to
265    * {@link #createLastOnRow(byte[], int, int, byte[], int, int, byte[], int, int)}
266    * but creates the last key on the row/column of this KV (the value part of
267    * the returned KV is always empty). Used in creating "fake keys" for the
268    * multi-column Bloom filter optimization to skip the row/column we already
269    * know is not in the file.
270    * 
271    * @param kv - cell
272    * @return the last key on the row/column of the given key-value pair
273    */
274   public static KeyValue createLastOnRowCol(Cell kv) {
275     return new KeyValue(kv.getRowArray(), kv.getRowOffset(), kv.getRowLength(),
276         kv.getFamilyArray(), kv.getFamilyOffset(), kv.getFamilyLength(), kv.getQualifierArray(),
277         kv.getQualifierOffset(), kv.getQualifierLength(), HConstants.OLDEST_TIMESTAMP,
278         Type.Minimum, null, 0, 0);
279   }
280 
281   /**
282    * Creates the first KV with the row/family/qualifier of this KV and the given
283    * timestamp. Uses the "maximum" KV type that guarantees that the new KV is
284    * the lowest possible for this combination of row, family, qualifier, and
285    * timestamp. This KV's own timestamp is ignored. While this function copies
286    * the value from this KV, it is normally used on key-only KVs.
287    * 
288    * @param kv - cell
289    * @param ts
290    */
291   public static KeyValue createFirstOnRowColTS(Cell kv, long ts) {
292     return new KeyValue(kv.getRowArray(), kv.getRowOffset(), kv.getRowLength(),
293         kv.getFamilyArray(), kv.getFamilyOffset(), kv.getFamilyLength(), kv.getQualifierArray(),
294         kv.getQualifierOffset(), kv.getQualifierLength(), ts, Type.Maximum, kv.getValueArray(),
295         kv.getValueOffset(), kv.getValueLength());
296   }
297   
298   /**
299    * Create a KeyValue that is smaller than all other possible KeyValues
300    * for the given row. That is any (valid) KeyValue on 'row' would sort
301    * _after_ the result.
302    *
303    * @param row - row key (arbitrary byte array)
304    * @return First possible KeyValue on passed <code>row</code>
305    */
306   public static KeyValue createFirstOnRow(final byte [] row, int roffset, short rlength) {
307     return new KeyValue(row, roffset, rlength,
308         null, 0, 0, null, 0, 0, HConstants.LATEST_TIMESTAMP, Type.Maximum, null, 0, 0);
309   }
310   
311 
312   /**
313    * Creates a KeyValue that is last on the specified row id. That is,
314    * every other possible KeyValue for the given row would compareTo()
315    * less than the result of this call.
316    * @param row row key
317    * @return Last possible KeyValue on passed <code>row</code>
318    */
319   public static KeyValue createLastOnRow(final byte[] row) {
320     return new KeyValue(row, null, null, HConstants.LATEST_TIMESTAMP, Type.Minimum);
321   }
322 
323   /**
324    * Create a KeyValue that is smaller than all other possible KeyValues
325    * for the given row. That is any (valid) KeyValue on 'row' would sort
326    * _after_ the result.
327    *
328    * @param row - row key (arbitrary byte array)
329    * @return First possible KeyValue on passed <code>row</code>
330    */
331   public static KeyValue createFirstOnRow(final byte [] row) {
332     return createFirstOnRow(row, HConstants.LATEST_TIMESTAMP);
333   }
334 
335   /**
336    * Creates a KeyValue that is smaller than all other KeyValues that
337    * are older than the passed timestamp.
338    * @param row - row key (arbitrary byte array)
339    * @param ts - timestamp
340    * @return First possible key on passed <code>row</code> and timestamp.
341    */
342   public static KeyValue createFirstOnRow(final byte [] row,
343       final long ts) {
344     return new KeyValue(row, null, null, ts, Type.Maximum);
345   }
346 
347   /**
348    * Create a KeyValue for the specified row, family and qualifier that would be
349    * smaller than all other possible KeyValues that have the same row,family,qualifier.
350    * Used for seeking.
351    * @param row - row key (arbitrary byte array)
352    * @param family - family name
353    * @param qualifier - column qualifier
354    * @return First possible key on passed <code>row</code>, and column.
355    */
356   public static KeyValue createFirstOnRow(final byte [] row, final byte [] family,
357       final byte [] qualifier) {
358     return new KeyValue(row, family, qualifier, HConstants.LATEST_TIMESTAMP, Type.Maximum);
359   }
360 
361   /**
362    * Create a Delete Family KeyValue for the specified row and family that would
363    * be smaller than all other possible Delete Family KeyValues that have the
364    * same row and family.
365    * Used for seeking.
366    * @param row - row key (arbitrary byte array)
367    * @param family - family name
368    * @return First Delete Family possible key on passed <code>row</code>.
369    */
370   public static KeyValue createFirstDeleteFamilyOnRow(final byte [] row,
371       final byte [] family) {
372     return new KeyValue(row, family, null, HConstants.LATEST_TIMESTAMP,
373         Type.DeleteFamily);
374   }
375 
376   /**
377    * @param row - row key (arbitrary byte array)
378    * @param f - family name
379    * @param q - column qualifier
380    * @param ts - timestamp
381    * @return First possible key on passed <code>row</code>, column and timestamp
382    */
383   public static KeyValue createFirstOnRow(final byte [] row, final byte [] f,
384       final byte [] q, final long ts) {
385     return new KeyValue(row, f, q, ts, Type.Maximum);
386   }
387 
388   /**
389    * Create a KeyValue for the specified row, family and qualifier that would be
390    * smaller than all other possible KeyValues that have the same row,
391    * family, qualifier.
392    * Used for seeking.
393    * @param row row key
394    * @param roffset row offset
395    * @param rlength row length
396    * @param family family name
397    * @param foffset family offset
398    * @param flength family length
399    * @param qualifier column qualifier
400    * @param qoffset qualifier offset
401    * @param qlength qualifier length
402    * @return First possible key on passed Row, Family, Qualifier.
403    */
404   public static KeyValue createFirstOnRow(final byte [] row,
405       final int roffset, final int rlength, final byte [] family,
406       final int foffset, final int flength, final byte [] qualifier,
407       final int qoffset, final int qlength) {
408     return new KeyValue(row, roffset, rlength, family,
409         foffset, flength, qualifier, qoffset, qlength,
410         HConstants.LATEST_TIMESTAMP, Type.Maximum, null, 0, 0);
411   }
412 
413   /**
414    * Create a KeyValue for the specified row, family and qualifier that would be
415    * smaller than all other possible KeyValues that have the same row,
416    * family, qualifier.
417    * Used for seeking.
418    *
419    * @param buffer the buffer to use for the new <code>KeyValue</code> object
420    * @param row the value key
421    * @param family family name
422    * @param qualifier column qualifier
423    *
424    * @return First possible key on passed Row, Family, Qualifier.
425    *
426    * @throws IllegalArgumentException The resulting <code>KeyValue</code> object would be larger
427    * than the provided buffer or than <code>Integer.MAX_VALUE</code>
428    */
429   public static KeyValue createFirstOnRow(byte [] buffer, final byte [] row,
430       final byte [] family, final byte [] qualifier)
431           throws IllegalArgumentException {
432     return createFirstOnRow(buffer, 0, row, 0, row.length,
433         family, 0, family.length,
434         qualifier, 0, qualifier.length);
435   }
436 
437   /**
438    * Create a KeyValue for the specified row, family and qualifier that would be
439    * smaller than all other possible KeyValues that have the same row,
440    * family, qualifier.
441    * Used for seeking.
442    *
443    * @param buffer the buffer to use for the new <code>KeyValue</code> object
444    * @param boffset buffer offset
445    * @param row the value key
446    * @param roffset row offset
447    * @param rlength row length
448    * @param family family name
449    * @param foffset family offset
450    * @param flength family length
451    * @param qualifier column qualifier
452    * @param qoffset qualifier offset
453    * @param qlength qualifier length
454    *
455    * @return First possible key on passed Row, Family, Qualifier.
456    *
457    * @throws IllegalArgumentException The resulting <code>KeyValue</code> object would be larger
458    * than the provided buffer or than <code>Integer.MAX_VALUE</code>
459    */
460   public static KeyValue createFirstOnRow(byte[] buffer, final int boffset, final byte[] row,
461       final int roffset, final int rlength, final byte[] family, final int foffset,
462       final int flength, final byte[] qualifier, final int qoffset, final int qlength)
463       throws IllegalArgumentException {
464 
465     long lLength = KeyValue.getKeyValueDataStructureSize(rlength, flength, qlength, 0);
466 
467     if (lLength > Integer.MAX_VALUE) {
468       throw new IllegalArgumentException("KeyValue length " + lLength + " > " + Integer.MAX_VALUE);
469     }
470     int iLength = (int) lLength;
471     if (buffer.length - boffset < iLength) {
472       throw new IllegalArgumentException("Buffer size " + (buffer.length - boffset) + " < "
473           + iLength);
474     }
475 
476     int len = KeyValue.writeByteArray(buffer, boffset, row, roffset, rlength, family, foffset,
477         flength, qualifier, qoffset, qlength, HConstants.LATEST_TIMESTAMP, KeyValue.Type.Maximum,
478         null, 0, 0, null);
479     return new KeyValue(buffer, boffset, len);
480   }
481 
482   /**
483    * Creates the first KV with the row/family/qualifier of this KV and the
484    * given timestamp. Uses the "maximum" KV type that guarantees that the new
485    * KV is the lowest possible for this combination of row, family, qualifier,
486    * and timestamp. This KV's own timestamp is ignored. While this function
487    * copies the value from this KV, it is normally used on key-only KVs.
488    */
489   public static KeyValue createFirstOnRowColTS(KeyValue kv, long ts) {
490     return new KeyValue(
491         kv.getRowArray(), kv.getRowOffset(), kv.getRowLength(),
492         kv.getFamilyArray(), kv.getFamilyOffset(), kv.getFamilyLength(),
493         kv.getQualifierArray(), kv.getQualifierOffset(), kv.getQualifierLength(),
494         ts, Type.Maximum, kv.getValueArray(), kv.getValueOffset(), kv.getValueLength());
495   }
496 
497   /*************** misc **********************************/
498   /**
499    * @param cell
500    * @return <code>cell<code> if it is an instance of {@link KeyValue} else we will return a
501    * new {@link KeyValue} instance made from <code>cell</code>
502    */
503   public static KeyValue ensureKeyValue(final Cell cell) {
504     if (cell == null) return null;
505     return cell instanceof KeyValue? (KeyValue)cell: copyToNewKeyValue(cell);
506   }
507 
508   public static List<KeyValue> ensureKeyValues(List<Cell> cells) {
509     List<KeyValue> lazyList = Lists.transform(cells, new Function<Cell, KeyValue>() {
510       public KeyValue apply(Cell arg0) {
511         return KeyValueUtil.ensureKeyValue(arg0);
512       }
513     });
514     return new ArrayList<KeyValue>(lazyList);
515   }
516 
517 }