001/*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018
019package org.apache.hadoop.hbase;
020
021import java.io.DataInput;
022import java.io.DataOutput;
023import java.io.EOFException;
024import java.io.IOException;
025import java.io.InputStream;
026import java.io.OutputStream;
027import java.nio.ByteBuffer;
028import java.util.ArrayList;
029import java.util.List;
030
031import org.apache.hadoop.hbase.KeyValue.Type;
032import org.apache.hadoop.hbase.io.util.StreamUtils;
033import org.apache.hadoop.hbase.util.ByteBufferUtils;
034import org.apache.hadoop.hbase.util.Bytes;
035import org.apache.hadoop.io.IOUtils;
036import org.apache.hadoop.io.WritableUtils;
037import org.apache.yetus.audience.InterfaceAudience;
038import org.slf4j.Logger;
039import org.slf4j.LoggerFactory;
040
041import org.apache.hbase.thirdparty.com.google.common.base.Function;
042import org.apache.hbase.thirdparty.com.google.common.collect.Lists;
043import org.apache.hbase.thirdparty.org.apache.commons.collections4.IterableUtils;
044
045/**
046 * static convenience methods for dealing with KeyValues and collections of KeyValues
047 */
048@InterfaceAudience.Private
049public class KeyValueUtil {
050
051  private static final Logger LOG = LoggerFactory.getLogger(KeyValueUtil.class);
052
053  /**************** length *********************/
054
055  public static int length(short rlen, byte flen, int qlen, int vlen, int tlen, boolean withTags) {
056    if (withTags) {
057      return (int) (KeyValue.getKeyValueDataStructureSize(rlen, flen, qlen, vlen, tlen));
058    }
059    return (int) (KeyValue.getKeyValueDataStructureSize(rlen, flen, qlen, vlen));
060  }
061
062  /**
063   * Returns number of bytes this cell's key part would have been used if serialized as in
064   * {@link KeyValue}. Key includes rowkey, family, qualifier, timestamp and type.
065   * @param cell
066   * @return the key length
067   */
068  public static int keyLength(final Cell cell) {
069    return keyLength(cell.getRowLength(), cell.getFamilyLength(), cell.getQualifierLength());
070  }
071
072  private static int keyLength(short rlen, byte flen, int qlen) {
073    return (int) KeyValue.getKeyDataStructureSize(rlen, flen, qlen);
074  }
075
076  public static int lengthWithMvccVersion(final KeyValue kv, final boolean includeMvccVersion) {
077    int length = kv.getLength();
078    if (includeMvccVersion) {
079      length += WritableUtils.getVIntSize(kv.getSequenceId());
080    }
081    return length;
082  }
083
084  public static int totalLengthWithMvccVersion(final Iterable<? extends KeyValue> kvs,
085      final boolean includeMvccVersion) {
086    int length = 0;
087    for (KeyValue kv : IterableUtils.emptyIfNull(kvs)) {
088      length += lengthWithMvccVersion(kv, includeMvccVersion);
089    }
090    return length;
091  }
092
093
094  /**************** copy the cell to create a new keyvalue *********************/
095
096  public static KeyValue copyToNewKeyValue(final Cell cell) {
097    byte[] bytes = copyToNewByteArray(cell);
098    KeyValue kvCell = new KeyValue(bytes, 0, bytes.length);
099    kvCell.setSequenceId(cell.getSequenceId());
100    return kvCell;
101  }
102
103  /**
104   * The position will be set to the beginning of the new ByteBuffer
105   * @param cell
106   * @return the Bytebuffer containing the key part of the cell
107   */
108  public static ByteBuffer copyKeyToNewByteBuffer(final Cell cell) {
109    byte[] bytes = new byte[keyLength(cell)];
110    appendKeyTo(cell, bytes, 0);
111    ByteBuffer buffer = ByteBuffer.wrap(bytes);
112    return buffer;
113  }
114
115  /**
116   * Copies the key to a new KeyValue
117   * @param cell
118   * @return the KeyValue that consists only the key part of the incoming cell
119   */
120  public static KeyValue toNewKeyCell(final Cell cell) {
121    byte[] bytes = new byte[keyLength(cell)];
122    appendKeyTo(cell, bytes, 0);
123    KeyValue kv = new KeyValue.KeyOnlyKeyValue(bytes, 0, bytes.length);
124    // Set the seq id. The new key cell could be used in comparisons so it
125    // is important that it uses the seqid also. If not the comparsion would fail
126    kv.setSequenceId(cell.getSequenceId());
127    return kv;
128  }
129
130  public static byte[] copyToNewByteArray(final Cell cell) {
131    int v1Length = cell.getSerializedSize();
132    byte[] backingBytes = new byte[v1Length];
133    appendToByteArray(cell, backingBytes, 0, true);
134    return backingBytes;
135  }
136
137  public static int appendKeyTo(final Cell cell, final byte[] output,
138      final int offset) {
139    int nextOffset = offset;
140    nextOffset = Bytes.putShort(output, nextOffset, cell.getRowLength());
141    nextOffset = CellUtil.copyRowTo(cell, output, nextOffset);
142    nextOffset = Bytes.putByte(output, nextOffset, cell.getFamilyLength());
143    nextOffset = CellUtil.copyFamilyTo(cell, output, nextOffset);
144    nextOffset = CellUtil.copyQualifierTo(cell, output, nextOffset);
145    nextOffset = Bytes.putLong(output, nextOffset, cell.getTimestamp());
146    nextOffset = Bytes.putByte(output, nextOffset, cell.getTypeByte());
147    return nextOffset;
148  }
149
150  /**************** copy key and value *********************/
151
152  public static int appendToByteArray(Cell cell, byte[] output, int offset, boolean withTags) {
153    int pos = offset;
154    pos = Bytes.putInt(output, pos, keyLength(cell));
155    pos = Bytes.putInt(output, pos, cell.getValueLength());
156    pos = appendKeyTo(cell, output, pos);
157    pos = CellUtil.copyValueTo(cell, output, pos);
158    if (withTags && (cell.getTagsLength() > 0)) {
159      pos = Bytes.putAsShort(output, pos, cell.getTagsLength());
160      pos = PrivateCellUtil.copyTagsTo(cell, output, pos);
161    }
162    return pos;
163  }
164
165  /**
166   * Copy the Cell content into the passed buf in KeyValue serialization format.
167   */
168  public static int appendTo(Cell cell, ByteBuffer buf, int offset, boolean withTags) {
169    offset = ByteBufferUtils.putInt(buf, offset, keyLength(cell));// Key length
170    offset = ByteBufferUtils.putInt(buf, offset, cell.getValueLength());// Value length
171    offset = appendKeyTo(cell, buf, offset);
172    offset = CellUtil.copyValueTo(cell, buf, offset);// Value bytes
173    int tagsLength = cell.getTagsLength();
174    if (withTags && (tagsLength > 0)) {
175      offset = ByteBufferUtils.putAsShort(buf, offset, tagsLength);// Tags length
176      offset = PrivateCellUtil.copyTagsTo(cell, buf, offset);// Tags bytes
177    }
178    return offset;
179  }
180
181  public static int appendKeyTo(Cell cell, ByteBuffer buf, int offset) {
182    offset = ByteBufferUtils.putShort(buf, offset, cell.getRowLength());// RK length
183    offset = CellUtil.copyRowTo(cell, buf, offset);// Row bytes
184    offset = ByteBufferUtils.putByte(buf, offset, cell.getFamilyLength());// CF length
185    offset = CellUtil.copyFamilyTo(cell, buf, offset);// CF bytes
186    offset = CellUtil.copyQualifierTo(cell, buf, offset);// Qualifier bytes
187    offset = ByteBufferUtils.putLong(buf, offset, cell.getTimestamp());// TS
188    offset = ByteBufferUtils.putByte(buf, offset, cell.getTypeByte());// Type
189    return offset;
190  }
191
192  public static void appendToByteBuffer(final ByteBuffer bb, final KeyValue kv,
193      final boolean includeMvccVersion) {
194    // keep pushing the limit out. assume enough capacity
195    bb.limit(bb.position() + kv.getLength());
196    bb.put(kv.getBuffer(), kv.getOffset(), kv.getLength());
197    if (includeMvccVersion) {
198      int numMvccVersionBytes = WritableUtils.getVIntSize(kv.getSequenceId());
199      ByteBufferUtils.extendLimit(bb, numMvccVersionBytes);
200      ByteBufferUtils.writeVLong(bb, kv.getSequenceId());
201    }
202  }
203
204
205  /**************** iterating *******************************/
206
207  /**
208   * Creates a new KeyValue object positioned in the supplied ByteBuffer and sets the ByteBuffer's
209   * position to the start of the next KeyValue. Does not allocate a new array or copy data.
210   * @param bb
211   * @param includesMvccVersion
212   * @param includesTags
213   */
214  public static KeyValue nextShallowCopy(final ByteBuffer bb, final boolean includesMvccVersion,
215      boolean includesTags) {
216    if (bb.isDirect()) {
217      throw new IllegalArgumentException("only supports heap buffers");
218    }
219    if (bb.remaining() < 1) {
220      return null;
221    }
222    KeyValue keyValue = null;
223    int underlyingArrayOffset = bb.arrayOffset() + bb.position();
224    int keyLength = bb.getInt();
225    int valueLength = bb.getInt();
226    ByteBufferUtils.skip(bb, keyLength + valueLength);
227    int tagsLength = 0;
228    if (includesTags) {
229      // Read short as unsigned, high byte first
230      tagsLength = ((bb.get() & 0xff) << 8) ^ (bb.get() & 0xff);
231      ByteBufferUtils.skip(bb, tagsLength);
232    }
233    int kvLength = (int) KeyValue.getKeyValueDataStructureSize(keyLength, valueLength, tagsLength);
234    keyValue = new KeyValue(bb.array(), underlyingArrayOffset, kvLength);
235    if (includesMvccVersion) {
236      long mvccVersion = ByteBufferUtils.readVLong(bb);
237      keyValue.setSequenceId(mvccVersion);
238    }
239    return keyValue;
240  }
241
242
243  /*************** next/previous **********************************/
244
245  /**
246   * Decrement the timestamp.  For tests (currently wasteful)
247   *
248   * Remember timestamps are sorted reverse chronologically.
249   * @param in
250   * @return previous key
251   */
252  public static KeyValue previousKey(final KeyValue in) {
253    return createFirstOnRow(CellUtil.cloneRow(in), CellUtil.cloneFamily(in),
254      CellUtil.cloneQualifier(in), in.getTimestamp() - 1);
255  }
256
257
258  /**
259   * Create a KeyValue for the specified row, family and qualifier that would be
260   * larger than or equal to all other possible KeyValues that have the same
261   * row, family, qualifier. Used for reseeking. Should NEVER be returned to a client.
262   *
263   * @param row
264   *          row key
265   * @param roffset
266   *         row offset
267   * @param rlength
268   *         row length
269   * @param family
270   *         family name
271   * @param foffset
272   *         family offset
273   * @param flength
274   *         family length
275   * @param qualifier
276   *        column qualifier
277   * @param qoffset
278   *        qualifier offset
279   * @param qlength
280   *        qualifier length
281   * @return Last possible key on passed row, family, qualifier.
282   */
283  public static KeyValue createLastOnRow(final byte[] row, final int roffset, final int rlength,
284      final byte[] family, final int foffset, final int flength, final byte[] qualifier,
285      final int qoffset, final int qlength) {
286    return new KeyValue(row, roffset, rlength, family, foffset, flength, qualifier, qoffset,
287        qlength, HConstants.OLDEST_TIMESTAMP, Type.Minimum, null, 0, 0);
288  }
289
290  /**
291   * Create a KeyValue that is smaller than all other possible KeyValues
292   * for the given row. That is any (valid) KeyValue on 'row' would sort
293   * _after_ the result.
294   *
295   * @param row - row key (arbitrary byte array)
296   * @return First possible KeyValue on passed <code>row</code>
297   */
298  public static KeyValue createFirstOnRow(final byte [] row, int roffset, short rlength) {
299    return new KeyValue(row, roffset, rlength,
300        null, 0, 0, null, 0, 0, HConstants.LATEST_TIMESTAMP, Type.Maximum, null, 0, 0);
301  }
302
303  /**
304   * Creates a KeyValue that is last on the specified row id. That is,
305   * every other possible KeyValue for the given row would compareTo()
306   * less than the result of this call.
307   * @param row row key
308   * @return Last possible KeyValue on passed <code>row</code>
309   */
310  public static KeyValue createLastOnRow(final byte[] row) {
311    return new KeyValue(row, null, null, HConstants.LATEST_TIMESTAMP, Type.Minimum);
312  }
313
314  /**
315   * Create a KeyValue that is smaller than all other possible KeyValues
316   * for the given row. That is any (valid) KeyValue on 'row' would sort
317   * _after_ the result.
318   *
319   * @param row - row key (arbitrary byte array)
320   * @return First possible KeyValue on passed <code>row</code>
321   */
322  public static KeyValue createFirstOnRow(final byte [] row) {
323    return createFirstOnRow(row, HConstants.LATEST_TIMESTAMP);
324  }
325
326  /**
327   * Creates a KeyValue that is smaller than all other KeyValues that
328   * are older than the passed timestamp.
329   * @param row - row key (arbitrary byte array)
330   * @param ts - timestamp
331   * @return First possible key on passed <code>row</code> and timestamp.
332   */
333  public static KeyValue createFirstOnRow(final byte [] row,
334      final long ts) {
335    return new KeyValue(row, null, null, ts, Type.Maximum);
336  }
337
338  /**
339   * Create a KeyValue for the specified row, family and qualifier that would be
340   * smaller than all other possible KeyValues that have the same row,family,qualifier.
341   * Used for seeking.
342   * @param row - row key (arbitrary byte array)
343   * @param family - family name
344   * @param qualifier - column qualifier
345   * @return First possible key on passed <code>row</code>, and column.
346   */
347  public static KeyValue createFirstOnRow(final byte [] row, final byte [] family,
348      final byte [] qualifier) {
349    return new KeyValue(row, family, qualifier, HConstants.LATEST_TIMESTAMP, Type.Maximum);
350  }
351
352  /**
353   * @param row - row key (arbitrary byte array)
354   * @param f - family name
355   * @param q - column qualifier
356   * @param ts - timestamp
357   * @return First possible key on passed <code>row</code>, column and timestamp
358   */
359  public static KeyValue createFirstOnRow(final byte [] row, final byte [] f,
360      final byte [] q, final long ts) {
361    return new KeyValue(row, f, q, ts, Type.Maximum);
362  }
363
364  /**
365   * Create a KeyValue for the specified row, family and qualifier that would be
366   * smaller than all other possible KeyValues that have the same row,
367   * family, qualifier.
368   * Used for seeking.
369   * @param row row key
370   * @param roffset row offset
371   * @param rlength row length
372   * @param family family name
373   * @param foffset family offset
374   * @param flength family length
375   * @param qualifier column qualifier
376   * @param qoffset qualifier offset
377   * @param qlength qualifier length
378   * @return First possible key on passed Row, Family, Qualifier.
379   */
380  public static KeyValue createFirstOnRow(final byte [] row,
381      final int roffset, final int rlength, final byte [] family,
382      final int foffset, final int flength, final byte [] qualifier,
383      final int qoffset, final int qlength) {
384    return new KeyValue(row, roffset, rlength, family,
385        foffset, flength, qualifier, qoffset, qlength,
386        HConstants.LATEST_TIMESTAMP, Type.Maximum, null, 0, 0);
387  }
388
389  /**
390   * Create a KeyValue for the specified row, family and qualifier that would be
391   * smaller than all other possible KeyValues that have the same row,
392   * family, qualifier.
393   * Used for seeking.
394   *
395   * @param buffer the buffer to use for the new <code>KeyValue</code> object
396   * @param row the value key
397   * @param family family name
398   * @param qualifier column qualifier
399   *
400   * @return First possible key on passed Row, Family, Qualifier.
401   *
402   * @throws IllegalArgumentException The resulting <code>KeyValue</code> object would be larger
403   * than the provided buffer or than <code>Integer.MAX_VALUE</code>
404   */
405  public static KeyValue createFirstOnRow(byte [] buffer, final byte [] row,
406      final byte [] family, final byte [] qualifier)
407          throws IllegalArgumentException {
408    return createFirstOnRow(buffer, 0, row, 0, row.length,
409        family, 0, family.length,
410        qualifier, 0, qualifier.length);
411  }
412
413  /**
414   * Create a KeyValue for the specified row, family and qualifier that would be
415   * smaller than all other possible KeyValues that have the same row,
416   * family, qualifier.
417   * Used for seeking.
418   *
419   * @param buffer the buffer to use for the new <code>KeyValue</code> object
420   * @param boffset buffer offset
421   * @param row the value key
422   * @param roffset row offset
423   * @param rlength row length
424   * @param family family name
425   * @param foffset family offset
426   * @param flength family length
427   * @param qualifier column qualifier
428   * @param qoffset qualifier offset
429   * @param qlength qualifier length
430   *
431   * @return First possible key on passed Row, Family, Qualifier.
432   *
433   * @throws IllegalArgumentException The resulting <code>KeyValue</code> object would be larger
434   * than the provided buffer or than <code>Integer.MAX_VALUE</code>
435   */
436  public static KeyValue createFirstOnRow(byte[] buffer, final int boffset, final byte[] row,
437      final int roffset, final int rlength, final byte[] family, final int foffset,
438      final int flength, final byte[] qualifier, final int qoffset, final int qlength)
439      throws IllegalArgumentException {
440
441    long lLength = KeyValue.getKeyValueDataStructureSize(rlength, flength, qlength, 0);
442
443    if (lLength > Integer.MAX_VALUE) {
444      throw new IllegalArgumentException("KeyValue length " + lLength + " > " + Integer.MAX_VALUE);
445    }
446    int iLength = (int) lLength;
447    if (buffer.length - boffset < iLength) {
448      throw new IllegalArgumentException("Buffer size " + (buffer.length - boffset) + " < "
449          + iLength);
450    }
451
452    int len = KeyValue.writeByteArray(buffer, boffset, row, roffset, rlength, family, foffset,
453        flength, qualifier, qoffset, qlength, HConstants.LATEST_TIMESTAMP, KeyValue.Type.Maximum,
454        null, 0, 0, null);
455    return new KeyValue(buffer, boffset, len);
456  }
457
458  /*************** misc **********************************/
459  /**
460   * @param cell
461   * @return <code>cell</code> if it is an object of class {@link KeyValue} else we will return a
462   *         new {@link KeyValue} instance made from <code>cell</code> Note: Even if the cell is an
463   *         object of any of the subclass of {@link KeyValue}, we will create a new
464   *         {@link KeyValue} object wrapping same buffer. This API is used only with MR based tools
465   *         which expect the type to be exactly KeyValue. That is the reason for doing this way.
466   * @deprecated without any replacement.
467   */
468  @Deprecated
469  public static KeyValue ensureKeyValue(final Cell cell) {
470    if (cell == null) return null;
471    if (cell instanceof KeyValue) {
472      if (cell.getClass().getName().equals(KeyValue.class.getName())) {
473        return (KeyValue) cell;
474      }
475      // Cell is an Object of any of the sub classes of KeyValue. Make a new KeyValue wrapping the
476      // same byte[]
477      KeyValue kv = (KeyValue) cell;
478      KeyValue newKv = new KeyValue(kv.bytes, kv.offset, kv.length);
479      newKv.setSequenceId(kv.getSequenceId());
480      return newKv;
481    }
482    return copyToNewKeyValue(cell);
483  }
484
485  @Deprecated
486  public static List<KeyValue> ensureKeyValues(List<Cell> cells) {
487    List<KeyValue> lazyList = Lists.transform(cells, new Function<Cell, KeyValue>() {
488      @Override
489      public KeyValue apply(Cell arg0) {
490        return KeyValueUtil.ensureKeyValue(arg0);
491      }
492    });
493    return new ArrayList<>(lazyList);
494  }
495  /**
496   * Write out a KeyValue in the manner in which we used to when KeyValue was a
497   * Writable.
498   *
499   * @param kv
500   * @param out
501   * @return Length written on stream
502   * @throws IOException
503   * @see #create(DataInput) for the inverse function
504   */
505  public static long write(final KeyValue kv, final DataOutput out) throws IOException {
506    // This is how the old Writables write used to serialize KVs. Need to figure
507    // way to make it
508    // work for all implementations.
509    int length = kv.getLength();
510    out.writeInt(length);
511    out.write(kv.getBuffer(), kv.getOffset(), length);
512    return (long) length + Bytes.SIZEOF_INT;
513  }
514
515  static String bytesToHex(byte[] buf, int offset, int length) {
516    String bufferContents = buf != null ? Bytes.toStringBinary(buf, offset, length) : "<null>";
517    return ", KeyValueBytesHex=" + bufferContents + ", offset=" + offset + ", length=" + length;
518  }
519
520  static void checkKeyValueBytes(byte[] buf, int offset, int length, boolean withTags) {
521    if (buf == null) {
522      String msg = "Invalid to have null byte array in KeyValue.";
523      LOG.warn(msg);
524      throw new IllegalArgumentException(msg);
525    }
526
527    int pos = offset, endOffset = offset + length;
528    // check the key
529    if (pos + Bytes.SIZEOF_INT > endOffset) {
530      String msg =
531          "Overflow when reading key length at position=" + pos + bytesToHex(buf, offset, length);
532      LOG.warn(msg);
533      throw new IllegalArgumentException(msg);
534    }
535    int keyLen = Bytes.toInt(buf, pos, Bytes.SIZEOF_INT);
536    pos += Bytes.SIZEOF_INT;
537    if (keyLen <= 0 || pos + keyLen > endOffset) {
538      String msg =
539          "Invalid key length in KeyValue. keyLength=" + keyLen + bytesToHex(buf, offset, length);
540      LOG.warn(msg);
541      throw new IllegalArgumentException(msg);
542    }
543    // check the value
544    if (pos + Bytes.SIZEOF_INT > endOffset) {
545      String msg =
546          "Overflow when reading value length at position=" + pos + bytesToHex(buf, offset, length);
547      LOG.warn(msg);
548      throw new IllegalArgumentException(msg);
549    }
550    int valLen = Bytes.toInt(buf, pos, Bytes.SIZEOF_INT);
551    pos += Bytes.SIZEOF_INT;
552    if (valLen < 0 || pos + valLen > endOffset) {
553      String msg = "Invalid value length in KeyValue, valueLength=" + valLen +
554          bytesToHex(buf, offset, length);
555      LOG.warn(msg);
556      throw new IllegalArgumentException(msg);
557    }
558    // check the row
559    if (pos + Bytes.SIZEOF_SHORT > endOffset) {
560      String msg =
561          "Overflow when reading row length at position=" + pos + bytesToHex(buf, offset, length);
562      LOG.warn(msg);
563      throw new IllegalArgumentException(msg);
564    }
565    short rowLen = Bytes.toShort(buf, pos, Bytes.SIZEOF_SHORT);
566    pos += Bytes.SIZEOF_SHORT;
567    if (rowLen < 0 || pos + rowLen > endOffset) {
568      String msg =
569          "Invalid row length in KeyValue, rowLength=" + rowLen + bytesToHex(buf, offset, length);
570      LOG.warn(msg);
571      throw new IllegalArgumentException(msg);
572    }
573    pos += rowLen;
574    // check the family
575    if (pos + Bytes.SIZEOF_BYTE > endOffset) {
576      String msg = "Overflow when reading family length at position=" + pos +
577          bytesToHex(buf, offset, length);
578      LOG.warn(msg);
579      throw new IllegalArgumentException(msg);
580    }
581    int familyLen = buf[pos];
582    pos += Bytes.SIZEOF_BYTE;
583    if (familyLen < 0 || pos + familyLen > endOffset) {
584      String msg = "Invalid family length in KeyValue, familyLength=" + familyLen +
585          bytesToHex(buf, offset, length);
586      LOG.warn(msg);
587      throw new IllegalArgumentException(msg);
588    }
589    pos += familyLen;
590    // check the qualifier
591    int qualifierLen = keyLen - Bytes.SIZEOF_SHORT - rowLen - Bytes.SIZEOF_BYTE - familyLen
592        - Bytes.SIZEOF_LONG - Bytes.SIZEOF_BYTE;
593    if (qualifierLen < 0 || pos + qualifierLen > endOffset) {
594      String msg = "Invalid qualifier length in KeyValue, qualifierLen=" + qualifierLen +
595              bytesToHex(buf, offset, length);
596      LOG.warn(msg);
597      throw new IllegalArgumentException(msg);
598    }
599    pos += qualifierLen;
600    // check the timestamp
601    if (pos + Bytes.SIZEOF_LONG > endOffset) {
602      String msg =
603          "Overflow when reading timestamp at position=" + pos + bytesToHex(buf, offset, length);
604      LOG.warn(msg);
605      throw new IllegalArgumentException(msg);
606    }
607    long timestamp = Bytes.toLong(buf, pos, Bytes.SIZEOF_LONG);
608    if (timestamp < 0) {
609      String msg =
610          "Timestamp cannot be negative, ts=" + timestamp + bytesToHex(buf, offset, length);
611      LOG.warn(msg);
612      throw new IllegalArgumentException(msg);
613    }
614    pos += Bytes.SIZEOF_LONG;
615    // check the type
616    if (pos + Bytes.SIZEOF_BYTE > endOffset) {
617      String msg =
618          "Overflow when reading type at position=" + pos + bytesToHex(buf, offset, length);
619      LOG.warn(msg);
620      throw new IllegalArgumentException(msg);
621    }
622    byte type = buf[pos];
623    if (!Type.isValidType(type)) {
624      String msg = "Invalid type in KeyValue, type=" + type + bytesToHex(buf, offset, length);
625      LOG.warn(msg);
626      throw new IllegalArgumentException(msg);
627    }
628    pos += Bytes.SIZEOF_BYTE;
629    // check the value
630    if (pos + valLen > endOffset) {
631      String msg =
632          "Overflow when reading value part at position=" + pos + bytesToHex(buf, offset, length);
633      LOG.warn(msg);
634      throw new IllegalArgumentException(msg);
635    }
636    pos += valLen;
637    // check the tags
638    if (withTags) {
639      if (pos == endOffset) {
640        // withTags is true but no tag in the cell.
641        return;
642      }
643      pos = checkKeyValueTagBytes(buf, offset, length, pos, endOffset);
644    }
645    if (pos != endOffset) {
646      String msg = "Some redundant bytes in KeyValue's buffer, startOffset=" + pos + ", endOffset="
647          + endOffset + bytesToHex(buf, offset, length);
648      LOG.warn(msg);
649      throw new IllegalArgumentException(msg);
650    }
651  }
652
653  private static int checkKeyValueTagBytes(byte[] buf, int offset, int length, int pos,
654      int endOffset) {
655    if (pos + Bytes.SIZEOF_SHORT > endOffset) {
656      String msg = "Overflow when reading tags length at position=" + pos +
657          bytesToHex(buf, offset, length);
658      LOG.warn(msg);
659      throw new IllegalArgumentException(msg);
660    }
661    short tagsLen = Bytes.toShort(buf, pos);
662    pos += Bytes.SIZEOF_SHORT;
663    if (tagsLen < 0 || pos + tagsLen > endOffset) {
664      String msg = "Invalid tags length in KeyValue at position=" + (pos - Bytes.SIZEOF_SHORT)
665          + bytesToHex(buf, offset, length);
666      LOG.warn(msg);
667      throw new IllegalArgumentException(msg);
668    }
669    int tagsEndOffset = pos + tagsLen;
670    for (; pos < tagsEndOffset;) {
671      if (pos + Tag.TAG_LENGTH_SIZE > endOffset) {
672        String msg = "Overflow when reading tag length at position=" + pos +
673            bytesToHex(buf, offset, length);
674        LOG.warn(msg);
675        throw new IllegalArgumentException(msg);
676      }
677      short tagLen = Bytes.toShort(buf, pos);
678      pos += Tag.TAG_LENGTH_SIZE;
679      // tagLen contains one byte tag type, so must be not less than 1.
680      if (tagLen < 1 || pos + tagLen > endOffset) {
681        String msg =
682            "Invalid tag length at position=" + (pos - Tag.TAG_LENGTH_SIZE) + ", tagLength="
683                + tagLen + bytesToHex(buf, offset, length);
684        LOG.warn(msg);
685        throw new IllegalArgumentException(msg);
686      }
687      pos += tagLen;
688    }
689    return pos;
690  }
691
692  /**
693   * Create a KeyValue reading from the raw InputStream. Named
694   * <code>createKeyValueFromInputStream</code> so doesn't clash with {@link #create(DataInput)}
695   * @param in inputStream to read.
696   * @param withTags whether the keyvalue should include tags are not
697   * @return Created KeyValue OR if we find a length of zero, we will return null which can be
698   *         useful marking a stream as done.
699   * @throws IOException
700   */
701  public static KeyValue createKeyValueFromInputStream(InputStream in, boolean withTags)
702      throws IOException {
703    byte[] intBytes = new byte[Bytes.SIZEOF_INT];
704    int bytesRead = 0;
705    while (bytesRead < intBytes.length) {
706      int n = in.read(intBytes, bytesRead, intBytes.length - bytesRead);
707      if (n < 0) {
708        if (bytesRead == 0) {
709          throw new EOFException();
710        }
711        throw new IOException("Failed read of int, read " + bytesRead + " bytes");
712      }
713      bytesRead += n;
714    }
715    byte[] bytes = new byte[Bytes.toInt(intBytes)];
716    IOUtils.readFully(in, bytes, 0, bytes.length);
717    return withTags ? new KeyValue(bytes, 0, bytes.length)
718        : new NoTagsKeyValue(bytes, 0, bytes.length);
719  }
720
721  /**
722   * @param b
723   * @return A KeyValue made of a byte array that holds the key-only part.
724   *         Needed to convert hfile index members to KeyValues.
725   */
726  public static KeyValue createKeyValueFromKey(final byte[] b) {
727    return createKeyValueFromKey(b, 0, b.length);
728  }
729
730  /**
731   * @param bb
732   * @return A KeyValue made of a byte buffer that holds the key-only part.
733   *         Needed to convert hfile index members to KeyValues.
734   */
735  public static KeyValue createKeyValueFromKey(final ByteBuffer bb) {
736    return createKeyValueFromKey(bb.array(), bb.arrayOffset(), bb.limit());
737  }
738
739  /**
740   * @param b
741   * @param o
742   * @param l
743   * @return A KeyValue made of a byte array that holds the key-only part.
744   *         Needed to convert hfile index members to KeyValues.
745   */
746  public static KeyValue createKeyValueFromKey(final byte[] b, final int o, final int l) {
747    byte[] newb = new byte[l + KeyValue.ROW_OFFSET];
748    System.arraycopy(b, o, newb, KeyValue.ROW_OFFSET, l);
749    Bytes.putInt(newb, 0, l);
750    Bytes.putInt(newb, Bytes.SIZEOF_INT, 0);
751    return new KeyValue(newb);
752  }
753
754  /**
755   * @param in
756   *          Where to read bytes from. Creates a byte array to hold the
757   *          KeyValue backing bytes copied from the steam.
758   * @return KeyValue created by deserializing from <code>in</code> OR if we
759   *         find a length of zero, we will return null which can be useful
760   *         marking a stream as done.
761   * @throws IOException
762   */
763  public static KeyValue create(final DataInput in) throws IOException {
764    return create(in.readInt(), in);
765  }
766
767  /**
768   * Create a KeyValue reading <code>length</code> from <code>in</code>
769   *
770   * @param length
771   * @param in
772   * @return Created KeyValue OR if we find a length of zero, we will return
773   *         null which can be useful marking a stream as done.
774   * @throws IOException
775   */
776  public static KeyValue create(int length, final DataInput in) throws IOException {
777
778    if (length <= 0) {
779      if (length == 0)
780        return null;
781      throw new IOException("Failed read " + length + " bytes, stream corrupt?");
782    }
783
784    // This is how the old Writables.readFrom used to deserialize. Didn't even
785    // vint.
786    byte[] bytes = new byte[length];
787    in.readFully(bytes);
788    return new KeyValue(bytes, 0, length);
789  }
790
791  public static int getSerializedSize(Cell cell, boolean withTags) {
792    if (withTags) {
793      return cell.getSerializedSize();
794    }
795    if (cell instanceof ExtendedCell) {
796      return ((ExtendedCell) cell).getSerializedSize(withTags);
797    }
798    return length(cell.getRowLength(), cell.getFamilyLength(), cell.getQualifierLength(),
799      cell.getValueLength(), cell.getTagsLength(), withTags);
800  }
801
802  public static int oswrite(final Cell cell, final OutputStream out, final boolean withTags)
803      throws IOException {
804    if (cell instanceof ExtendedCell) {
805      return ((ExtendedCell)cell).write(out, withTags);
806    } else {
807      short rlen = cell.getRowLength();
808      byte flen = cell.getFamilyLength();
809      int qlen = cell.getQualifierLength();
810      int vlen = cell.getValueLength();
811      int tlen = cell.getTagsLength();
812      int size = 0;
813      // write key length
814      int klen = keyLength(rlen, flen, qlen);
815      ByteBufferUtils.putInt(out, klen);
816      // write value length
817      ByteBufferUtils.putInt(out, vlen);
818      // Write rowkey - 2 bytes rk length followed by rowkey bytes
819      StreamUtils.writeShort(out, rlen);
820      out.write(cell.getRowArray(), cell.getRowOffset(), rlen);
821      // Write cf - 1 byte of cf length followed by the family bytes
822      out.write(flen);
823      out.write(cell.getFamilyArray(), cell.getFamilyOffset(), flen);
824      // write qualifier
825      out.write(cell.getQualifierArray(), cell.getQualifierOffset(), qlen);
826      // write timestamp
827      StreamUtils.writeLong(out, cell.getTimestamp());
828      // write the type
829      out.write(cell.getTypeByte());
830      // write value
831      out.write(cell.getValueArray(), cell.getValueOffset(), vlen);
832      size = klen + vlen + KeyValue.KEYVALUE_INFRASTRUCTURE_SIZE;
833      // write tags if we have to
834      if (withTags && tlen > 0) {
835        // 2 bytes tags length followed by tags bytes
836        // tags length is serialized with 2 bytes only(short way) even if the
837        // type is int. As this
838        // is non -ve numbers, we save the sign bit. See HBASE-11437
839        out.write((byte) (0xff & (tlen >> 8)));
840        out.write((byte) (0xff & tlen));
841        out.write(cell.getTagsArray(), cell.getTagsOffset(), tlen);
842        size += tlen + KeyValue.TAGS_LENGTH_SIZE;
843      }
844      return size;
845    }
846  }
847}