001/*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018package org.apache.hadoop.hbase;
019
020import java.io.DataInput;
021import java.io.DataOutput;
022import java.io.EOFException;
023import java.io.IOException;
024import java.io.InputStream;
025import java.io.OutputStream;
026import java.nio.ByteBuffer;
027import java.util.ArrayList;
028import java.util.List;
029import org.apache.hadoop.hbase.io.util.StreamUtils;
030import org.apache.hadoop.hbase.util.ByteBufferUtils;
031import org.apache.hadoop.hbase.util.Bytes;
032import org.apache.hadoop.io.IOUtils;
033import org.apache.hadoop.io.WritableUtils;
034import org.apache.yetus.audience.InterfaceAudience;
035import org.slf4j.Logger;
036import org.slf4j.LoggerFactory;
037
038import org.apache.hbase.thirdparty.com.google.common.base.Function;
039import org.apache.hbase.thirdparty.com.google.common.collect.Lists;
040import org.apache.hbase.thirdparty.org.apache.commons.collections4.IterableUtils;
041
042/**
043 * static convenience methods for dealing with KeyValues and collections of KeyValues
044 */
045@InterfaceAudience.Private
046public class KeyValueUtil {
047
048  private static final Logger LOG = LoggerFactory.getLogger(KeyValueUtil.class);
049
050  /**************** length *********************/
051
052  public static int length(short rlen, byte flen, int qlen, int vlen, int tlen, boolean withTags) {
053    if (withTags) {
054      return (int) KeyValue.getKeyValueDataStructureSize(rlen, flen, qlen, vlen, tlen);
055    }
056    return (int) KeyValue.getKeyValueDataStructureSize(rlen, flen, qlen, vlen);
057  }
058
059  /**
060   * Returns number of bytes this cell's key part would have been used if serialized as in
061   * {@link KeyValue}. Key includes rowkey, family, qualifier, timestamp and type. n * @return the
062   * key length
063   */
064  public static int keyLength(final Cell cell) {
065    return keyLength(cell.getRowLength(), cell.getFamilyLength(), cell.getQualifierLength());
066  }
067
068  private static int keyLength(short rlen, byte flen, int qlen) {
069    return (int) KeyValue.getKeyDataStructureSize(rlen, flen, qlen);
070  }
071
072  public static int lengthWithMvccVersion(final KeyValue kv, final boolean includeMvccVersion) {
073    int length = kv.getLength();
074    if (includeMvccVersion) {
075      length += WritableUtils.getVIntSize(kv.getSequenceId());
076    }
077    return length;
078  }
079
080  public static int totalLengthWithMvccVersion(final Iterable<? extends KeyValue> kvs,
081    final boolean includeMvccVersion) {
082    int length = 0;
083    for (KeyValue kv : IterableUtils.emptyIfNull(kvs)) {
084      length += lengthWithMvccVersion(kv, includeMvccVersion);
085    }
086    return length;
087  }
088
089  /**************** copy the cell to create a new keyvalue *********************/
090
091  public static KeyValue copyToNewKeyValue(final Cell cell) {
092    byte[] bytes = copyToNewByteArray(cell);
093    KeyValue kvCell = new KeyValue(bytes, 0, bytes.length);
094    kvCell.setSequenceId(cell.getSequenceId());
095    return kvCell;
096  }
097
098  /**
099   * The position will be set to the beginning of the new ByteBuffer n * @return the Bytebuffer
100   * containing the key part of the cell
101   */
102  public static ByteBuffer copyKeyToNewByteBuffer(final Cell cell) {
103    byte[] bytes = new byte[keyLength(cell)];
104    appendKeyTo(cell, bytes, 0);
105    ByteBuffer buffer = ByteBuffer.wrap(bytes);
106    return buffer;
107  }
108
109  /**
110   * Copies the key to a new KeyValue n * @return the KeyValue that consists only the key part of
111   * the incoming cell
112   */
113  public static KeyValue toNewKeyCell(final Cell cell) {
114    byte[] bytes = new byte[keyLength(cell)];
115    appendKeyTo(cell, bytes, 0);
116    KeyValue kv = new KeyValue.KeyOnlyKeyValue(bytes, 0, bytes.length);
117    // Set the seq id. The new key cell could be used in comparisons so it
118    // is important that it uses the seqid also. If not the comparsion would fail
119    kv.setSequenceId(cell.getSequenceId());
120    return kv;
121  }
122
123  public static byte[] copyToNewByteArray(final Cell cell) {
124    // Cell#getSerializedSize returns the serialized size of the Source cell, which may
125    // not serialize all fields. We are constructing a KeyValue backing array here,
126    // which does include all fields, and must allocate accordingly.
127    // TODO we could probably use Cell#getSerializedSize safely, the errors were
128    // caused by cells corrupted by use-after-free bugs
129    int v1Length = length(cell.getRowLength(), cell.getFamilyLength(), cell.getQualifierLength(),
130      cell.getValueLength(), cell.getTagsLength(), true);
131    byte[] backingBytes = new byte[v1Length];
132    appendToByteArray(cell, backingBytes, 0, true);
133    return backingBytes;
134  }
135
136  public static int appendKeyTo(final Cell cell, final byte[] output, final int offset) {
137    int nextOffset = offset;
138    nextOffset = Bytes.putShort(output, nextOffset, cell.getRowLength());
139    nextOffset = CellUtil.copyRowTo(cell, output, nextOffset);
140    nextOffset = Bytes.putByte(output, nextOffset, cell.getFamilyLength());
141    nextOffset = CellUtil.copyFamilyTo(cell, output, nextOffset);
142    nextOffset = CellUtil.copyQualifierTo(cell, output, nextOffset);
143    nextOffset = Bytes.putLong(output, nextOffset, cell.getTimestamp());
144    nextOffset = Bytes.putByte(output, nextOffset, cell.getTypeByte());
145    return nextOffset;
146  }
147
148  /**************** copy key and value *********************/
149
150  public static int appendToByteArray(Cell cell, byte[] output, int offset, boolean withTags) {
151    int pos = offset;
152    pos = Bytes.putInt(output, pos, keyLength(cell));
153    pos = Bytes.putInt(output, pos, cell.getValueLength());
154    pos = appendKeyTo(cell, output, pos);
155    pos = CellUtil.copyValueTo(cell, output, pos);
156    if (withTags && (cell.getTagsLength() > 0)) {
157      pos = Bytes.putAsShort(output, pos, cell.getTagsLength());
158      pos = PrivateCellUtil.copyTagsTo(cell, output, pos);
159    }
160    return pos;
161  }
162
163  /**
164   * Copy the Cell content into the passed buf in KeyValue serialization format.
165   */
166  public static int appendTo(Cell cell, ByteBuffer buf, int offset, boolean withTags) {
167    offset = ByteBufferUtils.putInt(buf, offset, keyLength(cell));// Key length
168    offset = ByteBufferUtils.putInt(buf, offset, cell.getValueLength());// Value length
169    offset = appendKeyTo(cell, buf, offset);
170    offset = CellUtil.copyValueTo(cell, buf, offset);// Value bytes
171    int tagsLength = cell.getTagsLength();
172    if (withTags && (tagsLength > 0)) {
173      offset = ByteBufferUtils.putAsShort(buf, offset, tagsLength);// Tags length
174      offset = PrivateCellUtil.copyTagsTo(cell, buf, offset);// Tags bytes
175    }
176    return offset;
177  }
178
179  public static int appendKeyTo(Cell cell, ByteBuffer buf, int offset) {
180    offset = ByteBufferUtils.putShort(buf, offset, cell.getRowLength());// RK length
181    offset = CellUtil.copyRowTo(cell, buf, offset);// Row bytes
182    offset = ByteBufferUtils.putByte(buf, offset, cell.getFamilyLength());// CF length
183    offset = CellUtil.copyFamilyTo(cell, buf, offset);// CF bytes
184    offset = CellUtil.copyQualifierTo(cell, buf, offset);// Qualifier bytes
185    offset = ByteBufferUtils.putLong(buf, offset, cell.getTimestamp());// TS
186    offset = ByteBufferUtils.putByte(buf, offset, cell.getTypeByte());// Type
187    return offset;
188  }
189
190  public static void appendToByteBuffer(final ByteBuffer bb, final KeyValue kv,
191    final boolean includeMvccVersion) {
192    // keep pushing the limit out. assume enough capacity
193    bb.limit(bb.position() + kv.getLength());
194    bb.put(kv.getBuffer(), kv.getOffset(), kv.getLength());
195    if (includeMvccVersion) {
196      int numMvccVersionBytes = WritableUtils.getVIntSize(kv.getSequenceId());
197      ByteBufferUtils.extendLimit(bb, numMvccVersionBytes);
198      ByteBufferUtils.writeVLong(bb, kv.getSequenceId());
199    }
200  }
201
202  /**************** iterating *******************************/
203
204  /**
205   * Creates a new KeyValue object positioned in the supplied ByteBuffer and sets the ByteBuffer's
206   * position to the start of the next KeyValue. Does not allocate a new array or copy data. nnn
207   */
208  public static KeyValue nextShallowCopy(final ByteBuffer bb, final boolean includesMvccVersion,
209    boolean includesTags) {
210    if (bb.isDirect()) {
211      throw new IllegalArgumentException("only supports heap buffers");
212    }
213    if (bb.remaining() < 1) {
214      return null;
215    }
216    int underlyingArrayOffset = bb.arrayOffset() + bb.position();
217    int keyLength = bb.getInt();
218    int valueLength = bb.getInt();
219    ByteBufferUtils.skip(bb, keyLength + valueLength);
220    int tagsLength = 0;
221    if (includesTags) {
222      // Read short as unsigned, high byte first
223      tagsLength = ((bb.get() & 0xff) << 8) ^ (bb.get() & 0xff);
224      ByteBufferUtils.skip(bb, tagsLength);
225    }
226    int kvLength = (int) KeyValue.getKeyValueDataStructureSize(keyLength, valueLength, tagsLength);
227    KeyValue keyValue = new KeyValue(bb.array(), underlyingArrayOffset, kvLength);
228    if (includesMvccVersion) {
229      long mvccVersion = ByteBufferUtils.readVLong(bb);
230      keyValue.setSequenceId(mvccVersion);
231    }
232    return keyValue;
233  }
234
235  /*************** next/previous **********************************/
236
237  /**
238   * Decrement the timestamp. For tests (currently wasteful) Remember timestamps are sorted reverse
239   * chronologically. n * @return previous key
240   */
241  public static KeyValue previousKey(final KeyValue in) {
242    return createFirstOnRow(CellUtil.cloneRow(in), CellUtil.cloneFamily(in),
243      CellUtil.cloneQualifier(in), in.getTimestamp() - 1);
244  }
245
246  /**
247   * Create a KeyValue for the specified row, family and qualifier that would be larger than or
248   * equal to all other possible KeyValues that have the same row, family, qualifier. Used for
249   * reseeking. Should NEVER be returned to a client. n * row key n * row offset n * row length n *
250   * family name n * family offset n * family length n * column qualifier n * qualifier offset n *
251   * qualifier length
252   * @return Last possible key on passed row, family, qualifier.
253   */
254  public static KeyValue createLastOnRow(final byte[] row, final int roffset, final int rlength,
255    final byte[] family, final int foffset, final int flength, final byte[] qualifier,
256    final int qoffset, final int qlength) {
257    return new KeyValue(row, roffset, rlength, family, foffset, flength, qualifier, qoffset,
258      qlength, HConstants.OLDEST_TIMESTAMP, KeyValue.Type.Minimum, null, 0, 0);
259  }
260
261  /**
262   * Create a KeyValue that is smaller than all other possible KeyValues for the given row. That is
263   * any (valid) KeyValue on 'row' would sort _after_ the result.
264   * @param row - row key (arbitrary byte array)
265   * @return First possible KeyValue on passed <code>row</code>
266   */
267  public static KeyValue createFirstOnRow(final byte[] row, int roffset, short rlength) {
268    return new KeyValue(row, roffset, rlength, null, 0, 0, null, 0, 0, HConstants.LATEST_TIMESTAMP,
269      KeyValue.Type.Maximum, null, 0, 0);
270  }
271
272  /**
273   * Creates a KeyValue that is last on the specified row id. That is, every other possible KeyValue
274   * for the given row would compareTo() less than the result of this call.
275   * @param row row key
276   * @return Last possible KeyValue on passed <code>row</code>
277   */
278  public static KeyValue createLastOnRow(final byte[] row) {
279    return new KeyValue(row, null, null, HConstants.LATEST_TIMESTAMP, KeyValue.Type.Minimum);
280  }
281
282  /**
283   * Create a KeyValue that is smaller than all other possible KeyValues for the given row. That is
284   * any (valid) KeyValue on 'row' would sort _after_ the result.
285   * @param row - row key (arbitrary byte array)
286   * @return First possible KeyValue on passed <code>row</code>
287   */
288  public static KeyValue createFirstOnRow(final byte[] row) {
289    return createFirstOnRow(row, HConstants.LATEST_TIMESTAMP);
290  }
291
292  /**
293   * Creates a KeyValue that is smaller than all other KeyValues that are older than the passed
294   * timestamp.
295   * @param row - row key (arbitrary byte array)
296   * @param ts  - timestamp
297   * @return First possible key on passed <code>row</code> and timestamp.
298   */
299  public static KeyValue createFirstOnRow(final byte[] row, final long ts) {
300    return new KeyValue(row, null, null, ts, KeyValue.Type.Maximum);
301  }
302
303  /**
304   * Create a KeyValue for the specified row, family and qualifier that would be smaller than all
305   * other possible KeyValues that have the same row,family,qualifier. Used for seeking.
306   * @param row       - row key (arbitrary byte array)
307   * @param family    - family name
308   * @param qualifier - column qualifier
309   * @return First possible key on passed <code>row</code>, and column.
310   */
311  public static KeyValue createFirstOnRow(final byte[] row, final byte[] family,
312    final byte[] qualifier) {
313    return new KeyValue(row, family, qualifier, HConstants.LATEST_TIMESTAMP, KeyValue.Type.Maximum);
314  }
315
316  /**
317   * Create a KeyValue for the specified row, family and qualifier that would be smaller than all
318   * other possible KeyValues that have the same row, family, qualifier. Used for seeking.
319   * @param row - row key (arbitrary byte array)
320   * @param f   - family name
321   * @param q   - column qualifier
322   * @param ts  - timestamp
323   * @return First possible key on passed <code>row</code>, column and timestamp
324   */
325  public static KeyValue createFirstOnRow(final byte[] row, final byte[] f, final byte[] q,
326    final long ts) {
327    return new KeyValue(row, f, q, ts, KeyValue.Type.Maximum);
328  }
329
330  /**
331   * Create a KeyValue for the specified row, family and qualifier that would be smaller than all
332   * other possible KeyValues that have the same row, family, qualifier. Used for seeking.
333   * @param row       row key
334   * @param roffset   row offset
335   * @param rlength   row length
336   * @param family    family name
337   * @param foffset   family offset
338   * @param flength   family length
339   * @param qualifier column qualifier
340   * @param qoffset   qualifier offset
341   * @param qlength   qualifier length
342   * @return First possible key on passed Row, Family, Qualifier.
343   */
344  public static KeyValue createFirstOnRow(final byte[] row, final int roffset, final int rlength,
345    final byte[] family, final int foffset, final int flength, final byte[] qualifier,
346    final int qoffset, final int qlength) {
347    return new KeyValue(row, roffset, rlength, family, foffset, flength, qualifier, qoffset,
348      qlength, HConstants.LATEST_TIMESTAMP, KeyValue.Type.Maximum, null, 0, 0);
349  }
350
351  /**
352   * Create a KeyValue for the specified row, family and qualifier that would be smaller than all
353   * other possible KeyValues that have the same row, family, qualifier. Used for seeking.
354   * @param buffer    the buffer to use for the new <code>KeyValue</code> object
355   * @param row       the value key
356   * @param family    family name
357   * @param qualifier column qualifier
358   * @return First possible key on passed Row, Family, Qualifier.
359   * @throws IllegalArgumentException The resulting <code>KeyValue</code> object would be larger
360   *                                  than the provided buffer or than
361   *                                  <code>Integer.MAX_VALUE</code>
362   */
363  public static KeyValue createFirstOnRow(byte[] buffer, final byte[] row, final byte[] family,
364    final byte[] qualifier) throws IllegalArgumentException {
365    return createFirstOnRow(buffer, 0, row, 0, row.length, family, 0, family.length, qualifier, 0,
366      qualifier.length);
367  }
368
369  /**
370   * Create a KeyValue for the specified row, family and qualifier that would be smaller than all
371   * other possible KeyValues that have the same row, family, qualifier. Used for seeking.
372   * @param buffer    the buffer to use for the new <code>KeyValue</code> object
373   * @param boffset   buffer offset
374   * @param row       the value key
375   * @param roffset   row offset
376   * @param rlength   row length
377   * @param family    family name
378   * @param foffset   family offset
379   * @param flength   family length
380   * @param qualifier column qualifier
381   * @param qoffset   qualifier offset
382   * @param qlength   qualifier length
383   * @return First possible key on passed Row, Family, Qualifier.
384   * @throws IllegalArgumentException The resulting <code>KeyValue</code> object would be larger
385   *                                  than the provided buffer or than
386   *                                  <code>Integer.MAX_VALUE</code>
387   */
388  public static KeyValue createFirstOnRow(byte[] buffer, final int boffset, final byte[] row,
389    final int roffset, final int rlength, final byte[] family, final int foffset, final int flength,
390    final byte[] qualifier, final int qoffset, final int qlength) throws IllegalArgumentException {
391
392    long lLength = KeyValue.getKeyValueDataStructureSize(rlength, flength, qlength, 0);
393
394    if (lLength > Integer.MAX_VALUE) {
395      throw new IllegalArgumentException("KeyValue length " + lLength + " > " + Integer.MAX_VALUE);
396    }
397    int iLength = (int) lLength;
398    if (buffer.length - boffset < iLength) {
399      throw new IllegalArgumentException(
400        "Buffer size " + (buffer.length - boffset) + " < " + iLength);
401    }
402
403    int len = KeyValue.writeByteArray(buffer, boffset, row, roffset, rlength, family, foffset,
404      flength, qualifier, qoffset, qlength, HConstants.LATEST_TIMESTAMP, KeyValue.Type.Maximum,
405      null, 0, 0, null);
406    return new KeyValue(buffer, boffset, len);
407  }
408
409  /*************** misc **********************************/
410  /**
411   * n * @return <code>cell</code> if it is an object of class {@link KeyValue} else we will return
412   * a new {@link KeyValue} instance made from <code>cell</code> Note: Even if the cell is an object
413   * of any of the subclass of {@link KeyValue}, we will create a new {@link KeyValue} object
414   * wrapping same buffer. This API is used only with MR based tools which expect the type to be
415   * exactly KeyValue. That is the reason for doing this way.
416   * @deprecated without any replacement.
417   */
418  @Deprecated
419  public static KeyValue ensureKeyValue(final Cell cell) {
420    if (cell == null) return null;
421    if (cell instanceof KeyValue) {
422      if (cell.getClass().getName().equals(KeyValue.class.getName())) {
423        return (KeyValue) cell;
424      }
425      // Cell is an Object of any of the sub classes of KeyValue. Make a new KeyValue wrapping the
426      // same byte[]
427      KeyValue kv = (KeyValue) cell;
428      KeyValue newKv = new KeyValue(kv.bytes, kv.offset, kv.length);
429      newKv.setSequenceId(kv.getSequenceId());
430      return newKv;
431    }
432    return copyToNewKeyValue(cell);
433  }
434
435  @Deprecated
436  public static List<KeyValue> ensureKeyValues(List<Cell> cells) {
437    List<KeyValue> lazyList = Lists.transform(cells, new Function<Cell, KeyValue>() {
438      @Override
439      public KeyValue apply(Cell arg0) {
440        return KeyValueUtil.ensureKeyValue(arg0);
441      }
442    });
443    return new ArrayList<>(lazyList);
444  }
445
446  /**
447   * Write out a KeyValue in the manner in which we used to when KeyValue was a Writable. nn
448   * * @return Length written on stream n * @see #create(DataInput) for the inverse function
449   */
450  public static long write(final KeyValue kv, final DataOutput out) throws IOException {
451    // This is how the old Writables write used to serialize KVs. Need to figure
452    // way to make it
453    // work for all implementations.
454    int length = kv.getLength();
455    out.writeInt(length);
456    out.write(kv.getBuffer(), kv.getOffset(), length);
457    return (long) length + Bytes.SIZEOF_INT;
458  }
459
460  static String bytesToHex(byte[] buf, int offset, int length) {
461    String bufferContents = buf != null ? Bytes.toStringBinary(buf, offset, length) : "<null>";
462    return ", KeyValueBytesHex=" + bufferContents + ", offset=" + offset + ", length=" + length;
463  }
464
465  static void checkKeyValueBytes(byte[] buf, int offset, int length, boolean withTags) {
466    if (buf == null) {
467      String msg = "Invalid to have null byte array in KeyValue.";
468      LOG.warn(msg);
469      throw new IllegalArgumentException(msg);
470    }
471
472    int pos = offset, endOffset = offset + length;
473    // check the key
474    if (pos + Bytes.SIZEOF_INT > endOffset) {
475      String msg =
476        "Overflow when reading key length at position=" + pos + bytesToHex(buf, offset, length);
477      LOG.warn(msg);
478      throw new IllegalArgumentException(msg);
479    }
480    int keyLen = Bytes.toInt(buf, pos, Bytes.SIZEOF_INT);
481    pos += Bytes.SIZEOF_INT;
482    if (keyLen <= 0 || pos + keyLen > endOffset) {
483      String msg =
484        "Invalid key length in KeyValue. keyLength=" + keyLen + bytesToHex(buf, offset, length);
485      LOG.warn(msg);
486      throw new IllegalArgumentException(msg);
487    }
488    // check the value
489    if (pos + Bytes.SIZEOF_INT > endOffset) {
490      String msg =
491        "Overflow when reading value length at position=" + pos + bytesToHex(buf, offset, length);
492      LOG.warn(msg);
493      throw new IllegalArgumentException(msg);
494    }
495    int valLen = Bytes.toInt(buf, pos, Bytes.SIZEOF_INT);
496    pos += Bytes.SIZEOF_INT;
497    if (valLen < 0 || pos + valLen > endOffset) {
498      String msg =
499        "Invalid value length in KeyValue, valueLength=" + valLen + bytesToHex(buf, offset, length);
500      LOG.warn(msg);
501      throw new IllegalArgumentException(msg);
502    }
503    // check the row
504    if (pos + Bytes.SIZEOF_SHORT > endOffset) {
505      String msg =
506        "Overflow when reading row length at position=" + pos + bytesToHex(buf, offset, length);
507      LOG.warn(msg);
508      throw new IllegalArgumentException(msg);
509    }
510    short rowLen = Bytes.toShort(buf, pos, Bytes.SIZEOF_SHORT);
511    pos += Bytes.SIZEOF_SHORT;
512    if (rowLen < 0 || pos + rowLen > endOffset) {
513      String msg =
514        "Invalid row length in KeyValue, rowLength=" + rowLen + bytesToHex(buf, offset, length);
515      LOG.warn(msg);
516      throw new IllegalArgumentException(msg);
517    }
518    pos += rowLen;
519    // check the family
520    if (pos + Bytes.SIZEOF_BYTE > endOffset) {
521      String msg =
522        "Overflow when reading family length at position=" + pos + bytesToHex(buf, offset, length);
523      LOG.warn(msg);
524      throw new IllegalArgumentException(msg);
525    }
526    int familyLen = buf[pos];
527    pos += Bytes.SIZEOF_BYTE;
528    if (familyLen < 0 || pos + familyLen > endOffset) {
529      String msg = "Invalid family length in KeyValue, familyLength=" + familyLen
530        + bytesToHex(buf, offset, length);
531      LOG.warn(msg);
532      throw new IllegalArgumentException(msg);
533    }
534    pos += familyLen;
535    // check the qualifier
536    int qualifierLen = keyLen - Bytes.SIZEOF_SHORT - rowLen - Bytes.SIZEOF_BYTE - familyLen
537      - Bytes.SIZEOF_LONG - Bytes.SIZEOF_BYTE;
538    if (qualifierLen < 0 || pos + qualifierLen > endOffset) {
539      String msg = "Invalid qualifier length in KeyValue, qualifierLen=" + qualifierLen
540        + bytesToHex(buf, offset, length);
541      LOG.warn(msg);
542      throw new IllegalArgumentException(msg);
543    }
544    pos += qualifierLen;
545    // check the timestamp
546    if (pos + Bytes.SIZEOF_LONG > endOffset) {
547      String msg =
548        "Overflow when reading timestamp at position=" + pos + bytesToHex(buf, offset, length);
549      LOG.warn(msg);
550      throw new IllegalArgumentException(msg);
551    }
552    long timestamp = Bytes.toLong(buf, pos, Bytes.SIZEOF_LONG);
553    if (timestamp < 0) {
554      String msg =
555        "Timestamp cannot be negative, ts=" + timestamp + bytesToHex(buf, offset, length);
556      LOG.warn(msg);
557      throw new IllegalArgumentException(msg);
558    }
559    pos += Bytes.SIZEOF_LONG;
560    // check the type
561    if (pos + Bytes.SIZEOF_BYTE > endOffset) {
562      String msg =
563        "Overflow when reading type at position=" + pos + bytesToHex(buf, offset, length);
564      LOG.warn(msg);
565      throw new IllegalArgumentException(msg);
566    }
567    byte type = buf[pos];
568    if (!KeyValue.Type.isValidType(type)) {
569      String msg = "Invalid type in KeyValue, type=" + type + bytesToHex(buf, offset, length);
570      LOG.warn(msg);
571      throw new IllegalArgumentException(msg);
572    }
573    pos += Bytes.SIZEOF_BYTE;
574    // check the value
575    if (pos + valLen > endOffset) {
576      String msg =
577        "Overflow when reading value part at position=" + pos + bytesToHex(buf, offset, length);
578      LOG.warn(msg);
579      throw new IllegalArgumentException(msg);
580    }
581    pos += valLen;
582    // check the tags
583    if (withTags) {
584      if (pos == endOffset) {
585        // withTags is true but no tag in the cell.
586        return;
587      }
588      pos = checkKeyValueTagBytes(buf, offset, length, pos, endOffset);
589    }
590    if (pos != endOffset) {
591      String msg = "Some redundant bytes in KeyValue's buffer, startOffset=" + pos + ", endOffset="
592        + endOffset + bytesToHex(buf, offset, length);
593      LOG.warn(msg);
594      throw new IllegalArgumentException(msg);
595    }
596  }
597
598  private static int checkKeyValueTagBytes(byte[] buf, int offset, int length, int pos,
599    int endOffset) {
600    if (pos + Bytes.SIZEOF_SHORT > endOffset) {
601      String msg =
602        "Overflow when reading tags length at position=" + pos + bytesToHex(buf, offset, length);
603      LOG.warn(msg);
604      throw new IllegalArgumentException(msg);
605    }
606    short tagsLen = Bytes.toShort(buf, pos);
607    pos += Bytes.SIZEOF_SHORT;
608    if (tagsLen < 0 || pos + tagsLen > endOffset) {
609      String msg = "Invalid tags length in KeyValue at position=" + (pos - Bytes.SIZEOF_SHORT)
610        + bytesToHex(buf, offset, length);
611      LOG.warn(msg);
612      throw new IllegalArgumentException(msg);
613    }
614    int tagsEndOffset = pos + tagsLen;
615    for (; pos < tagsEndOffset;) {
616      if (pos + Tag.TAG_LENGTH_SIZE > endOffset) {
617        String msg =
618          "Overflow when reading tag length at position=" + pos + bytesToHex(buf, offset, length);
619        LOG.warn(msg);
620        throw new IllegalArgumentException(msg);
621      }
622      short tagLen = Bytes.toShort(buf, pos);
623      pos += Tag.TAG_LENGTH_SIZE;
624      // tagLen contains one byte tag type, so must be not less than 1.
625      if (tagLen < 1 || pos + tagLen > endOffset) {
626        String msg = "Invalid tag length at position=" + (pos - Tag.TAG_LENGTH_SIZE)
627          + ", tagLength=" + tagLen + bytesToHex(buf, offset, length);
628        LOG.warn(msg);
629        throw new IllegalArgumentException(msg);
630      }
631      pos += tagLen;
632    }
633    return pos;
634  }
635
636  /**
637   * Create a KeyValue reading from the raw InputStream. Named
638   * <code>createKeyValueFromInputStream</code> so doesn't clash with {@link #create(DataInput)}
639   * @param in       inputStream to read.
640   * @param withTags whether the keyvalue should include tags are not
641   * @return Created KeyValue OR if we find a length of zero, we will return null which can be
642   *         useful marking a stream as done. n
643   */
644  public static KeyValue createKeyValueFromInputStream(InputStream in, boolean withTags)
645    throws IOException {
646    byte[] intBytes = new byte[Bytes.SIZEOF_INT];
647    int bytesRead = 0;
648    while (bytesRead < intBytes.length) {
649      int n = in.read(intBytes, bytesRead, intBytes.length - bytesRead);
650      if (n < 0) {
651        if (bytesRead == 0) {
652          throw new EOFException();
653        }
654        throw new IOException("Failed read of int, read " + bytesRead + " bytes");
655      }
656      bytesRead += n;
657    }
658    byte[] bytes = new byte[Bytes.toInt(intBytes)];
659    IOUtils.readFully(in, bytes, 0, bytes.length);
660    return withTags
661      ? new KeyValue(bytes, 0, bytes.length)
662      : new NoTagsKeyValue(bytes, 0, bytes.length);
663  }
664
665  /**
666   * n * @return A KeyValue made of a byte array that holds the key-only part. Needed to convert
667   * hfile index members to KeyValues.
668   */
669  public static KeyValue createKeyValueFromKey(final byte[] b) {
670    return createKeyValueFromKey(b, 0, b.length);
671  }
672
673  /**
674   * n * @return A KeyValue made of a byte buffer that holds the key-only part. Needed to convert
675   * hfile index members to KeyValues.
676   */
677  public static KeyValue createKeyValueFromKey(final ByteBuffer bb) {
678    return createKeyValueFromKey(bb.array(), bb.arrayOffset(), bb.limit());
679  }
680
681  /**
682   * nnn * @return A KeyValue made of a byte array that holds the key-only part. Needed to convert
683   * hfile index members to KeyValues.
684   */
685  public static KeyValue createKeyValueFromKey(final byte[] b, final int o, final int l) {
686    byte[] newb = new byte[l + KeyValue.ROW_OFFSET];
687    System.arraycopy(b, o, newb, KeyValue.ROW_OFFSET, l);
688    Bytes.putInt(newb, 0, l);
689    Bytes.putInt(newb, Bytes.SIZEOF_INT, 0);
690    return new KeyValue(newb);
691  }
692
693  /**
694   * n * Where to read bytes from. Creates a byte array to hold the KeyValue backing bytes copied
695   * from the steam.
696   * @return KeyValue created by deserializing from <code>in</code> OR if we find a length of zero,
697   *         we will return null which can be useful marking a stream as done. n
698   */
699  public static KeyValue create(final DataInput in) throws IOException {
700    return create(in.readInt(), in);
701  }
702
703  /**
704   * Create a KeyValue reading <code>length</code> from <code>in</code> nn * @return Created
705   * KeyValue OR if we find a length of zero, we will return null which can be useful marking a
706   * stream as done. n
707   */
708  public static KeyValue create(int length, final DataInput in) throws IOException {
709
710    if (length <= 0) {
711      if (length == 0) return null;
712      throw new IOException("Failed read " + length + " bytes, stream corrupt?");
713    }
714
715    // This is how the old Writables.readFrom used to deserialize. Didn't even
716    // vint.
717    byte[] bytes = new byte[length];
718    in.readFully(bytes);
719    return new KeyValue(bytes, 0, length);
720  }
721
722  public static int getSerializedSize(Cell cell, boolean withTags) {
723    if (withTags) {
724      return cell.getSerializedSize();
725    }
726    if (cell instanceof ExtendedCell) {
727      return ((ExtendedCell) cell).getSerializedSize(withTags);
728    }
729    return length(cell.getRowLength(), cell.getFamilyLength(), cell.getQualifierLength(),
730      cell.getValueLength(), cell.getTagsLength(), withTags);
731  }
732
733  public static int oswrite(final Cell cell, final OutputStream out, final boolean withTags)
734    throws IOException {
735    if (cell instanceof ExtendedCell) {
736      return ((ExtendedCell) cell).write(out, withTags);
737    } else {
738      short rlen = cell.getRowLength();
739      byte flen = cell.getFamilyLength();
740      int qlen = cell.getQualifierLength();
741      int vlen = cell.getValueLength();
742      int tlen = cell.getTagsLength();
743      // write key length
744      int klen = keyLength(rlen, flen, qlen);
745      ByteBufferUtils.putInt(out, klen);
746      // write value length
747      ByteBufferUtils.putInt(out, vlen);
748      // Write rowkey - 2 bytes rk length followed by rowkey bytes
749      StreamUtils.writeShort(out, rlen);
750      out.write(cell.getRowArray(), cell.getRowOffset(), rlen);
751      // Write cf - 1 byte of cf length followed by the family bytes
752      out.write(flen);
753      out.write(cell.getFamilyArray(), cell.getFamilyOffset(), flen);
754      // write qualifier
755      out.write(cell.getQualifierArray(), cell.getQualifierOffset(), qlen);
756      // write timestamp
757      StreamUtils.writeLong(out, cell.getTimestamp());
758      // write the type
759      out.write(cell.getTypeByte());
760      // write value
761      out.write(cell.getValueArray(), cell.getValueOffset(), vlen);
762      int size = klen + vlen + KeyValue.KEYVALUE_INFRASTRUCTURE_SIZE;
763      // write tags if we have to
764      if (withTags && tlen > 0) {
765        // 2 bytes tags length followed by tags bytes
766        // tags length is serialized with 2 bytes only(short way) even if the
767        // type is int. As this
768        // is non -ve numbers, we save the sign bit. See HBASE-11437
769        out.write((byte) (0xff & (tlen >> 8)));
770        out.write((byte) (0xff & tlen));
771        out.write(cell.getTagsArray(), cell.getTagsOffset(), tlen);
772        size += tlen + KeyValue.TAGS_LENGTH_SIZE;
773      }
774      return size;
775    }
776  }
777}