001/** 002 * Licensed to the Apache Software Foundation (ASF) under one 003 * or more contributor license agreements. See the NOTICE file 004 * distributed with this work for additional information 005 * regarding copyright ownership. The ASF licenses this file 006 * to you under the Apache License, Version 2.0 (the 007 * "License"); you may not use this file except in compliance 008 * with the License. You may obtain a copy of the License at 009 * 010 * http://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, software 013 * distributed under the License is distributed on an "AS IS" BASIS, 014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 015 * See the License for the specific language governing permissions and 016 * limitations under the License. 017 */ 018package org.apache.hadoop.hbase.util; 019 020import static org.apache.hbase.thirdparty.com.google.common.base.Preconditions.checkArgument; 021import static org.apache.hbase.thirdparty.com.google.common.base.Preconditions.checkNotNull; 022import static org.apache.hbase.thirdparty.com.google.common.base.Preconditions.checkPositionIndex; 023 024import java.io.DataInput; 025import java.io.DataOutput; 026import java.io.IOException; 027import java.io.UnsupportedEncodingException; 028import java.math.BigDecimal; 029import java.math.BigInteger; 030import java.nio.ByteBuffer; 031import java.nio.charset.StandardCharsets; 032import java.security.SecureRandom; 033import java.util.ArrayList; 034import java.util.Arrays; 035import java.util.Collection; 036import java.util.Collections; 037import java.util.Comparator; 038import java.util.Iterator; 039import java.util.List; 040 041import org.apache.hadoop.hbase.Cell; 042import org.apache.hadoop.hbase.CellComparator; 043import org.apache.hadoop.hbase.KeyValue; 044import org.apache.hadoop.io.RawComparator; 045import org.apache.hadoop.io.WritableComparator; 046import org.apache.hadoop.io.WritableUtils; 047import org.apache.yetus.audience.InterfaceAudience; 048import org.slf4j.Logger; 049import org.slf4j.LoggerFactory; 050 051import org.apache.hbase.thirdparty.com.google.common.annotations.VisibleForTesting; 052import org.apache.hbase.thirdparty.org.apache.commons.collections4.CollectionUtils; 053 054import com.google.protobuf.ByteString; 055 056import sun.misc.Unsafe; 057 058/** 059 * Utility class that handles byte arrays, conversions to/from other types, 060 * comparisons, hash code generation, manufacturing keys for HashMaps or 061 * HashSets, and can be used as key in maps or trees. 062 */ 063@SuppressWarnings("restriction") 064@InterfaceAudience.Public 065@edu.umd.cs.findbugs.annotations.SuppressWarnings( 066 value="EQ_CHECK_FOR_OPERAND_NOT_COMPATIBLE_WITH_THIS", 067 justification="It has been like this forever") 068public class Bytes implements Comparable<Bytes> { 069 070 // Using the charset canonical name for String/byte[] conversions is much 071 // more efficient due to use of cached encoders/decoders. 072 private static final String UTF8_CSN = StandardCharsets.UTF_8.name(); 073 074 //HConstants.EMPTY_BYTE_ARRAY should be updated if this changed 075 private static final byte [] EMPTY_BYTE_ARRAY = new byte [0]; 076 077 private static final Logger LOG = LoggerFactory.getLogger(Bytes.class); 078 079 /** 080 * Size of boolean in bytes 081 */ 082 public static final int SIZEOF_BOOLEAN = Byte.SIZE / Byte.SIZE; 083 084 /** 085 * Size of byte in bytes 086 */ 087 public static final int SIZEOF_BYTE = SIZEOF_BOOLEAN; 088 089 /** 090 * Size of char in bytes 091 */ 092 public static final int SIZEOF_CHAR = Character.SIZE / Byte.SIZE; 093 094 /** 095 * Size of double in bytes 096 */ 097 public static final int SIZEOF_DOUBLE = Double.SIZE / Byte.SIZE; 098 099 /** 100 * Size of float in bytes 101 */ 102 public static final int SIZEOF_FLOAT = Float.SIZE / Byte.SIZE; 103 104 /** 105 * Size of int in bytes 106 */ 107 public static final int SIZEOF_INT = Integer.SIZE / Byte.SIZE; 108 109 /** 110 * Size of long in bytes 111 */ 112 public static final int SIZEOF_LONG = Long.SIZE / Byte.SIZE; 113 114 /** 115 * Size of short in bytes 116 */ 117 public static final int SIZEOF_SHORT = Short.SIZE / Byte.SIZE; 118 119 /** 120 * Mask to apply to a long to reveal the lower int only. Use like this: 121 * int i = (int)(0xFFFFFFFF00000000L ^ some_long_value); 122 */ 123 public static final long MASK_FOR_LOWER_INT_IN_LONG = 0xFFFFFFFF00000000L; 124 125 /** 126 * Estimate of size cost to pay beyond payload in jvm for instance of byte []. 127 * Estimate based on study of jhat and jprofiler numbers. 128 */ 129 // JHat says BU is 56 bytes. 130 // SizeOf which uses java.lang.instrument says 24 bytes. (3 longs?) 131 public static final int ESTIMATED_HEAP_TAX = 16; 132 133 @VisibleForTesting 134 static final boolean UNSAFE_UNALIGNED = UnsafeAvailChecker.unaligned(); 135 136 /** 137 * Returns length of the byte array, returning 0 if the array is null. 138 * Useful for calculating sizes. 139 * @param b byte array, which can be null 140 * @return 0 if b is null, otherwise returns length 141 */ 142 final public static int len(byte[] b) { 143 return b == null ? 0 : b.length; 144 } 145 146 private byte[] bytes; 147 private int offset; 148 private int length; 149 150 /** 151 * Create a zero-size sequence. 152 */ 153 public Bytes() { 154 super(); 155 } 156 157 /** 158 * Create a Bytes using the byte array as the initial value. 159 * @param bytes This array becomes the backing storage for the object. 160 */ 161 public Bytes(byte[] bytes) { 162 this(bytes, 0, bytes.length); 163 } 164 165 /** 166 * Set the new Bytes to the contents of the passed 167 * <code>ibw</code>. 168 * @param ibw the value to set this Bytes to. 169 */ 170 public Bytes(final Bytes ibw) { 171 this(ibw.get(), ibw.getOffset(), ibw.getLength()); 172 } 173 174 /** 175 * Set the value to a given byte range 176 * @param bytes the new byte range to set to 177 * @param offset the offset in newData to start at 178 * @param length the number of bytes in the range 179 */ 180 public Bytes(final byte[] bytes, final int offset, 181 final int length) { 182 this.bytes = bytes; 183 this.offset = offset; 184 this.length = length; 185 } 186 187 /** 188 * Copy bytes from ByteString instance. 189 * @param byteString copy from 190 * @deprecated As of release 2.0.0, this will be removed in HBase 3.0.0. 191 */ 192 @Deprecated 193 public Bytes(final ByteString byteString) { 194 this(byteString.toByteArray()); 195 } 196 197 /** 198 * Get the data from the Bytes. 199 * @return The data is only valid between offset and offset+length. 200 */ 201 public byte [] get() { 202 if (this.bytes == null) { 203 throw new IllegalStateException("Uninitialiized. Null constructor " + 204 "called w/o accompaying readFields invocation"); 205 } 206 return this.bytes; 207 } 208 209 /** 210 * @param b Use passed bytes as backing array for this instance. 211 */ 212 public void set(final byte [] b) { 213 set(b, 0, b.length); 214 } 215 216 /** 217 * @param b Use passed bytes as backing array for this instance. 218 * @param offset 219 * @param length 220 */ 221 public void set(final byte [] b, final int offset, final int length) { 222 this.bytes = b; 223 this.offset = offset; 224 this.length = length; 225 } 226 227 /** 228 * @return the number of valid bytes in the buffer 229 * @deprecated since 2.0.0 and will be removed in 3.0.0. Use {@link #getLength()} instead. 230 * @see #getLength() 231 * @see <a href="https://issues.apache.org/jira/browse/HBASE-11862">HBASE-11862</a> 232 */ 233 @Deprecated 234 public int getSize() { 235 if (this.bytes == null) { 236 throw new IllegalStateException("Uninitialiized. Null constructor " + 237 "called w/o accompaying readFields invocation"); 238 } 239 return this.length; 240 } 241 242 /** 243 * @return the number of valid bytes in the buffer 244 */ 245 public int getLength() { 246 if (this.bytes == null) { 247 throw new IllegalStateException("Uninitialiized. Null constructor " + 248 "called w/o accompaying readFields invocation"); 249 } 250 return this.length; 251 } 252 253 /** 254 * @return offset 255 */ 256 public int getOffset(){ 257 return this.offset; 258 } 259 260 /** 261 * @deprecated As of release 2.0.0, this will be removed in HBase 3.0.0. 262 */ 263 @Deprecated 264 public ByteString toByteString() { 265 return ByteString.copyFrom(this.bytes, this.offset, this.length); 266 } 267 268 @Override 269 public int hashCode() { 270 return Bytes.hashCode(bytes, offset, length); 271 } 272 273 /** 274 * Define the sort order of the Bytes. 275 * @param that The other bytes writable 276 * @return Positive if left is bigger than right, 0 if they are equal, and 277 * negative if left is smaller than right. 278 */ 279 @Override 280 public int compareTo(Bytes that) { 281 return BYTES_RAWCOMPARATOR.compare( 282 this.bytes, this.offset, this.length, 283 that.bytes, that.offset, that.length); 284 } 285 286 /** 287 * Compares the bytes in this object to the specified byte array 288 * @param that 289 * @return Positive if left is bigger than right, 0 if they are equal, and 290 * negative if left is smaller than right. 291 */ 292 public int compareTo(final byte [] that) { 293 return BYTES_RAWCOMPARATOR.compare( 294 this.bytes, this.offset, this.length, 295 that, 0, that.length); 296 } 297 298 /** 299 * @see Object#equals(Object) 300 */ 301 @Override 302 public boolean equals(Object right_obj) { 303 if (right_obj instanceof byte []) { 304 return compareTo((byte [])right_obj) == 0; 305 } 306 if (right_obj instanceof Bytes) { 307 return compareTo((Bytes)right_obj) == 0; 308 } 309 return false; 310 } 311 312 /** 313 * @see Object#toString() 314 */ 315 @Override 316 public String toString() { 317 return Bytes.toString(bytes, offset, length); 318 } 319 320 /** 321 * @param array List of byte []. 322 * @return Array of byte []. 323 */ 324 public static byte [][] toArray(final List<byte []> array) { 325 // List#toArray doesn't work on lists of byte []. 326 byte[][] results = new byte[array.size()][]; 327 for (int i = 0; i < array.size(); i++) { 328 results[i] = array.get(i); 329 } 330 return results; 331 } 332 333 /** 334 * Returns a copy of the bytes referred to by this writable 335 */ 336 public byte[] copyBytes() { 337 return Arrays.copyOfRange(bytes, offset, offset+length); 338 } 339 /** 340 * Byte array comparator class. 341 */ 342 @InterfaceAudience.Public 343 public static class ByteArrayComparator implements RawComparator<byte []> { 344 /** 345 * Constructor 346 */ 347 public ByteArrayComparator() { 348 super(); 349 } 350 @Override 351 public int compare(byte [] left, byte [] right) { 352 return compareTo(left, right); 353 } 354 @Override 355 public int compare(byte [] b1, int s1, int l1, byte [] b2, int s2, int l2) { 356 return LexicographicalComparerHolder.BEST_COMPARER. 357 compareTo(b1, s1, l1, b2, s2, l2); 358 } 359 } 360 361 /** 362 * A {@link ByteArrayComparator} that treats the empty array as the largest value. 363 * This is useful for comparing row end keys for regions. 364 */ 365 // TODO: unfortunately, HBase uses byte[0] as both start and end keys for region 366 // boundaries. Thus semantically, we should treat empty byte array as the smallest value 367 // while comparing row keys, start keys etc; but as the largest value for comparing 368 // region boundaries for endKeys. 369 @InterfaceAudience.Public 370 public static class RowEndKeyComparator extends ByteArrayComparator { 371 @Override 372 public int compare(byte[] left, byte[] right) { 373 return compare(left, 0, left.length, right, 0, right.length); 374 } 375 @Override 376 public int compare(byte[] b1, int s1, int l1, byte[] b2, int s2, int l2) { 377 if (b1 == b2 && s1 == s2 && l1 == l2) { 378 return 0; 379 } 380 if (l1 == 0) { 381 return l2; //0 or positive 382 } 383 if (l2 == 0) { 384 return -1; 385 } 386 return super.compare(b1, s1, l1, b2, s2, l2); 387 } 388 } 389 390 /** 391 * Pass this to TreeMaps where byte [] are keys. 392 */ 393 public final static Comparator<byte []> BYTES_COMPARATOR = new ByteArrayComparator(); 394 395 /** 396 * Use comparing byte arrays, byte-by-byte 397 */ 398 public final static RawComparator<byte []> BYTES_RAWCOMPARATOR = new ByteArrayComparator(); 399 400 /** 401 * Read byte-array written with a WritableableUtils.vint prefix. 402 * @param in Input to read from. 403 * @return byte array read off <code>in</code> 404 * @throws IOException e 405 */ 406 public static byte [] readByteArray(final DataInput in) 407 throws IOException { 408 int len = WritableUtils.readVInt(in); 409 if (len < 0) { 410 throw new NegativeArraySizeException(Integer.toString(len)); 411 } 412 byte [] result = new byte[len]; 413 in.readFully(result, 0, len); 414 return result; 415 } 416 417 /** 418 * Read byte-array written with a WritableableUtils.vint prefix. 419 * IOException is converted to a RuntimeException. 420 * @param in Input to read from. 421 * @return byte array read off <code>in</code> 422 */ 423 public static byte [] readByteArrayThrowsRuntime(final DataInput in) { 424 try { 425 return readByteArray(in); 426 } catch (Exception e) { 427 throw new RuntimeException(e); 428 } 429 } 430 431 /** 432 * Write byte-array with a WritableableUtils.vint prefix. 433 * @param out output stream to be written to 434 * @param b array to write 435 * @throws IOException e 436 */ 437 public static void writeByteArray(final DataOutput out, final byte [] b) 438 throws IOException { 439 if(b == null) { 440 WritableUtils.writeVInt(out, 0); 441 } else { 442 writeByteArray(out, b, 0, b.length); 443 } 444 } 445 446 /** 447 * Write byte-array to out with a vint length prefix. 448 * @param out output stream 449 * @param b array 450 * @param offset offset into array 451 * @param length length past offset 452 * @throws IOException e 453 */ 454 public static void writeByteArray(final DataOutput out, final byte [] b, 455 final int offset, final int length) 456 throws IOException { 457 WritableUtils.writeVInt(out, length); 458 out.write(b, offset, length); 459 } 460 461 /** 462 * Write byte-array from src to tgt with a vint length prefix. 463 * @param tgt target array 464 * @param tgtOffset offset into target array 465 * @param src source array 466 * @param srcOffset source offset 467 * @param srcLength source length 468 * @return New offset in src array. 469 */ 470 public static int writeByteArray(final byte [] tgt, final int tgtOffset, 471 final byte [] src, final int srcOffset, final int srcLength) { 472 byte [] vint = vintToBytes(srcLength); 473 System.arraycopy(vint, 0, tgt, tgtOffset, vint.length); 474 int offset = tgtOffset + vint.length; 475 System.arraycopy(src, srcOffset, tgt, offset, srcLength); 476 return offset + srcLength; 477 } 478 479 /** 480 * Put bytes at the specified byte array position. 481 * @param tgtBytes the byte array 482 * @param tgtOffset position in the array 483 * @param srcBytes array to write out 484 * @param srcOffset source offset 485 * @param srcLength source length 486 * @return incremented offset 487 */ 488 public static int putBytes(byte[] tgtBytes, int tgtOffset, byte[] srcBytes, 489 int srcOffset, int srcLength) { 490 System.arraycopy(srcBytes, srcOffset, tgtBytes, tgtOffset, srcLength); 491 return tgtOffset + srcLength; 492 } 493 494 /** 495 * Write a single byte out to the specified byte array position. 496 * @param bytes the byte array 497 * @param offset position in the array 498 * @param b byte to write out 499 * @return incremented offset 500 */ 501 public static int putByte(byte[] bytes, int offset, byte b) { 502 bytes[offset] = b; 503 return offset + 1; 504 } 505 506 /** 507 * Add the whole content of the ByteBuffer to the bytes arrays. The ByteBuffer is modified. 508 * @param bytes the byte array 509 * @param offset position in the array 510 * @param buf ByteBuffer to write out 511 * @return incremented offset 512 */ 513 public static int putByteBuffer(byte[] bytes, int offset, ByteBuffer buf) { 514 int len = buf.remaining(); 515 buf.get(bytes, offset, len); 516 return offset + len; 517 } 518 519 /** 520 * Returns a new byte array, copied from the given {@code buf}, 521 * from the index 0 (inclusive) to the limit (exclusive), 522 * regardless of the current position. 523 * The position and the other index parameters are not changed. 524 * 525 * @param buf a byte buffer 526 * @return the byte array 527 * @see #getBytes(ByteBuffer) 528 */ 529 public static byte[] toBytes(ByteBuffer buf) { 530 ByteBuffer dup = buf.duplicate(); 531 dup.position(0); 532 return readBytes(dup); 533 } 534 535 private static byte[] readBytes(ByteBuffer buf) { 536 byte [] result = new byte[buf.remaining()]; 537 buf.get(result); 538 return result; 539 } 540 541 /** 542 * @param b Presumed UTF-8 encoded byte array. 543 * @return String made from <code>b</code> 544 */ 545 public static String toString(final byte [] b) { 546 if (b == null) { 547 return null; 548 } 549 return toString(b, 0, b.length); 550 } 551 552 /** 553 * Joins two byte arrays together using a separator. 554 * @param b1 The first byte array. 555 * @param sep The separator to use. 556 * @param b2 The second byte array. 557 */ 558 public static String toString(final byte [] b1, 559 String sep, 560 final byte [] b2) { 561 return toString(b1, 0, b1.length) + sep + toString(b2, 0, b2.length); 562 } 563 564 /** 565 * This method will convert utf8 encoded bytes into a string. If 566 * the given byte array is null, this method will return null. 567 * 568 * @param b Presumed UTF-8 encoded byte array. 569 * @param off offset into array 570 * @return String made from <code>b</code> or null 571 */ 572 public static String toString(final byte[] b, int off) { 573 if (b == null) { 574 return null; 575 } 576 int len = b.length - off; 577 if (len <= 0) { 578 return ""; 579 } 580 try { 581 return new String(b, off, len, UTF8_CSN); 582 } catch (UnsupportedEncodingException e) { 583 // should never happen! 584 throw new IllegalArgumentException("UTF8 encoding is not supported", e); 585 } 586 } 587 588 /** 589 * This method will convert utf8 encoded bytes into a string. If 590 * the given byte array is null, this method will return null. 591 * 592 * @param b Presumed UTF-8 encoded byte array. 593 * @param off offset into array 594 * @param len length of utf-8 sequence 595 * @return String made from <code>b</code> or null 596 */ 597 public static String toString(final byte[] b, int off, int len) { 598 if (b == null) { 599 return null; 600 } 601 if (len == 0) { 602 return ""; 603 } 604 try { 605 return new String(b, off, len, UTF8_CSN); 606 } catch (UnsupportedEncodingException e) { 607 // should never happen! 608 throw new IllegalArgumentException("UTF8 encoding is not supported", e); 609 } 610 } 611 612 /** 613 * Write a printable representation of a byte array. 614 * 615 * @param b byte array 616 * @return string 617 * @see #toStringBinary(byte[], int, int) 618 */ 619 public static String toStringBinary(final byte [] b) { 620 if (b == null) 621 return "null"; 622 return toStringBinary(b, 0, b.length); 623 } 624 625 /** 626 * Converts the given byte buffer to a printable representation, 627 * from the index 0 (inclusive) to the limit (exclusive), 628 * regardless of the current position. 629 * The position and the other index parameters are not changed. 630 * 631 * @param buf a byte buffer 632 * @return a string representation of the buffer's binary contents 633 * @see #toBytes(ByteBuffer) 634 * @see #getBytes(ByteBuffer) 635 */ 636 public static String toStringBinary(ByteBuffer buf) { 637 if (buf == null) 638 return "null"; 639 if (buf.hasArray()) { 640 return toStringBinary(buf.array(), buf.arrayOffset(), buf.limit()); 641 } 642 return toStringBinary(toBytes(buf)); 643 } 644 645 private static final char[] HEX_CHARS_UPPER = { 646 '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'A', 'B', 'C', 'D', 'E', 'F' 647 }; 648 649 /** 650 * Write a printable representation of a byte array. Non-printable 651 * characters are hex escaped in the format \\x%02X, eg: 652 * \x00 \x05 etc 653 * 654 * @param b array to write out 655 * @param off offset to start at 656 * @param len length to write 657 * @return string output 658 */ 659 public static String toStringBinary(final byte [] b, int off, int len) { 660 StringBuilder result = new StringBuilder(); 661 // Just in case we are passed a 'len' that is > buffer length... 662 if (off >= b.length) return result.toString(); 663 if (off + len > b.length) len = b.length - off; 664 for (int i = off; i < off + len ; ++i) { 665 int ch = b[i] & 0xFF; 666 if (ch >= ' ' && ch <= '~' && ch != '\\') { 667 result.append((char)ch); 668 } else { 669 result.append("\\x"); 670 result.append(HEX_CHARS_UPPER[ch / 0x10]); 671 result.append(HEX_CHARS_UPPER[ch % 0x10]); 672 } 673 } 674 return result.toString(); 675 } 676 677 private static boolean isHexDigit(char c) { 678 return 679 (c >= 'A' && c <= 'F') || 680 (c >= '0' && c <= '9'); 681 } 682 683 /** 684 * Takes a ASCII digit in the range A-F0-9 and returns 685 * the corresponding integer/ordinal value. 686 * @param ch The hex digit. 687 * @return The converted hex value as a byte. 688 */ 689 public static byte toBinaryFromHex(byte ch) { 690 if (ch >= 'A' && ch <= 'F') 691 return (byte) ((byte)10 + (byte) (ch - 'A')); 692 // else 693 return (byte) (ch - '0'); 694 } 695 696 public static byte [] toBytesBinary(String in) { 697 // this may be bigger than we need, but let's be safe. 698 byte [] b = new byte[in.length()]; 699 int size = 0; 700 for (int i = 0; i < in.length(); ++i) { 701 char ch = in.charAt(i); 702 if (ch == '\\' && in.length() > i+1 && in.charAt(i+1) == 'x') { 703 // ok, take next 2 hex digits. 704 char hd1 = in.charAt(i+2); 705 char hd2 = in.charAt(i+3); 706 707 // they need to be A-F0-9: 708 if (!isHexDigit(hd1) || 709 !isHexDigit(hd2)) { 710 // bogus escape code, ignore: 711 continue; 712 } 713 // turn hex ASCII digit -> number 714 byte d = (byte) ((toBinaryFromHex((byte)hd1) << 4) + toBinaryFromHex((byte)hd2)); 715 716 b[size++] = d; 717 i += 3; // skip 3 718 } else { 719 b[size++] = (byte) ch; 720 } 721 } 722 // resize: 723 byte [] b2 = new byte[size]; 724 System.arraycopy(b, 0, b2, 0, size); 725 return b2; 726 } 727 728 /** 729 * Converts a string to a UTF-8 byte array. 730 * @param s string 731 * @return the byte array 732 */ 733 public static byte[] toBytes(String s) { 734 try { 735 return s.getBytes(UTF8_CSN); 736 } catch (UnsupportedEncodingException e) { 737 // should never happen! 738 throw new IllegalArgumentException("UTF8 decoding is not supported", e); 739 } 740 } 741 742 /** 743 * Convert a boolean to a byte array. True becomes -1 744 * and false becomes 0. 745 * 746 * @param b value 747 * @return <code>b</code> encoded in a byte array. 748 */ 749 public static byte [] toBytes(final boolean b) { 750 return new byte[] { b ? (byte) -1 : (byte) 0 }; 751 } 752 753 /** 754 * Reverses {@link #toBytes(boolean)} 755 * @param b array 756 * @return True or false. 757 */ 758 public static boolean toBoolean(final byte [] b) { 759 if (b.length != 1) { 760 throw new IllegalArgumentException("Array has wrong size: " + b.length); 761 } 762 return b[0] != (byte) 0; 763 } 764 765 /** 766 * Convert a long value to a byte array using big-endian. 767 * 768 * @param val value to convert 769 * @return the byte array 770 */ 771 public static byte[] toBytes(long val) { 772 byte [] b = new byte[8]; 773 for (int i = 7; i > 0; i--) { 774 b[i] = (byte) val; 775 val >>>= 8; 776 } 777 b[0] = (byte) val; 778 return b; 779 } 780 781 /** 782 * Converts a byte array to a long value. Reverses 783 * {@link #toBytes(long)} 784 * @param bytes array 785 * @return the long value 786 */ 787 public static long toLong(byte[] bytes) { 788 return toLong(bytes, 0, SIZEOF_LONG); 789 } 790 791 /** 792 * Converts a byte array to a long value. Assumes there will be 793 * {@link #SIZEOF_LONG} bytes available. 794 * 795 * @param bytes bytes 796 * @param offset offset 797 * @return the long value 798 */ 799 public static long toLong(byte[] bytes, int offset) { 800 return toLong(bytes, offset, SIZEOF_LONG); 801 } 802 803 /** 804 * Converts a byte array to a long value. 805 * 806 * @param bytes array of bytes 807 * @param offset offset into array 808 * @param length length of data (must be {@link #SIZEOF_LONG}) 809 * @return the long value 810 * @throws IllegalArgumentException if length is not {@link #SIZEOF_LONG} or 811 * if there's not enough room in the array at the offset indicated. 812 */ 813 public static long toLong(byte[] bytes, int offset, final int length) { 814 if (length != SIZEOF_LONG || offset + length > bytes.length) { 815 throw explainWrongLengthOrOffset(bytes, offset, length, SIZEOF_LONG); 816 } 817 return ConverterHolder.BEST_CONVERTER.toLong(bytes, offset, length); 818 } 819 820 private static IllegalArgumentException 821 explainWrongLengthOrOffset(final byte[] bytes, 822 final int offset, 823 final int length, 824 final int expectedLength) { 825 String reason; 826 if (length != expectedLength) { 827 reason = "Wrong length: " + length + ", expected " + expectedLength; 828 } else { 829 reason = "offset (" + offset + ") + length (" + length + ") exceed the" 830 + " capacity of the array: " + bytes.length; 831 } 832 return new IllegalArgumentException(reason); 833 } 834 835 /** 836 * Put a long value out to the specified byte array position. 837 * @param bytes the byte array 838 * @param offset position in the array 839 * @param val long to write out 840 * @return incremented offset 841 * @throws IllegalArgumentException if the byte array given doesn't have 842 * enough room at the offset specified. 843 */ 844 public static int putLong(byte[] bytes, int offset, long val) { 845 if (bytes.length - offset < SIZEOF_LONG) { 846 throw new IllegalArgumentException("Not enough room to put a long at" 847 + " offset " + offset + " in a " + bytes.length + " byte array"); 848 } 849 return ConverterHolder.BEST_CONVERTER.putLong(bytes, offset, val); 850 } 851 852 /** 853 * Put a long value out to the specified byte array position (Unsafe). 854 * @param bytes the byte array 855 * @param offset position in the array 856 * @param val long to write out 857 * @return incremented offset 858 * @deprecated As of release 2.0.0, this will be removed in HBase 3.0.0. 859 */ 860 @Deprecated 861 public static int putLongUnsafe(byte[] bytes, int offset, long val) { 862 return UnsafeAccess.putLong(bytes, offset, val); 863 } 864 865 /** 866 * Presumes float encoded as IEEE 754 floating-point "single format" 867 * @param bytes byte array 868 * @return Float made from passed byte array. 869 */ 870 public static float toFloat(byte [] bytes) { 871 return toFloat(bytes, 0); 872 } 873 874 /** 875 * Presumes float encoded as IEEE 754 floating-point "single format" 876 * @param bytes array to convert 877 * @param offset offset into array 878 * @return Float made from passed byte array. 879 */ 880 public static float toFloat(byte [] bytes, int offset) { 881 return Float.intBitsToFloat(toInt(bytes, offset, SIZEOF_INT)); 882 } 883 884 /** 885 * @param bytes byte array 886 * @param offset offset to write to 887 * @param f float value 888 * @return New offset in <code>bytes</code> 889 */ 890 public static int putFloat(byte [] bytes, int offset, float f) { 891 return putInt(bytes, offset, Float.floatToRawIntBits(f)); 892 } 893 894 /** 895 * @param f float value 896 * @return the float represented as byte [] 897 */ 898 public static byte [] toBytes(final float f) { 899 // Encode it as int 900 return Bytes.toBytes(Float.floatToRawIntBits(f)); 901 } 902 903 /** 904 * @param bytes byte array 905 * @return Return double made from passed bytes. 906 */ 907 public static double toDouble(final byte [] bytes) { 908 return toDouble(bytes, 0); 909 } 910 911 /** 912 * @param bytes byte array 913 * @param offset offset where double is 914 * @return Return double made from passed bytes. 915 */ 916 public static double toDouble(final byte [] bytes, final int offset) { 917 return Double.longBitsToDouble(toLong(bytes, offset, SIZEOF_LONG)); 918 } 919 920 /** 921 * @param bytes byte array 922 * @param offset offset to write to 923 * @param d value 924 * @return New offset into array <code>bytes</code> 925 */ 926 public static int putDouble(byte [] bytes, int offset, double d) { 927 return putLong(bytes, offset, Double.doubleToLongBits(d)); 928 } 929 930 /** 931 * Serialize a double as the IEEE 754 double format output. The resultant 932 * array will be 8 bytes long. 933 * 934 * @param d value 935 * @return the double represented as byte [] 936 */ 937 public static byte [] toBytes(final double d) { 938 // Encode it as a long 939 return Bytes.toBytes(Double.doubleToRawLongBits(d)); 940 } 941 942 /** 943 * Convert an int value to a byte array. Big-endian. Same as what DataOutputStream.writeInt 944 * does. 945 * 946 * @param val value 947 * @return the byte array 948 */ 949 public static byte[] toBytes(int val) { 950 byte [] b = new byte[4]; 951 for(int i = 3; i > 0; i--) { 952 b[i] = (byte) val; 953 val >>>= 8; 954 } 955 b[0] = (byte) val; 956 return b; 957 } 958 959 /** 960 * Converts a byte array to an int value 961 * @param bytes byte array 962 * @return the int value 963 */ 964 public static int toInt(byte[] bytes) { 965 return toInt(bytes, 0, SIZEOF_INT); 966 } 967 968 /** 969 * Converts a byte array to an int value 970 * @param bytes byte array 971 * @param offset offset into array 972 * @return the int value 973 */ 974 public static int toInt(byte[] bytes, int offset) { 975 return toInt(bytes, offset, SIZEOF_INT); 976 } 977 978 /** 979 * Converts a byte array to an int value 980 * @param bytes byte array 981 * @param offset offset into array 982 * @param length length of int (has to be {@link #SIZEOF_INT}) 983 * @return the int value 984 * @throws IllegalArgumentException if length is not {@link #SIZEOF_INT} or 985 * if there's not enough room in the array at the offset indicated. 986 */ 987 public static int toInt(byte[] bytes, int offset, final int length) { 988 if (length != SIZEOF_INT || offset + length > bytes.length) { 989 throw explainWrongLengthOrOffset(bytes, offset, length, SIZEOF_INT); 990 } 991 return ConverterHolder.BEST_CONVERTER.toInt(bytes, offset, length); 992 } 993 994 /** 995 * Converts a byte array to an int value (Unsafe version) 996 * @param bytes byte array 997 * @param offset offset into array 998 * @return the int value 999 * @deprecated As of release 2.0.0, this will be removed in HBase 3.0.0. 1000 */ 1001 @Deprecated 1002 public static int toIntUnsafe(byte[] bytes, int offset) { 1003 return UnsafeAccess.toInt(bytes, offset); 1004 } 1005 1006 /** 1007 * Converts a byte array to an short value (Unsafe version) 1008 * @param bytes byte array 1009 * @param offset offset into array 1010 * @return the short value 1011 * @deprecated As of release 2.0.0, this will be removed in HBase 3.0.0. 1012 */ 1013 @Deprecated 1014 public static short toShortUnsafe(byte[] bytes, int offset) { 1015 return UnsafeAccess.toShort(bytes, offset); 1016 } 1017 1018 /** 1019 * Converts a byte array to an long value (Unsafe version) 1020 * @param bytes byte array 1021 * @param offset offset into array 1022 * @return the long value 1023 * @deprecated As of release 2.0.0, this will be removed in HBase 3.0.0. 1024 */ 1025 @Deprecated 1026 public static long toLongUnsafe(byte[] bytes, int offset) { 1027 return UnsafeAccess.toLong(bytes, offset); 1028 } 1029 1030 /** 1031 * Converts a byte array to an int value 1032 * @param bytes byte array 1033 * @param offset offset into array 1034 * @param length how many bytes should be considered for creating int 1035 * @return the int value 1036 * @throws IllegalArgumentException if there's not enough room in the array at the offset 1037 * indicated. 1038 */ 1039 public static int readAsInt(byte[] bytes, int offset, final int length) { 1040 if (offset + length > bytes.length) { 1041 throw new IllegalArgumentException("offset (" + offset + ") + length (" + length 1042 + ") exceed the" + " capacity of the array: " + bytes.length); 1043 } 1044 int n = 0; 1045 for(int i = offset; i < (offset + length); i++) { 1046 n <<= 8; 1047 n ^= bytes[i] & 0xFF; 1048 } 1049 return n; 1050 } 1051 1052 /** 1053 * Put an int value out to the specified byte array position. 1054 * @param bytes the byte array 1055 * @param offset position in the array 1056 * @param val int to write out 1057 * @return incremented offset 1058 * @throws IllegalArgumentException if the byte array given doesn't have 1059 * enough room at the offset specified. 1060 */ 1061 public static int putInt(byte[] bytes, int offset, int val) { 1062 if (bytes.length - offset < SIZEOF_INT) { 1063 throw new IllegalArgumentException("Not enough room to put an int at" 1064 + " offset " + offset + " in a " + bytes.length + " byte array"); 1065 } 1066 return ConverterHolder.BEST_CONVERTER.putInt(bytes, offset, val); 1067 } 1068 1069 /** 1070 * Put an int value out to the specified byte array position (Unsafe). 1071 * @param bytes the byte array 1072 * @param offset position in the array 1073 * @param val int to write out 1074 * @return incremented offset 1075 * @deprecated As of release 2.0.0, this will be removed in HBase 3.0.0. 1076 */ 1077 @Deprecated 1078 public static int putIntUnsafe(byte[] bytes, int offset, int val) { 1079 return UnsafeAccess.putInt(bytes, offset, val); 1080 } 1081 1082 /** 1083 * Convert a short value to a byte array of {@link #SIZEOF_SHORT} bytes long. 1084 * @param val value 1085 * @return the byte array 1086 */ 1087 public static byte[] toBytes(short val) { 1088 byte[] b = new byte[SIZEOF_SHORT]; 1089 b[1] = (byte) val; 1090 val >>= 8; 1091 b[0] = (byte) val; 1092 return b; 1093 } 1094 1095 /** 1096 * Converts a byte array to a short value 1097 * @param bytes byte array 1098 * @return the short value 1099 */ 1100 public static short toShort(byte[] bytes) { 1101 return toShort(bytes, 0, SIZEOF_SHORT); 1102 } 1103 1104 /** 1105 * Converts a byte array to a short value 1106 * @param bytes byte array 1107 * @param offset offset into array 1108 * @return the short value 1109 */ 1110 public static short toShort(byte[] bytes, int offset) { 1111 return toShort(bytes, offset, SIZEOF_SHORT); 1112 } 1113 1114 /** 1115 * Converts a byte array to a short value 1116 * @param bytes byte array 1117 * @param offset offset into array 1118 * @param length length, has to be {@link #SIZEOF_SHORT} 1119 * @return the short value 1120 * @throws IllegalArgumentException if length is not {@link #SIZEOF_SHORT} 1121 * or if there's not enough room in the array at the offset indicated. 1122 */ 1123 public static short toShort(byte[] bytes, int offset, final int length) { 1124 if (length != SIZEOF_SHORT || offset + length > bytes.length) { 1125 throw explainWrongLengthOrOffset(bytes, offset, length, SIZEOF_SHORT); 1126 } 1127 return ConverterHolder.BEST_CONVERTER.toShort(bytes, offset, length); 1128 } 1129 1130 /** 1131 * Returns a new byte array, copied from the given {@code buf}, 1132 * from the position (inclusive) to the limit (exclusive). 1133 * The position and the other index parameters are not changed. 1134 * 1135 * @param buf a byte buffer 1136 * @return the byte array 1137 * @see #toBytes(ByteBuffer) 1138 */ 1139 public static byte[] getBytes(ByteBuffer buf) { 1140 return readBytes(buf.duplicate()); 1141 } 1142 1143 /** 1144 * Put a short value out to the specified byte array position. 1145 * @param bytes the byte array 1146 * @param offset position in the array 1147 * @param val short to write out 1148 * @return incremented offset 1149 * @throws IllegalArgumentException if the byte array given doesn't have 1150 * enough room at the offset specified. 1151 */ 1152 public static int putShort(byte[] bytes, int offset, short val) { 1153 if (bytes.length - offset < SIZEOF_SHORT) { 1154 throw new IllegalArgumentException("Not enough room to put a short at" 1155 + " offset " + offset + " in a " + bytes.length + " byte array"); 1156 } 1157 return ConverterHolder.BEST_CONVERTER.putShort(bytes, offset, val); 1158 } 1159 1160 /** 1161 * Put a short value out to the specified byte array position (Unsafe). 1162 * @param bytes the byte array 1163 * @param offset position in the array 1164 * @param val short to write out 1165 * @return incremented offset 1166 * @deprecated As of release 2.0.0, this will be removed in HBase 3.0.0. 1167 */ 1168 @Deprecated 1169 public static int putShortUnsafe(byte[] bytes, int offset, short val) { 1170 return UnsafeAccess.putShort(bytes, offset, val); 1171 } 1172 1173 /** 1174 * Put an int value as short out to the specified byte array position. Only the lower 2 bytes of 1175 * the short will be put into the array. The caller of the API need to make sure they will not 1176 * loose the value by doing so. This is useful to store an unsigned short which is represented as 1177 * int in other parts. 1178 * @param bytes the byte array 1179 * @param offset position in the array 1180 * @param val value to write out 1181 * @return incremented offset 1182 * @throws IllegalArgumentException if the byte array given doesn't have 1183 * enough room at the offset specified. 1184 */ 1185 public static int putAsShort(byte[] bytes, int offset, int val) { 1186 if (bytes.length - offset < SIZEOF_SHORT) { 1187 throw new IllegalArgumentException("Not enough room to put a short at" 1188 + " offset " + offset + " in a " + bytes.length + " byte array"); 1189 } 1190 bytes[offset+1] = (byte) val; 1191 val >>= 8; 1192 bytes[offset] = (byte) val; 1193 return offset + SIZEOF_SHORT; 1194 } 1195 1196 /** 1197 * Convert a BigDecimal value to a byte array 1198 * 1199 * @param val 1200 * @return the byte array 1201 */ 1202 public static byte[] toBytes(BigDecimal val) { 1203 byte[] valueBytes = val.unscaledValue().toByteArray(); 1204 byte[] result = new byte[valueBytes.length + SIZEOF_INT]; 1205 int offset = putInt(result, 0, val.scale()); 1206 putBytes(result, offset, valueBytes, 0, valueBytes.length); 1207 return result; 1208 } 1209 1210 1211 /** 1212 * Converts a byte array to a BigDecimal 1213 * 1214 * @param bytes 1215 * @return the char value 1216 */ 1217 public static BigDecimal toBigDecimal(byte[] bytes) { 1218 return toBigDecimal(bytes, 0, bytes.length); 1219 } 1220 1221 /** 1222 * Converts a byte array to a BigDecimal value 1223 * 1224 * @param bytes 1225 * @param offset 1226 * @param length 1227 * @return the char value 1228 */ 1229 public static BigDecimal toBigDecimal(byte[] bytes, int offset, final int length) { 1230 if (bytes == null || length < SIZEOF_INT + 1 || 1231 (offset + length > bytes.length)) { 1232 return null; 1233 } 1234 1235 int scale = toInt(bytes, offset); 1236 byte[] tcBytes = new byte[length - SIZEOF_INT]; 1237 System.arraycopy(bytes, offset + SIZEOF_INT, tcBytes, 0, length - SIZEOF_INT); 1238 return new BigDecimal(new BigInteger(tcBytes), scale); 1239 } 1240 1241 /** 1242 * Put a BigDecimal value out to the specified byte array position. 1243 * 1244 * @param bytes the byte array 1245 * @param offset position in the array 1246 * @param val BigDecimal to write out 1247 * @return incremented offset 1248 */ 1249 public static int putBigDecimal(byte[] bytes, int offset, BigDecimal val) { 1250 if (bytes == null) { 1251 return offset; 1252 } 1253 1254 byte[] valueBytes = val.unscaledValue().toByteArray(); 1255 byte[] result = new byte[valueBytes.length + SIZEOF_INT]; 1256 offset = putInt(result, offset, val.scale()); 1257 return putBytes(result, offset, valueBytes, 0, valueBytes.length); 1258 } 1259 1260 /** 1261 * @param vint Integer to make a vint of. 1262 * @return Vint as bytes array. 1263 */ 1264 public static byte [] vintToBytes(final long vint) { 1265 long i = vint; 1266 int size = WritableUtils.getVIntSize(i); 1267 byte [] result = new byte[size]; 1268 int offset = 0; 1269 if (i >= -112 && i <= 127) { 1270 result[offset] = (byte) i; 1271 return result; 1272 } 1273 1274 int len = -112; 1275 if (i < 0) { 1276 i ^= -1L; // take one's complement' 1277 len = -120; 1278 } 1279 1280 long tmp = i; 1281 while (tmp != 0) { 1282 tmp = tmp >> 8; 1283 len--; 1284 } 1285 1286 result[offset++] = (byte) len; 1287 1288 len = (len < -120) ? -(len + 120) : -(len + 112); 1289 1290 for (int idx = len; idx != 0; idx--) { 1291 int shiftbits = (idx - 1) * 8; 1292 long mask = 0xFFL << shiftbits; 1293 result[offset++] = (byte)((i & mask) >> shiftbits); 1294 } 1295 return result; 1296 } 1297 1298 /** 1299 * @param buffer buffer to convert 1300 * @return vint bytes as an integer. 1301 */ 1302 public static long bytesToVint(final byte [] buffer) { 1303 int offset = 0; 1304 byte firstByte = buffer[offset++]; 1305 int len = WritableUtils.decodeVIntSize(firstByte); 1306 if (len == 1) { 1307 return firstByte; 1308 } 1309 long i = 0; 1310 for (int idx = 0; idx < len-1; idx++) { 1311 byte b = buffer[offset++]; 1312 i = i << 8; 1313 i = i | (b & 0xFF); 1314 } 1315 return (WritableUtils.isNegativeVInt(firstByte) ? ~i : i); 1316 } 1317 1318 /** 1319 * Reads a zero-compressed encoded long from input buffer and returns it. 1320 * @param buffer Binary array 1321 * @param offset Offset into array at which vint begins. 1322 * @throws java.io.IOException e 1323 * @return deserialized long from buffer. 1324 * @deprecated since 0.98.12. Use {@link #readAsVLong(byte[],int)} instead. 1325 * @see #readAsVLong(byte[], int) 1326 * @see <a href="https://issues.apache.org/jira/browse/HBASE-6919">HBASE-6919</a> 1327 */ 1328 @Deprecated 1329 public static long readVLong(final byte [] buffer, final int offset) 1330 throws IOException { 1331 return readAsVLong(buffer, offset); 1332 } 1333 1334 /** 1335 * Reads a zero-compressed encoded long from input buffer and returns it. 1336 * @param buffer Binary array 1337 * @param offset Offset into array at which vint begins. 1338 * @return deserialized long from buffer. 1339 */ 1340 public static long readAsVLong(final byte [] buffer, final int offset) { 1341 byte firstByte = buffer[offset]; 1342 int len = WritableUtils.decodeVIntSize(firstByte); 1343 if (len == 1) { 1344 return firstByte; 1345 } 1346 long i = 0; 1347 for (int idx = 0; idx < len-1; idx++) { 1348 byte b = buffer[offset + 1 + idx]; 1349 i = i << 8; 1350 i = i | (b & 0xFF); 1351 } 1352 return (WritableUtils.isNegativeVInt(firstByte) ? ~i : i); 1353 } 1354 1355 /** 1356 * @param left left operand 1357 * @param right right operand 1358 * @return 0 if equal, < 0 if left is less than right, etc. 1359 */ 1360 public static int compareTo(final byte [] left, final byte [] right) { 1361 return LexicographicalComparerHolder.BEST_COMPARER. 1362 compareTo(left, 0, left == null? 0: left.length, right, 0, right == null? 0: right.length); 1363 } 1364 1365 /** 1366 * Lexicographically compare two arrays. 1367 * 1368 * @param buffer1 left operand 1369 * @param buffer2 right operand 1370 * @param offset1 Where to start comparing in the left buffer 1371 * @param offset2 Where to start comparing in the right buffer 1372 * @param length1 How much to compare from the left buffer 1373 * @param length2 How much to compare from the right buffer 1374 * @return 0 if equal, < 0 if left is less than right, etc. 1375 */ 1376 public static int compareTo(byte[] buffer1, int offset1, int length1, 1377 byte[] buffer2, int offset2, int length2) { 1378 return LexicographicalComparerHolder.BEST_COMPARER. 1379 compareTo(buffer1, offset1, length1, buffer2, offset2, length2); 1380 } 1381 1382 interface Comparer<T> { 1383 int compareTo( 1384 T buffer1, int offset1, int length1, T buffer2, int offset2, int length2 1385 ); 1386 } 1387 1388 static abstract class Converter { 1389 abstract long toLong(byte[] bytes, int offset, int length); 1390 abstract int putLong(byte[] bytes, int offset, long val); 1391 1392 abstract int toInt(byte[] bytes, int offset, final int length); 1393 abstract int putInt(byte[] bytes, int offset, int val); 1394 1395 abstract short toShort(byte[] bytes, int offset, final int length); 1396 abstract int putShort(byte[] bytes, int offset, short val); 1397 1398 } 1399 1400 @VisibleForTesting 1401 static Comparer<byte[]> lexicographicalComparerJavaImpl() { 1402 return LexicographicalComparerHolder.PureJavaComparer.INSTANCE; 1403 } 1404 1405 static class ConverterHolder { 1406 static final String UNSAFE_CONVERTER_NAME = 1407 ConverterHolder.class.getName() + "$UnsafeConverter"; 1408 1409 static final Converter BEST_CONVERTER = getBestConverter(); 1410 /** 1411 * Returns the Unsafe-using Converter, or falls back to the pure-Java 1412 * implementation if unable to do so. 1413 */ 1414 static Converter getBestConverter() { 1415 try { 1416 Class<?> theClass = Class.forName(UNSAFE_CONVERTER_NAME); 1417 1418 // yes, UnsafeComparer does implement Comparer<byte[]> 1419 @SuppressWarnings("unchecked") 1420 Converter converter = (Converter) theClass.getConstructor().newInstance(); 1421 return converter; 1422 } catch (Throwable t) { // ensure we really catch *everything* 1423 return PureJavaConverter.INSTANCE; 1424 } 1425 } 1426 1427 protected static final class PureJavaConverter extends Converter { 1428 static final PureJavaConverter INSTANCE = new PureJavaConverter(); 1429 1430 private PureJavaConverter() {} 1431 1432 @Override 1433 long toLong(byte[] bytes, int offset, int length) { 1434 long l = 0; 1435 for(int i = offset; i < offset + length; i++) { 1436 l <<= 8; 1437 l ^= bytes[i] & 0xFF; 1438 } 1439 return l; 1440 } 1441 1442 @Override 1443 int putLong(byte[] bytes, int offset, long val) { 1444 for(int i = offset + 7; i > offset; i--) { 1445 bytes[i] = (byte) val; 1446 val >>>= 8; 1447 } 1448 bytes[offset] = (byte) val; 1449 return offset + SIZEOF_LONG; 1450 } 1451 1452 @Override 1453 int toInt(byte[] bytes, int offset, int length) { 1454 int n = 0; 1455 for(int i = offset; i < (offset + length); i++) { 1456 n <<= 8; 1457 n ^= bytes[i] & 0xFF; 1458 } 1459 return n; 1460 } 1461 1462 @Override 1463 int putInt(byte[] bytes, int offset, int val) { 1464 for(int i= offset + 3; i > offset; i--) { 1465 bytes[i] = (byte) val; 1466 val >>>= 8; 1467 } 1468 bytes[offset] = (byte) val; 1469 return offset + SIZEOF_INT; 1470 } 1471 1472 @Override 1473 short toShort(byte[] bytes, int offset, int length) { 1474 short n = 0; 1475 n = (short) ((n ^ bytes[offset]) & 0xFF); 1476 n = (short) (n << 8); 1477 n ^= (short) (bytes[offset+1] & 0xFF); 1478 return n; 1479 } 1480 1481 @Override 1482 int putShort(byte[] bytes, int offset, short val) { 1483 bytes[offset+1] = (byte) val; 1484 val >>= 8; 1485 bytes[offset] = (byte) val; 1486 return offset + SIZEOF_SHORT; 1487 } 1488 } 1489 1490 protected static final class UnsafeConverter extends Converter { 1491 1492 static final Unsafe theUnsafe; 1493 1494 public UnsafeConverter() {} 1495 1496 static { 1497 if (UNSAFE_UNALIGNED) { 1498 theUnsafe = UnsafeAccess.theUnsafe; 1499 } else { 1500 // It doesn't matter what we throw; 1501 // it's swallowed in getBestComparer(). 1502 throw new Error(); 1503 } 1504 1505 // sanity check - this should never fail 1506 if (theUnsafe.arrayIndexScale(byte[].class) != 1) { 1507 throw new AssertionError(); 1508 } 1509 } 1510 1511 @Override 1512 long toLong(byte[] bytes, int offset, int length) { 1513 return UnsafeAccess.toLong(bytes, offset); 1514 } 1515 1516 @Override 1517 int putLong(byte[] bytes, int offset, long val) { 1518 return UnsafeAccess.putLong(bytes, offset, val); 1519 } 1520 1521 @Override 1522 int toInt(byte[] bytes, int offset, int length) { 1523 return UnsafeAccess.toInt(bytes, offset); 1524 } 1525 1526 @Override 1527 int putInt(byte[] bytes, int offset, int val) { 1528 return UnsafeAccess.putInt(bytes, offset, val); 1529 } 1530 1531 @Override 1532 short toShort(byte[] bytes, int offset, int length) { 1533 return UnsafeAccess.toShort(bytes, offset); 1534 } 1535 1536 @Override 1537 int putShort(byte[] bytes, int offset, short val) { 1538 return UnsafeAccess.putShort(bytes, offset, val); 1539 } 1540 } 1541 } 1542 1543 /** 1544 * Provides a lexicographical comparer implementation; either a Java 1545 * implementation or a faster implementation based on {@link Unsafe}. 1546 * 1547 * <p>Uses reflection to gracefully fall back to the Java implementation if 1548 * {@code Unsafe} isn't available. 1549 */ 1550 @VisibleForTesting 1551 static class LexicographicalComparerHolder { 1552 static final String UNSAFE_COMPARER_NAME = 1553 LexicographicalComparerHolder.class.getName() + "$UnsafeComparer"; 1554 1555 static final Comparer<byte[]> BEST_COMPARER = getBestComparer(); 1556 /** 1557 * Returns the Unsafe-using Comparer, or falls back to the pure-Java 1558 * implementation if unable to do so. 1559 */ 1560 static Comparer<byte[]> getBestComparer() { 1561 try { 1562 Class<?> theClass = Class.forName(UNSAFE_COMPARER_NAME); 1563 1564 // yes, UnsafeComparer does implement Comparer<byte[]> 1565 @SuppressWarnings("unchecked") 1566 Comparer<byte[]> comparer = 1567 (Comparer<byte[]>) theClass.getEnumConstants()[0]; 1568 return comparer; 1569 } catch (Throwable t) { // ensure we really catch *everything* 1570 return lexicographicalComparerJavaImpl(); 1571 } 1572 } 1573 1574 enum PureJavaComparer implements Comparer<byte[]> { 1575 INSTANCE; 1576 1577 @Override 1578 public int compareTo(byte[] buffer1, int offset1, int length1, 1579 byte[] buffer2, int offset2, int length2) { 1580 // Short circuit equal case 1581 if (buffer1 == buffer2 && 1582 offset1 == offset2 && 1583 length1 == length2) { 1584 return 0; 1585 } 1586 // Bring WritableComparator code local 1587 int end1 = offset1 + length1; 1588 int end2 = offset2 + length2; 1589 for (int i = offset1, j = offset2; i < end1 && j < end2; i++, j++) { 1590 int a = (buffer1[i] & 0xff); 1591 int b = (buffer2[j] & 0xff); 1592 if (a != b) { 1593 return a - b; 1594 } 1595 } 1596 return length1 - length2; 1597 } 1598 } 1599 1600 @VisibleForTesting 1601 enum UnsafeComparer implements Comparer<byte[]> { 1602 INSTANCE; 1603 1604 static final Unsafe theUnsafe; 1605 static { 1606 if (UNSAFE_UNALIGNED) { 1607 theUnsafe = UnsafeAccess.theUnsafe; 1608 } else { 1609 // It doesn't matter what we throw; 1610 // it's swallowed in getBestComparer(). 1611 throw new Error(); 1612 } 1613 1614 // sanity check - this should never fail 1615 if (theUnsafe.arrayIndexScale(byte[].class) != 1) { 1616 throw new AssertionError(); 1617 } 1618 } 1619 1620 /** 1621 * Lexicographically compare two arrays. 1622 * 1623 * @param buffer1 left operand 1624 * @param buffer2 right operand 1625 * @param offset1 Where to start comparing in the left buffer 1626 * @param offset2 Where to start comparing in the right buffer 1627 * @param length1 How much to compare from the left buffer 1628 * @param length2 How much to compare from the right buffer 1629 * @return 0 if equal, < 0 if left is less than right, etc. 1630 */ 1631 @Override 1632 public int compareTo(byte[] buffer1, int offset1, int length1, 1633 byte[] buffer2, int offset2, int length2) { 1634 1635 // Short circuit equal case 1636 if (buffer1 == buffer2 && 1637 offset1 == offset2 && 1638 length1 == length2) { 1639 return 0; 1640 } 1641 final int stride = 8; 1642 final int minLength = Math.min(length1, length2); 1643 int strideLimit = minLength & ~(stride - 1); 1644 final long offset1Adj = offset1 + UnsafeAccess.BYTE_ARRAY_BASE_OFFSET; 1645 final long offset2Adj = offset2 + UnsafeAccess.BYTE_ARRAY_BASE_OFFSET; 1646 int i; 1647 1648 /* 1649 * Compare 8 bytes at a time. Benchmarking on x86 shows a stride of 8 bytes is no slower 1650 * than 4 bytes even on 32-bit. On the other hand, it is substantially faster on 64-bit. 1651 */ 1652 for (i = 0; i < strideLimit; i += stride) { 1653 long lw = theUnsafe.getLong(buffer1, offset1Adj + i); 1654 long rw = theUnsafe.getLong(buffer2, offset2Adj + i); 1655 if (lw != rw) { 1656 if(!UnsafeAccess.LITTLE_ENDIAN) { 1657 return ((lw + Long.MIN_VALUE) < (rw + Long.MIN_VALUE)) ? -1 : 1; 1658 } 1659 1660 /* 1661 * We want to compare only the first index where left[index] != right[index]. This 1662 * corresponds to the least significant nonzero byte in lw ^ rw, since lw and rw are 1663 * little-endian. Long.numberOfTrailingZeros(diff) tells us the least significant 1664 * nonzero bit, and zeroing out the first three bits of L.nTZ gives us the shift to get 1665 * that least significant nonzero byte. This comparison logic is based on UnsignedBytes 1666 * comparator from guava v21 1667 */ 1668 int n = Long.numberOfTrailingZeros(lw ^ rw) & ~0x7; 1669 return ((int) ((lw >>> n) & 0xFF)) - ((int) ((rw >>> n) & 0xFF)); 1670 } 1671 } 1672 1673 // The epilogue to cover the last (minLength % stride) elements. 1674 for (; i < minLength; i++) { 1675 int a = (buffer1[offset1 + i] & 0xFF); 1676 int b = (buffer2[offset2 + i] & 0xFF); 1677 if (a != b) { 1678 return a - b; 1679 } 1680 } 1681 return length1 - length2; 1682 } 1683 } 1684 } 1685 1686 /** 1687 * @param left left operand 1688 * @param right right operand 1689 * @return True if equal 1690 */ 1691 public static boolean equals(final byte [] left, final byte [] right) { 1692 // Could use Arrays.equals? 1693 //noinspection SimplifiableConditionalExpression 1694 if (left == right) return true; 1695 if (left == null || right == null) return false; 1696 if (left.length != right.length) return false; 1697 if (left.length == 0) return true; 1698 1699 // Since we're often comparing adjacent sorted data, 1700 // it's usual to have equal arrays except for the very last byte 1701 // so check that first 1702 if (left[left.length - 1] != right[right.length - 1]) return false; 1703 1704 return compareTo(left, right) == 0; 1705 } 1706 1707 public static boolean equals(final byte[] left, int leftOffset, int leftLen, 1708 final byte[] right, int rightOffset, int rightLen) { 1709 // short circuit case 1710 if (left == right && 1711 leftOffset == rightOffset && 1712 leftLen == rightLen) { 1713 return true; 1714 } 1715 // different lengths fast check 1716 if (leftLen != rightLen) { 1717 return false; 1718 } 1719 if (leftLen == 0) { 1720 return true; 1721 } 1722 1723 // Since we're often comparing adjacent sorted data, 1724 // it's usual to have equal arrays except for the very last byte 1725 // so check that first 1726 if (left[leftOffset + leftLen - 1] != right[rightOffset + rightLen - 1]) return false; 1727 1728 return LexicographicalComparerHolder.BEST_COMPARER. 1729 compareTo(left, leftOffset, leftLen, right, rightOffset, rightLen) == 0; 1730 } 1731 1732 1733 /** 1734 * @param a left operand 1735 * @param buf right operand 1736 * @return True if equal 1737 */ 1738 public static boolean equals(byte[] a, ByteBuffer buf) { 1739 if (a == null) return buf == null; 1740 if (buf == null) return false; 1741 if (a.length != buf.remaining()) return false; 1742 1743 // Thou shalt not modify the original byte buffer in what should be read only operations. 1744 ByteBuffer b = buf.duplicate(); 1745 for (byte anA : a) { 1746 if (anA != b.get()) { 1747 return false; 1748 } 1749 } 1750 return true; 1751 } 1752 1753 1754 /** 1755 * Return true if the byte array on the right is a prefix of the byte 1756 * array on the left. 1757 */ 1758 public static boolean startsWith(byte[] bytes, byte[] prefix) { 1759 return bytes != null && prefix != null && 1760 bytes.length >= prefix.length && 1761 LexicographicalComparerHolder.BEST_COMPARER. 1762 compareTo(bytes, 0, prefix.length, prefix, 0, prefix.length) == 0; 1763 } 1764 1765 /** 1766 * @param b bytes to hash 1767 * @return Runs {@link WritableComparator#hashBytes(byte[], int)} on the 1768 * passed in array. This method is what {@link org.apache.hadoop.io.Text} 1769 * use calculating hash code. 1770 */ 1771 public static int hashCode(final byte [] b) { 1772 return hashCode(b, b.length); 1773 } 1774 1775 /** 1776 * @param b value 1777 * @param length length of the value 1778 * @return Runs {@link WritableComparator#hashBytes(byte[], int)} on the 1779 * passed in array. This method is what {@link org.apache.hadoop.io.Text} 1780 * use calculating hash code. 1781 */ 1782 public static int hashCode(final byte [] b, final int length) { 1783 return WritableComparator.hashBytes(b, length); 1784 } 1785 1786 /** 1787 * @param b bytes to hash 1788 * @return A hash of <code>b</code> as an Integer that can be used as key in 1789 * Maps. 1790 */ 1791 public static Integer mapKey(final byte [] b) { 1792 return hashCode(b); 1793 } 1794 1795 /** 1796 * @param b bytes to hash 1797 * @param length length to hash 1798 * @return A hash of <code>b</code> as an Integer that can be used as key in 1799 * Maps. 1800 */ 1801 public static Integer mapKey(final byte [] b, final int length) { 1802 return hashCode(b, length); 1803 } 1804 1805 /** 1806 * @param a lower half 1807 * @param b upper half 1808 * @return New array that has a in lower half and b in upper half. 1809 */ 1810 public static byte [] add(final byte [] a, final byte [] b) { 1811 return add(a, b, EMPTY_BYTE_ARRAY); 1812 } 1813 1814 /** 1815 * @param a first third 1816 * @param b second third 1817 * @param c third third 1818 * @return New array made from a, b and c 1819 */ 1820 public static byte [] add(final byte [] a, final byte [] b, final byte [] c) { 1821 byte [] result = new byte[a.length + b.length + c.length]; 1822 System.arraycopy(a, 0, result, 0, a.length); 1823 System.arraycopy(b, 0, result, a.length, b.length); 1824 System.arraycopy(c, 0, result, a.length + b.length, c.length); 1825 return result; 1826 } 1827 1828 /** 1829 * @param arrays all the arrays to concatenate together. 1830 * @return New array made from the concatenation of the given arrays. 1831 */ 1832 public static byte [] add(final byte [][] arrays) { 1833 int length = 0; 1834 for (int i = 0; i < arrays.length; i++) { 1835 length += arrays[i].length; 1836 } 1837 byte [] result = new byte[length]; 1838 int index = 0; 1839 for (int i = 0; i < arrays.length; i++) { 1840 System.arraycopy(arrays[i], 0, result, index, arrays[i].length); 1841 index += arrays[i].length; 1842 } 1843 return result; 1844 } 1845 1846 /** 1847 * @param a array 1848 * @param length amount of bytes to grab 1849 * @return First <code>length</code> bytes from <code>a</code> 1850 */ 1851 public static byte [] head(final byte [] a, final int length) { 1852 if (a.length < length) { 1853 return null; 1854 } 1855 byte [] result = new byte[length]; 1856 System.arraycopy(a, 0, result, 0, length); 1857 return result; 1858 } 1859 1860 /** 1861 * @param a array 1862 * @param length amount of bytes to snarf 1863 * @return Last <code>length</code> bytes from <code>a</code> 1864 */ 1865 public static byte [] tail(final byte [] a, final int length) { 1866 if (a.length < length) { 1867 return null; 1868 } 1869 byte [] result = new byte[length]; 1870 System.arraycopy(a, a.length - length, result, 0, length); 1871 return result; 1872 } 1873 1874 /** 1875 * @param a array 1876 * @param length new array size 1877 * @return Value in <code>a</code> plus <code>length</code> prepended 0 bytes 1878 */ 1879 public static byte [] padHead(final byte [] a, final int length) { 1880 byte [] padding = new byte[length]; 1881 for (int i = 0; i < length; i++) { 1882 padding[i] = 0; 1883 } 1884 return add(padding,a); 1885 } 1886 1887 /** 1888 * @param a array 1889 * @param length new array size 1890 * @return Value in <code>a</code> plus <code>length</code> appended 0 bytes 1891 */ 1892 public static byte [] padTail(final byte [] a, final int length) { 1893 byte [] padding = new byte[length]; 1894 for (int i = 0; i < length; i++) { 1895 padding[i] = 0; 1896 } 1897 return add(a,padding); 1898 } 1899 1900 /** 1901 * Split passed range. Expensive operation relatively. Uses BigInteger math. 1902 * Useful splitting ranges for MapReduce jobs. 1903 * @param a Beginning of range 1904 * @param b End of range 1905 * @param num Number of times to split range. Pass 1 if you want to split 1906 * the range in two; i.e. one split. 1907 * @return Array of dividing values 1908 */ 1909 public static byte [][] split(final byte [] a, final byte [] b, final int num) { 1910 return split(a, b, false, num); 1911 } 1912 1913 /** 1914 * Split passed range. Expensive operation relatively. Uses BigInteger math. 1915 * Useful splitting ranges for MapReduce jobs. 1916 * @param a Beginning of range 1917 * @param b End of range 1918 * @param inclusive Whether the end of range is prefix-inclusive or is 1919 * considered an exclusive boundary. Automatic splits are generally exclusive 1920 * and manual splits with an explicit range utilize an inclusive end of range. 1921 * @param num Number of times to split range. Pass 1 if you want to split 1922 * the range in two; i.e. one split. 1923 * @return Array of dividing values 1924 */ 1925 public static byte[][] split(final byte[] a, final byte[] b, 1926 boolean inclusive, final int num) { 1927 byte[][] ret = new byte[num + 2][]; 1928 int i = 0; 1929 Iterable<byte[]> iter = iterateOnSplits(a, b, inclusive, num); 1930 if (iter == null) 1931 return null; 1932 for (byte[] elem : iter) { 1933 ret[i++] = elem; 1934 } 1935 return ret; 1936 } 1937 1938 /** 1939 * Iterate over keys within the passed range, splitting at an [a,b) boundary. 1940 */ 1941 public static Iterable<byte[]> iterateOnSplits(final byte[] a, 1942 final byte[] b, final int num) 1943 { 1944 return iterateOnSplits(a, b, false, num); 1945 } 1946 1947 /** 1948 * Iterate over keys within the passed range. 1949 */ 1950 public static Iterable<byte[]> iterateOnSplits( 1951 final byte[] a, final byte[]b, boolean inclusive, final int num) 1952 { 1953 byte [] aPadded; 1954 byte [] bPadded; 1955 if (a.length < b.length) { 1956 aPadded = padTail(a, b.length - a.length); 1957 bPadded = b; 1958 } else if (b.length < a.length) { 1959 aPadded = a; 1960 bPadded = padTail(b, a.length - b.length); 1961 } else { 1962 aPadded = a; 1963 bPadded = b; 1964 } 1965 if (compareTo(aPadded,bPadded) >= 0) { 1966 throw new IllegalArgumentException("b <= a"); 1967 } 1968 if (num <= 0) { 1969 throw new IllegalArgumentException("num cannot be <= 0"); 1970 } 1971 byte [] prependHeader = {1, 0}; 1972 final BigInteger startBI = new BigInteger(add(prependHeader, aPadded)); 1973 final BigInteger stopBI = new BigInteger(add(prependHeader, bPadded)); 1974 BigInteger diffBI = stopBI.subtract(startBI); 1975 if (inclusive) { 1976 diffBI = diffBI.add(BigInteger.ONE); 1977 } 1978 final BigInteger splitsBI = BigInteger.valueOf(num + 1); 1979 //when diffBI < splitBI, use an additional byte to increase diffBI 1980 if(diffBI.compareTo(splitsBI) < 0) { 1981 byte[] aPaddedAdditional = new byte[aPadded.length+1]; 1982 byte[] bPaddedAdditional = new byte[bPadded.length+1]; 1983 for (int i = 0; i < aPadded.length; i++){ 1984 aPaddedAdditional[i] = aPadded[i]; 1985 } 1986 for (int j = 0; j < bPadded.length; j++){ 1987 bPaddedAdditional[j] = bPadded[j]; 1988 } 1989 aPaddedAdditional[aPadded.length] = 0; 1990 bPaddedAdditional[bPadded.length] = 0; 1991 return iterateOnSplits(aPaddedAdditional, bPaddedAdditional, inclusive, num); 1992 } 1993 final BigInteger intervalBI; 1994 try { 1995 intervalBI = diffBI.divide(splitsBI); 1996 } catch(Exception e) { 1997 LOG.error("Exception caught during division", e); 1998 return null; 1999 } 2000 2001 final Iterator<byte[]> iterator = new Iterator<byte[]>() { 2002 private int i = -1; 2003 2004 @Override 2005 public boolean hasNext() { 2006 return i < num+1; 2007 } 2008 2009 @Override 2010 public byte[] next() { 2011 i++; 2012 if (i == 0) return a; 2013 if (i == num + 1) return b; 2014 2015 BigInteger curBI = startBI.add(intervalBI.multiply(BigInteger.valueOf(i))); 2016 byte [] padded = curBI.toByteArray(); 2017 if (padded[1] == 0) 2018 padded = tail(padded, padded.length - 2); 2019 else 2020 padded = tail(padded, padded.length - 1); 2021 return padded; 2022 } 2023 2024 @Override 2025 public void remove() { 2026 throw new UnsupportedOperationException(); 2027 } 2028 2029 }; 2030 2031 return new Iterable<byte[]>() { 2032 @Override 2033 public Iterator<byte[]> iterator() { 2034 return iterator; 2035 } 2036 }; 2037 } 2038 2039 /** 2040 * @param bytes array to hash 2041 * @param offset offset to start from 2042 * @param length length to hash 2043 * */ 2044 public static int hashCode(byte[] bytes, int offset, int length) { 2045 int hash = 1; 2046 for (int i = offset; i < offset + length; i++) 2047 hash = (31 * hash) + bytes[i]; 2048 return hash; 2049 } 2050 2051 /** 2052 * @param t operands 2053 * @return Array of byte arrays made from passed array of Text 2054 */ 2055 public static byte [][] toByteArrays(final String [] t) { 2056 byte [][] result = new byte[t.length][]; 2057 for (int i = 0; i < t.length; i++) { 2058 result[i] = Bytes.toBytes(t[i]); 2059 } 2060 return result; 2061 } 2062 2063 /** 2064 * @param t operands 2065 * @return Array of binary byte arrays made from passed array of binary strings 2066 */ 2067 public static byte[][] toBinaryByteArrays(final String[] t) { 2068 byte[][] result = new byte[t.length][]; 2069 for (int i = 0; i < t.length; i++) { 2070 result[i] = Bytes.toBytesBinary(t[i]); 2071 } 2072 return result; 2073 } 2074 2075 /** 2076 * @param column operand 2077 * @return A byte array of a byte array where first and only entry is 2078 * <code>column</code> 2079 */ 2080 public static byte [][] toByteArrays(final String column) { 2081 return toByteArrays(toBytes(column)); 2082 } 2083 2084 /** 2085 * @param column operand 2086 * @return A byte array of a byte array where first and only entry is 2087 * <code>column</code> 2088 */ 2089 public static byte [][] toByteArrays(final byte [] column) { 2090 byte [][] result = new byte[1][]; 2091 result[0] = column; 2092 return result; 2093 } 2094 2095 /** 2096 * Binary search for keys in indexes. 2097 * 2098 * @param arr array of byte arrays to search for 2099 * @param key the key you want to find 2100 * @param offset the offset in the key you want to find 2101 * @param length the length of the key 2102 * @param comparator a comparator to compare. 2103 * @return zero-based index of the key, if the key is present in the array. 2104 * Otherwise, a value -(i + 1) such that the key is between arr[i - 2105 * 1] and arr[i] non-inclusively, where i is in [0, i], if we define 2106 * arr[-1] = -Inf and arr[N] = Inf for an N-element array. The above 2107 * means that this function can return 2N + 1 different values 2108 * ranging from -(N + 1) to N - 1. 2109 * @deprecated since 2.0.0 and will be removed in 3.0.0. Use 2110 * {@link #binarySearch(byte[][], byte[], int, int)} instead. 2111 * @see #binarySearch(byte[][], byte[], int, int) 2112 * @see <a href="https://issues.apache.org/jira/browse/HBASE-13450">HBASE-13450</a> 2113 */ 2114 @Deprecated 2115 public static int binarySearch(byte [][]arr, byte []key, int offset, 2116 int length, RawComparator<?> comparator) { 2117 return binarySearch(arr, key, offset, length); 2118 } 2119 2120 /** 2121 * Binary search for keys in indexes using Bytes.BYTES_RAWCOMPARATOR. 2122 * 2123 * @param arr array of byte arrays to search for 2124 * @param key the key you want to find 2125 * @param offset the offset in the key you want to find 2126 * @param length the length of the key 2127 * @return zero-based index of the key, if the key is present in the array. 2128 * Otherwise, a value -(i + 1) such that the key is between arr[i - 2129 * 1] and arr[i] non-inclusively, where i is in [0, i], if we define 2130 * arr[-1] = -Inf and arr[N] = Inf for an N-element array. The above 2131 * means that this function can return 2N + 1 different values 2132 * ranging from -(N + 1) to N - 1. 2133 */ 2134 public static int binarySearch(byte[][] arr, byte[] key, int offset, int length) { 2135 int low = 0; 2136 int high = arr.length - 1; 2137 2138 while (low <= high) { 2139 int mid = low + ((high - low) >> 1); 2140 // we have to compare in this order, because the comparator order 2141 // has special logic when the 'left side' is a special key. 2142 int cmp = Bytes.BYTES_RAWCOMPARATOR 2143 .compare(key, offset, length, arr[mid], 0, arr[mid].length); 2144 // key lives above the midpoint 2145 if (cmp > 0) 2146 low = mid + 1; 2147 // key lives below the midpoint 2148 else if (cmp < 0) 2149 high = mid - 1; 2150 // BAM. how often does this really happen? 2151 else 2152 return mid; 2153 } 2154 return -(low + 1); 2155 } 2156 2157 /** 2158 * Binary search for keys in indexes. 2159 * 2160 * @param arr array of byte arrays to search for 2161 * @param key the key you want to find 2162 * @param comparator a comparator to compare. 2163 * @return zero-based index of the key, if the key is present in the array. 2164 * Otherwise, a value -(i + 1) such that the key is between arr[i - 2165 * 1] and arr[i] non-inclusively, where i is in [0, i], if we define 2166 * arr[-1] = -Inf and arr[N] = Inf for an N-element array. The above 2167 * means that this function can return 2N + 1 different values 2168 * ranging from -(N + 1) to N - 1. 2169 * @return the index of the block 2170 * @deprecated since 2.0.0 and will be removed in 3.0.0. Use 2171 * {@link #binarySearch(Cell[], Cell, CellComparator)} instead. 2172 * @see #binarySearch(Cell[], Cell, CellComparator) 2173 * @see <a href="https://issues.apache.org/jira/browse/HBASE-13450">HBASE-13450</a> 2174 */ 2175 @Deprecated 2176 public static int binarySearch(byte[][] arr, Cell key, RawComparator<Cell> comparator) { 2177 int low = 0; 2178 int high = arr.length - 1; 2179 KeyValue.KeyOnlyKeyValue r = new KeyValue.KeyOnlyKeyValue(); 2180 while (low <= high) { 2181 int mid = low + ((high - low) >> 1); 2182 // we have to compare in this order, because the comparator order 2183 // has special logic when the 'left side' is a special key. 2184 r.setKey(arr[mid], 0, arr[mid].length); 2185 int cmp = comparator.compare(key, r); 2186 // key lives above the midpoint 2187 if (cmp > 0) 2188 low = mid + 1; 2189 // key lives below the midpoint 2190 else if (cmp < 0) 2191 high = mid - 1; 2192 // BAM. how often does this really happen? 2193 else 2194 return mid; 2195 } 2196 return - (low+1); 2197 } 2198 2199 /** 2200 * Binary search for keys in indexes. 2201 * 2202 * @param arr array of byte arrays to search for 2203 * @param key the key you want to find 2204 * @param comparator a comparator to compare. 2205 * @return zero-based index of the key, if the key is present in the array. 2206 * Otherwise, a value -(i + 1) such that the key is between arr[i - 2207 * 1] and arr[i] non-inclusively, where i is in [0, i], if we define 2208 * arr[-1] = -Inf and arr[N] = Inf for an N-element array. The above 2209 * means that this function can return 2N + 1 different values 2210 * ranging from -(N + 1) to N - 1. 2211 * @return the index of the block 2212 */ 2213 public static int binarySearch(Cell[] arr, Cell key, CellComparator comparator) { 2214 int low = 0; 2215 int high = arr.length - 1; 2216 while (low <= high) { 2217 int mid = low + ((high - low) >> 1); 2218 // we have to compare in this order, because the comparator order 2219 // has special logic when the 'left side' is a special key. 2220 int cmp = comparator.compare(key, arr[mid]); 2221 // key lives above the midpoint 2222 if (cmp > 0) 2223 low = mid + 1; 2224 // key lives below the midpoint 2225 else if (cmp < 0) 2226 high = mid - 1; 2227 // BAM. how often does this really happen? 2228 else 2229 return mid; 2230 } 2231 return - (low+1); 2232 } 2233 2234 /** 2235 * Bytewise binary increment/deincrement of long contained in byte array 2236 * on given amount. 2237 * 2238 * @param value - array of bytes containing long (length <= SIZEOF_LONG) 2239 * @param amount value will be incremented on (deincremented if negative) 2240 * @return array of bytes containing incremented long (length == SIZEOF_LONG) 2241 */ 2242 public static byte [] incrementBytes(byte[] value, long amount) 2243 { 2244 byte[] val = value; 2245 if (val.length < SIZEOF_LONG) { 2246 // Hopefully this doesn't happen too often. 2247 byte [] newvalue; 2248 if (val[0] < 0) { 2249 newvalue = new byte[]{-1, -1, -1, -1, -1, -1, -1, -1}; 2250 } else { 2251 newvalue = new byte[SIZEOF_LONG]; 2252 } 2253 System.arraycopy(val, 0, newvalue, newvalue.length - val.length, 2254 val.length); 2255 val = newvalue; 2256 } else if (val.length > SIZEOF_LONG) { 2257 throw new IllegalArgumentException("Increment Bytes - value too big: " + 2258 val.length); 2259 } 2260 if(amount == 0) return val; 2261 if(val[0] < 0){ 2262 return binaryIncrementNeg(val, amount); 2263 } 2264 return binaryIncrementPos(val, amount); 2265 } 2266 2267 /* increment/deincrement for positive value */ 2268 private static byte [] binaryIncrementPos(byte [] value, long amount) { 2269 long amo = amount; 2270 int sign = 1; 2271 if (amount < 0) { 2272 amo = -amount; 2273 sign = -1; 2274 } 2275 for(int i=0;i<value.length;i++) { 2276 int cur = ((int)amo % 256) * sign; 2277 amo = (amo >> 8); 2278 int val = value[value.length-i-1] & 0x0ff; 2279 int total = val + cur; 2280 if(total > 255) { 2281 amo += sign; 2282 total %= 256; 2283 } else if (total < 0) { 2284 amo -= sign; 2285 } 2286 value[value.length-i-1] = (byte)total; 2287 if (amo == 0) return value; 2288 } 2289 return value; 2290 } 2291 2292 /* increment/deincrement for negative value */ 2293 private static byte [] binaryIncrementNeg(byte [] value, long amount) { 2294 long amo = amount; 2295 int sign = 1; 2296 if (amount < 0) { 2297 amo = -amount; 2298 sign = -1; 2299 } 2300 for(int i=0;i<value.length;i++) { 2301 int cur = ((int)amo % 256) * sign; 2302 amo = (amo >> 8); 2303 int val = ((~value[value.length-i-1]) & 0x0ff) + 1; 2304 int total = cur - val; 2305 if(total >= 0) { 2306 amo += sign; 2307 } else if (total < -256) { 2308 amo -= sign; 2309 total %= 256; 2310 } 2311 value[value.length-i-1] = (byte)total; 2312 if (amo == 0) return value; 2313 } 2314 return value; 2315 } 2316 2317 /** 2318 * Writes a string as a fixed-size field, padded with zeros. 2319 */ 2320 public static void writeStringFixedSize(final DataOutput out, String s, 2321 int size) throws IOException { 2322 byte[] b = toBytes(s); 2323 if (b.length > size) { 2324 throw new IOException("Trying to write " + b.length + " bytes (" + 2325 toStringBinary(b) + ") into a field of length " + size); 2326 } 2327 2328 out.writeBytes(s); 2329 for (int i = 0; i < size - s.length(); ++i) 2330 out.writeByte(0); 2331 } 2332 2333 /** 2334 * Reads a fixed-size field and interprets it as a string padded with zeros. 2335 */ 2336 public static String readStringFixedSize(final DataInput in, int size) 2337 throws IOException { 2338 byte[] b = new byte[size]; 2339 in.readFully(b); 2340 int n = b.length; 2341 while (n > 0 && b[n - 1] == 0) 2342 --n; 2343 2344 return toString(b, 0, n); 2345 } 2346 2347 /** 2348 * Copy the byte array given in parameter and return an instance 2349 * of a new byte array with the same length and the same content. 2350 * @param bytes the byte array to duplicate 2351 * @return a copy of the given byte array 2352 */ 2353 public static byte [] copy(byte [] bytes) { 2354 if (bytes == null) return null; 2355 byte [] result = new byte[bytes.length]; 2356 System.arraycopy(bytes, 0, result, 0, bytes.length); 2357 return result; 2358 } 2359 2360 /** 2361 * Copy the byte array given in parameter and return an instance 2362 * of a new byte array with the same length and the same content. 2363 * @param bytes the byte array to copy from 2364 * @return a copy of the given designated byte array 2365 * @param offset 2366 * @param length 2367 */ 2368 public static byte [] copy(byte [] bytes, final int offset, final int length) { 2369 if (bytes == null) return null; 2370 byte [] result = new byte[length]; 2371 System.arraycopy(bytes, offset, result, 0, length); 2372 return result; 2373 } 2374 2375 /** 2376 * Search sorted array "a" for byte "key". I can't remember if I wrote this or copied it from 2377 * somewhere. (mcorgan) 2378 * @param a Array to search. Entries must be sorted and unique. 2379 * @param fromIndex First index inclusive of "a" to include in the search. 2380 * @param toIndex Last index exclusive of "a" to include in the search. 2381 * @param key The byte to search for. 2382 * @return The index of key if found. If not found, return -(index + 1), where negative indicates 2383 * "not found" and the "index + 1" handles the "-0" case. 2384 */ 2385 public static int unsignedBinarySearch(byte[] a, int fromIndex, int toIndex, byte key) { 2386 int unsignedKey = key & 0xff; 2387 int low = fromIndex; 2388 int high = toIndex - 1; 2389 2390 while (low <= high) { 2391 int mid = low + ((high - low) >> 1); 2392 int midVal = a[mid] & 0xff; 2393 2394 if (midVal < unsignedKey) { 2395 low = mid + 1; 2396 } else if (midVal > unsignedKey) { 2397 high = mid - 1; 2398 } else { 2399 return mid; // key found 2400 } 2401 } 2402 return -(low + 1); // key not found. 2403 } 2404 2405 /** 2406 * Treat the byte[] as an unsigned series of bytes, most significant bits first. Start by adding 2407 * 1 to the rightmost bit/byte and carry over all overflows to the more significant bits/bytes. 2408 * 2409 * @param input The byte[] to increment. 2410 * @return The incremented copy of "in". May be same length or 1 byte longer. 2411 */ 2412 public static byte[] unsignedCopyAndIncrement(final byte[] input) { 2413 byte[] copy = copy(input); 2414 if (copy == null) { 2415 throw new IllegalArgumentException("cannot increment null array"); 2416 } 2417 for (int i = copy.length - 1; i >= 0; --i) { 2418 if (copy[i] == -1) {// -1 is all 1-bits, which is the unsigned maximum 2419 copy[i] = 0; 2420 } else { 2421 ++copy[i]; 2422 return copy; 2423 } 2424 } 2425 // we maxed out the array 2426 byte[] out = new byte[copy.length + 1]; 2427 out[0] = 1; 2428 System.arraycopy(copy, 0, out, 1, copy.length); 2429 return out; 2430 } 2431 2432 public static boolean equals(List<byte[]> a, List<byte[]> b) { 2433 if (a == null) { 2434 if (b == null) { 2435 return true; 2436 } 2437 return false; 2438 } 2439 if (b == null) { 2440 return false; 2441 } 2442 if (a.size() != b.size()) { 2443 return false; 2444 } 2445 for (int i = 0; i < a.size(); ++i) { 2446 if (!Bytes.equals(a.get(i), b.get(i))) { 2447 return false; 2448 } 2449 } 2450 return true; 2451 } 2452 2453 public static boolean isSorted(Collection<byte[]> arrays) { 2454 if (!CollectionUtils.isEmpty(arrays)) { 2455 byte[] previous = new byte[0]; 2456 for (byte[] array : arrays) { 2457 if (Bytes.compareTo(previous, array) > 0) { 2458 return false; 2459 } 2460 previous = array; 2461 } 2462 } 2463 return true; 2464 } 2465 2466 public static List<byte[]> getUtf8ByteArrays(List<String> strings) { 2467 if (CollectionUtils.isEmpty(strings)) { 2468 return Collections.emptyList(); 2469 } 2470 List<byte[]> byteArrays = new ArrayList<>(strings.size()); 2471 strings.forEach(s -> byteArrays.add(Bytes.toBytes(s))); 2472 return byteArrays; 2473 } 2474 2475 /** 2476 * Returns the index of the first appearance of the value {@code target} in 2477 * {@code array}. 2478 * 2479 * @param array an array of {@code byte} values, possibly empty 2480 * @param target a primitive {@code byte} value 2481 * @return the least index {@code i} for which {@code array[i] == target}, or 2482 * {@code -1} if no such index exists. 2483 */ 2484 public static int indexOf(byte[] array, byte target) { 2485 for (int i = 0; i < array.length; i++) { 2486 if (array[i] == target) { 2487 return i; 2488 } 2489 } 2490 return -1; 2491 } 2492 2493 /** 2494 * Returns the start position of the first occurrence of the specified {@code 2495 * target} within {@code array}, or {@code -1} if there is no such occurrence. 2496 * 2497 * <p>More formally, returns the lowest index {@code i} such that {@code 2498 * java.util.Arrays.copyOfRange(array, i, i + target.length)} contains exactly 2499 * the same elements as {@code target}. 2500 * 2501 * @param array the array to search for the sequence {@code target} 2502 * @param target the array to search for as a sub-sequence of {@code array} 2503 */ 2504 public static int indexOf(byte[] array, byte[] target) { 2505 checkNotNull(array, "array"); 2506 checkNotNull(target, "target"); 2507 if (target.length == 0) { 2508 return 0; 2509 } 2510 2511 outer: 2512 for (int i = 0; i < array.length - target.length + 1; i++) { 2513 for (int j = 0; j < target.length; j++) { 2514 if (array[i + j] != target[j]) { 2515 continue outer; 2516 } 2517 } 2518 return i; 2519 } 2520 return -1; 2521 } 2522 2523 /** 2524 * @param array an array of {@code byte} values, possibly empty 2525 * @param target a primitive {@code byte} value 2526 * @return {@code true} if {@code target} is present as an element anywhere in {@code array}. 2527 */ 2528 public static boolean contains(byte[] array, byte target) { 2529 return indexOf(array, target) > -1; 2530 } 2531 2532 /** 2533 * @param array an array of {@code byte} values, possibly empty 2534 * @param target an array of {@code byte} 2535 * @return {@code true} if {@code target} is present anywhere in {@code array} 2536 */ 2537 public static boolean contains(byte[] array, byte[] target) { 2538 return indexOf(array, target) > -1; 2539 } 2540 2541 /** 2542 * Fill given array with zeros. 2543 * @param b array which needs to be filled with zeros 2544 */ 2545 public static void zero(byte[] b) { 2546 zero(b, 0, b.length); 2547 } 2548 2549 /** 2550 * Fill given array with zeros at the specified position. 2551 * @param b 2552 * @param offset 2553 * @param length 2554 */ 2555 public static void zero(byte[] b, int offset, int length) { 2556 checkPositionIndex(offset, b.length, "offset"); 2557 checkArgument(length > 0, "length must be greater than 0"); 2558 checkPositionIndex(offset + length, b.length, "offset + length"); 2559 Arrays.fill(b, offset, offset + length, (byte) 0); 2560 } 2561 2562 private static final SecureRandom RNG = new SecureRandom(); 2563 2564 /** 2565 * Fill given array with random bytes. 2566 * @param b array which needs to be filled with random bytes 2567 */ 2568 public static void random(byte[] b) { 2569 RNG.nextBytes(b); 2570 } 2571 2572 /** 2573 * Fill given array with random bytes at the specified position. 2574 * @param b 2575 * @param offset 2576 * @param length 2577 */ 2578 public static void random(byte[] b, int offset, int length) { 2579 checkPositionIndex(offset, b.length, "offset"); 2580 checkArgument(length > 0, "length must be greater than 0"); 2581 checkPositionIndex(offset + length, b.length, "offset + length"); 2582 byte[] buf = new byte[length]; 2583 RNG.nextBytes(buf); 2584 System.arraycopy(buf, 0, b, offset, length); 2585 } 2586 2587 /** 2588 * Create a max byte array with the specified max byte count 2589 * @param maxByteCount the length of returned byte array 2590 * @return the created max byte array 2591 */ 2592 public static byte[] createMaxByteArray(int maxByteCount) { 2593 byte[] maxByteArray = new byte[maxByteCount]; 2594 for (int i = 0; i < maxByteArray.length; i++) { 2595 maxByteArray[i] = (byte) 0xff; 2596 } 2597 return maxByteArray; 2598 } 2599 2600 /** 2601 * Create a byte array which is multiple given bytes 2602 * @param srcBytes 2603 * @param multiNum 2604 * @return byte array 2605 */ 2606 public static byte[] multiple(byte[] srcBytes, int multiNum) { 2607 if (multiNum <= 0) { 2608 return new byte[0]; 2609 } 2610 byte[] result = new byte[srcBytes.length * multiNum]; 2611 for (int i = 0; i < multiNum; i++) { 2612 System.arraycopy(srcBytes, 0, result, i * srcBytes.length, 2613 srcBytes.length); 2614 } 2615 return result; 2616 } 2617 2618 private static final char[] HEX_CHARS = { 2619 '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'a', 'b', 'c', 'd', 'e', 'f' 2620 }; 2621 2622 /** 2623 * Convert a byte range into a hex string 2624 */ 2625 public static String toHex(byte[] b, int offset, int length) { 2626 checkArgument(length <= Integer.MAX_VALUE / 2); 2627 int numChars = length * 2; 2628 char[] ch = new char[numChars]; 2629 for (int i = 0; i < numChars; i += 2) 2630 { 2631 byte d = b[offset + i/2]; 2632 ch[i] = HEX_CHARS[(d >> 4) & 0x0F]; 2633 ch[i+1] = HEX_CHARS[d & 0x0F]; 2634 } 2635 return new String(ch); 2636 } 2637 2638 /** 2639 * Convert a byte array into a hex string 2640 */ 2641 public static String toHex(byte[] b) { 2642 return toHex(b, 0, b.length); 2643 } 2644 2645 private static int hexCharToNibble(char ch) { 2646 if (ch <= '9' && ch >= '0') { 2647 return ch - '0'; 2648 } else if (ch >= 'a' && ch <= 'f') { 2649 return ch - 'a' + 10; 2650 } else if (ch >= 'A' && ch <= 'F') { 2651 return ch - 'A' + 10; 2652 } 2653 throw new IllegalArgumentException("Invalid hex char: " + ch); 2654 } 2655 2656 private static byte hexCharsToByte(char c1, char c2) { 2657 return (byte) ((hexCharToNibble(c1) << 4) | hexCharToNibble(c2)); 2658 } 2659 2660 /** 2661 * Create a byte array from a string of hash digits. The length of the 2662 * string must be a multiple of 2 2663 * @param hex 2664 */ 2665 public static byte[] fromHex(String hex) { 2666 checkArgument(hex.length() % 2 == 0, "length must be a multiple of 2"); 2667 int len = hex.length(); 2668 byte[] b = new byte[len / 2]; 2669 for (int i = 0; i < len; i += 2) { 2670 b[i / 2] = hexCharsToByte(hex.charAt(i),hex.charAt(i+1)); 2671 } 2672 return b; 2673 } 2674 2675 /** 2676 * @param b 2677 * @param delimiter 2678 * @return Index of delimiter having started from start of <code>b</code> moving rightward. 2679 */ 2680 public static int searchDelimiterIndex(final byte[] b, int offset, final int length, 2681 final int delimiter) { 2682 if (b == null) { 2683 throw new IllegalArgumentException("Passed buffer is null"); 2684 } 2685 int result = -1; 2686 for (int i = offset; i < length + offset; i++) { 2687 if (b[i] == delimiter) { 2688 result = i; 2689 break; 2690 } 2691 } 2692 return result; 2693 } 2694 2695 /** 2696 * Find index of passed delimiter walking from end of buffer backwards. 2697 * 2698 * @param b 2699 * @param delimiter 2700 * @return Index of delimiter 2701 */ 2702 public static int searchDelimiterIndexInReverse(final byte[] b, final int offset, 2703 final int length, final int delimiter) { 2704 if (b == null) { 2705 throw new IllegalArgumentException("Passed buffer is null"); 2706 } 2707 int result = -1; 2708 for (int i = (offset + length) - 1; i >= offset; i--) { 2709 if (b[i] == delimiter) { 2710 result = i; 2711 break; 2712 } 2713 } 2714 return result; 2715 } 2716 2717 public static int findCommonPrefix(byte[] left, byte[] right, int leftLength, int rightLength, 2718 int leftOffset, int rightOffset) { 2719 int length = Math.min(leftLength, rightLength); 2720 int result = 0; 2721 2722 while (result < length && left[leftOffset + result] == right[rightOffset + result]) { 2723 result++; 2724 } 2725 return result; 2726 } 2727}