001/* 002 * Licensed to the Apache Software Foundation (ASF) under one 003 * or more contributor license agreements. See the NOTICE file 004 * distributed with this work for additional information 005 * regarding copyright ownership. The ASF licenses this file 006 * to you under the Apache License, Version 2.0 (the 007 * "License"); you may not use this file except in compliance 008 * with the License. You may obtain a copy of the License at 009 * 010 * http://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, software 013 * distributed under the License is distributed on an "AS IS" BASIS, 014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 015 * See the License for the specific language governing permissions and 016 * limitations under the License. 017 */ 018package org.apache.hadoop.hbase.util; 019 020import static org.apache.hbase.thirdparty.com.google.common.base.Preconditions.checkArgument; 021import static org.apache.hbase.thirdparty.com.google.common.base.Preconditions.checkNotNull; 022import static org.apache.hbase.thirdparty.com.google.common.base.Preconditions.checkPositionIndex; 023 024import com.google.protobuf.ByteString; 025import java.io.DataInput; 026import java.io.DataOutput; 027import java.io.IOException; 028import java.io.UnsupportedEncodingException; 029import java.math.BigDecimal; 030import java.math.BigInteger; 031import java.nio.ByteBuffer; 032import java.nio.charset.StandardCharsets; 033import java.security.SecureRandom; 034import java.util.ArrayList; 035import java.util.Arrays; 036import java.util.Collection; 037import java.util.Collections; 038import java.util.Comparator; 039import java.util.Iterator; 040import java.util.List; 041import java.util.Random; 042import org.apache.hadoop.hbase.Cell; 043import org.apache.hadoop.hbase.CellComparator; 044import org.apache.hadoop.hbase.KeyValue; 045import org.apache.hadoop.hbase.unsafe.HBasePlatformDependent; 
046import org.apache.hadoop.io.RawComparator; 047import org.apache.hadoop.io.WritableComparator; 048import org.apache.hadoop.io.WritableUtils; 049import org.apache.yetus.audience.InterfaceAudience; 050import org.slf4j.Logger; 051import org.slf4j.LoggerFactory; 052 053import org.apache.hbase.thirdparty.org.apache.commons.collections4.CollectionUtils; 054 055/** 056 * Utility class that handles byte arrays, conversions to/from other types, comparisons, hash code 057 * generation, manufacturing keys for HashMaps or HashSets, and can be used as key in maps or trees. 058 */ 059@InterfaceAudience.Public 060@edu.umd.cs.findbugs.annotations.SuppressWarnings( 061 value = "EQ_CHECK_FOR_OPERAND_NOT_COMPATIBLE_WITH_THIS", 062 justification = "It has been like this forever") 063@SuppressWarnings("MixedMutabilityReturnType") 064public class Bytes implements Comparable<Bytes> { 065 066 // Using the charset canonical name for String/byte[] conversions is much 067 // more efficient due to use of cached encoders/decoders. 
068 private static final String UTF8_CSN = StandardCharsets.UTF_8.name(); 069 070 // HConstants.EMPTY_BYTE_ARRAY should be updated if this changed 071 private static final byte[] EMPTY_BYTE_ARRAY = new byte[0]; 072 073 private static final Logger LOG = LoggerFactory.getLogger(Bytes.class); 074 075 /** 076 * Size of boolean in bytes 077 */ 078 public static final int SIZEOF_BOOLEAN = Byte.SIZE / Byte.SIZE; 079 080 /** 081 * Size of byte in bytes 082 */ 083 public static final int SIZEOF_BYTE = SIZEOF_BOOLEAN; 084 085 /** 086 * Size of char in bytes 087 */ 088 public static final int SIZEOF_CHAR = Character.SIZE / Byte.SIZE; 089 090 /** 091 * Size of double in bytes 092 */ 093 public static final int SIZEOF_DOUBLE = Double.SIZE / Byte.SIZE; 094 095 /** 096 * Size of float in bytes 097 */ 098 public static final int SIZEOF_FLOAT = Float.SIZE / Byte.SIZE; 099 100 /** 101 * Size of int in bytes 102 */ 103 public static final int SIZEOF_INT = Integer.SIZE / Byte.SIZE; 104 105 /** 106 * Size of long in bytes 107 */ 108 public static final int SIZEOF_LONG = Long.SIZE / Byte.SIZE; 109 110 /** 111 * Size of short in bytes 112 */ 113 public static final int SIZEOF_SHORT = Short.SIZE / Byte.SIZE; 114 115 /** 116 * Mask to apply to a long to reveal the lower int only. Use like this: int i = 117 * (int)(0xFFFFFFFF00000000L ^ some_long_value); 118 */ 119 public static final long MASK_FOR_LOWER_INT_IN_LONG = 0xFFFFFFFF00000000L; 120 121 /** 122 * Estimate of size cost to pay beyond payload in jvm for instance of byte []. Estimate based on 123 * study of jhat and jprofiler numbers. 124 */ 125 // JHat says BU is 56 bytes. 126 // SizeOf which uses java.lang.instrument says 24 bytes. (3 longs?) 127 public static final int ESTIMATED_HEAP_TAX = 16; 128 129 @InterfaceAudience.Private 130 static final boolean UNSAFE_UNALIGNED = HBasePlatformDependent.unaligned(); 131 132 /** 133 * Returns length of the byte array, returning 0 if the array is null. Useful for calculating 134 * sizes. 
135 * @param b byte array, which can be null 136 * @return 0 if b is null, otherwise returns length 137 */ 138 final public static int len(byte[] b) { 139 return b == null ? 0 : b.length; 140 } 141 142 private byte[] bytes; 143 private int offset; 144 private int length; 145 146 /** 147 * Create a zero-size sequence. 148 */ 149 public Bytes() { 150 super(); 151 } 152 153 /** 154 * Create a Bytes using the byte array as the initial value. 155 * @param bytes This array becomes the backing storage for the object. 156 */ 157 public Bytes(byte[] bytes) { 158 this(bytes, 0, bytes.length); 159 } 160 161 /** 162 * Set the new Bytes to the contents of the passed <code>ibw</code>. 163 * @param ibw the value to set this Bytes to. 164 */ 165 public Bytes(final Bytes ibw) { 166 this(ibw.get(), ibw.getOffset(), ibw.getLength()); 167 } 168 169 /** 170 * Set the value to a given byte range 171 * @param bytes the new byte range to set to 172 * @param offset the offset in newData to start at 173 * @param length the number of bytes in the range 174 */ 175 public Bytes(final byte[] bytes, final int offset, final int length) { 176 this.bytes = bytes; 177 this.offset = offset; 178 this.length = length; 179 } 180 181 /** 182 * Copy bytes from ByteString instance. 183 * @param byteString copy from 184 * @deprecated As of release 2.0.0, this will be removed in HBase 3.0.0. 185 */ 186 @Deprecated 187 public Bytes(final ByteString byteString) { 188 this(byteString.toByteArray()); 189 } 190 191 /** 192 * Get the data from the Bytes. 193 * @return The data is only valid between offset and offset+length. 194 */ 195 public byte[] get() { 196 if (this.bytes == null) { 197 throw new IllegalStateException( 198 "Uninitialiized. Null constructor " + "called w/o accompaying readFields invocation"); 199 } 200 return this.bytes; 201 } 202 203 /** Use passed bytes as backing array for this instance. 
*/ 204 public void set(final byte[] b) { 205 set(b, 0, b.length); 206 } 207 208 /** Use passed bytes as backing array for this instance. */ 209 public void set(final byte[] b, final int offset, final int length) { 210 this.bytes = b; 211 this.offset = offset; 212 this.length = length; 213 } 214 215 /** 216 * @return the number of valid bytes in the buffer 217 * @deprecated since 2.0.0 and will be removed in 3.0.0. Use {@link #getLength()} instead. 218 * @see #getLength() 219 * @see <a href="https://issues.apache.org/jira/browse/HBASE-11862">HBASE-11862</a> 220 */ 221 @Deprecated 222 public int getSize() { 223 if (this.bytes == null) { 224 throw new IllegalStateException( 225 "Uninitialiized. Null constructor " + "called w/o accompaying readFields invocation"); 226 } 227 return this.length; 228 } 229 230 /** Returns the number of valid bytes in the buffer */ 231 public int getLength() { 232 if (this.bytes == null) { 233 throw new IllegalStateException( 234 "Uninitialiized. Null constructor " + "called w/o accompaying readFields invocation"); 235 } 236 return this.length; 237 } 238 239 /** Return the offset into the buffer. */ 240 public int getOffset() { 241 return this.offset; 242 } 243 244 /** 245 * @deprecated As of release 2.0.0, this will be removed in HBase 3.0.0. 246 */ 247 @Deprecated 248 public ByteString toByteString() { 249 return ByteString.copyFrom(this.bytes, this.offset, this.length); 250 } 251 252 @Override 253 public int hashCode() { 254 return Bytes.hashCode(bytes, offset, length); 255 } 256 257 /** 258 * Define the sort order of the Bytes. 259 * @param that The other bytes writable 260 * @return Positive if left is bigger than right, 0 if they are equal, and negative if left is 261 * smaller than right. 
262 */ 263 @Override 264 public int compareTo(Bytes that) { 265 return BYTES_RAWCOMPARATOR.compare(this.bytes, this.offset, this.length, that.bytes, 266 that.offset, that.length); 267 } 268 269 /** 270 * Compares the bytes in this object to the specified byte array n * @return Positive if left is 271 * bigger than right, 0 if they are equal, and negative if left is smaller than right. 272 */ 273 public int compareTo(final byte[] that) { 274 return BYTES_RAWCOMPARATOR.compare(this.bytes, this.offset, this.length, that, 0, that.length); 275 } 276 277 @Override 278 public boolean equals(Object right_obj) { 279 if (right_obj instanceof byte[]) { 280 return compareTo((byte[]) right_obj) == 0; 281 } 282 if (right_obj instanceof Bytes) { 283 return compareTo((Bytes) right_obj) == 0; 284 } 285 return false; 286 } 287 288 @Override 289 public String toString() { 290 return Bytes.toString(bytes, offset, length); 291 } 292 293 /** 294 * Convert a list of byte[] to an array 295 * @param array List of byte []. 296 * @return Array of byte []. 297 */ 298 public static byte[][] toArray(final List<byte[]> array) { 299 // List#toArray doesn't work on lists of byte []. 300 byte[][] results = new byte[array.size()][]; 301 for (int i = 0; i < array.size(); i++) { 302 results[i] = array.get(i); 303 } 304 return results; 305 } 306 307 /** Returns a copy of the bytes referred to by this writable */ 308 public byte[] copyBytes() { 309 return Arrays.copyOfRange(bytes, offset, offset + length); 310 } 311 312 /** Byte array comparator class. 
*/ 313 @InterfaceAudience.Public 314 public static class ByteArrayComparator implements RawComparator<byte[]> { 315 316 public ByteArrayComparator() { 317 super(); 318 } 319 320 @Override 321 public int compare(byte[] left, byte[] right) { 322 return compareTo(left, right); 323 } 324 325 @Override 326 public int compare(byte[] b1, int s1, int l1, byte[] b2, int s2, int l2) { 327 return LexicographicalComparerHolder.BEST_COMPARER.compareTo(b1, s1, l1, b2, s2, l2); 328 } 329 } 330 331 /** 332 * A {@link ByteArrayComparator} that treats the empty array as the largest value. This is useful 333 * for comparing row end keys for regions. 334 */ 335 // TODO: unfortunately, HBase uses byte[0] as both start and end keys for region 336 // boundaries. Thus semantically, we should treat empty byte array as the smallest value 337 // while comparing row keys, start keys etc; but as the largest value for comparing 338 // region boundaries for endKeys. 339 @InterfaceAudience.Public 340 public static class RowEndKeyComparator extends ByteArrayComparator { 341 @Override 342 public int compare(byte[] left, byte[] right) { 343 return compare(left, 0, left.length, right, 0, right.length); 344 } 345 346 @Override 347 public int compare(byte[] b1, int s1, int l1, byte[] b2, int s2, int l2) { 348 if (b1 == b2 && s1 == s2 && l1 == l2) { 349 return 0; 350 } 351 if (l1 == 0) { 352 return l2; // 0 or positive 353 } 354 if (l2 == 0) { 355 return -1; 356 } 357 return super.compare(b1, s1, l1, b2, s2, l2); 358 } 359 } 360 361 /** Pass this to TreeMaps where byte [] are keys. */ 362 public final static Comparator<byte[]> BYTES_COMPARATOR = new ByteArrayComparator(); 363 364 /** Use comparing byte arrays, byte-by-byte */ 365 public final static RawComparator<byte[]> BYTES_RAWCOMPARATOR = new ByteArrayComparator(); 366 367 /** 368 * Read byte-array written with a WritableableUtils.vint prefix. 369 * @param in Input to read from. 
370 * @return byte array read off <code>in</code> 371 * @throws IOException e 372 */ 373 public static byte[] readByteArray(final DataInput in) throws IOException { 374 int len = WritableUtils.readVInt(in); 375 if (len < 0) { 376 throw new NegativeArraySizeException(Integer.toString(len)); 377 } 378 byte[] result = new byte[len]; 379 in.readFully(result, 0, len); 380 return result; 381 } 382 383 /** 384 * Read byte-array written with a WritableableUtils.vint prefix. IOException is converted to a 385 * RuntimeException. 386 * @param in Input to read from. 387 * @return byte array read off <code>in</code> 388 */ 389 public static byte[] readByteArrayThrowsRuntime(final DataInput in) { 390 try { 391 return readByteArray(in); 392 } catch (Exception e) { 393 throw new RuntimeException(e); 394 } 395 } 396 397 /** 398 * Write byte-array with a WritableableUtils.vint prefix. 399 * @param out output stream to be written to 400 * @param b array to write 401 * @throws IOException e 402 */ 403 public static void writeByteArray(final DataOutput out, final byte[] b) throws IOException { 404 if (b == null) { 405 WritableUtils.writeVInt(out, 0); 406 } else { 407 writeByteArray(out, b, 0, b.length); 408 } 409 } 410 411 /** 412 * Write byte-array to out with a vint length prefix. 413 * @param out output stream 414 * @param b array 415 * @param offset offset into array 416 * @param length length past offset 417 * @throws IOException e 418 */ 419 public static void writeByteArray(final DataOutput out, final byte[] b, final int offset, 420 final int length) throws IOException { 421 WritableUtils.writeVInt(out, length); 422 out.write(b, offset, length); 423 } 424 425 /** 426 * Write byte-array from src to tgt with a vint length prefix. 427 * @param tgt target array 428 * @param tgtOffset offset into target array 429 * @param src source array 430 * @param srcOffset source offset 431 * @param srcLength source length 432 * @return New offset in src array. 
433 */ 434 public static int writeByteArray(final byte[] tgt, final int tgtOffset, final byte[] src, 435 final int srcOffset, final int srcLength) { 436 byte[] vint = vintToBytes(srcLength); 437 System.arraycopy(vint, 0, tgt, tgtOffset, vint.length); 438 int offset = tgtOffset + vint.length; 439 System.arraycopy(src, srcOffset, tgt, offset, srcLength); 440 return offset + srcLength; 441 } 442 443 /** 444 * Put bytes at the specified byte array position. 445 * @param tgtBytes the byte array 446 * @param tgtOffset position in the array 447 * @param srcBytes array to write out 448 * @param srcOffset source offset 449 * @param srcLength source length 450 * @return incremented offset 451 */ 452 public static int putBytes(byte[] tgtBytes, int tgtOffset, byte[] srcBytes, int srcOffset, 453 int srcLength) { 454 System.arraycopy(srcBytes, srcOffset, tgtBytes, tgtOffset, srcLength); 455 return tgtOffset + srcLength; 456 } 457 458 /** 459 * Write a single byte out to the specified byte array position. 460 * @param bytes the byte array 461 * @param offset position in the array 462 * @param b byte to write out 463 * @return incremented offset 464 */ 465 public static int putByte(byte[] bytes, int offset, byte b) { 466 bytes[offset] = b; 467 return offset + 1; 468 } 469 470 /** 471 * Add the whole content of the ByteBuffer to the bytes arrays. The ByteBuffer is modified. 472 * @param bytes the byte array 473 * @param offset position in the array 474 * @param buf ByteBuffer to write out 475 * @return incremented offset 476 */ 477 public static int putByteBuffer(byte[] bytes, int offset, ByteBuffer buf) { 478 int len = buf.remaining(); 479 buf.get(bytes, offset, len); 480 return offset + len; 481 } 482 483 /** 484 * Returns a new byte array, copied from the given {@code buf}, from the index 0 (inclusive) to 485 * the limit (exclusive), regardless of the current position. The position and the other index 486 * parameters are not changed. 
487 * @param buf a byte buffer 488 * @return the byte array 489 * @see #getBytes(ByteBuffer) 490 */ 491 public static byte[] toBytes(ByteBuffer buf) { 492 ByteBuffer dup = buf.duplicate(); 493 dup.position(0); 494 return readBytes(dup); 495 } 496 497 private static byte[] readBytes(ByteBuffer buf) { 498 byte[] result = new byte[buf.remaining()]; 499 buf.get(result); 500 return result; 501 } 502 503 /** 504 * Convert a byte[] into a string. Charset is assumed to be UTF-8. 505 * @param b Presumed UTF-8 encoded byte array. 506 * @return String made from <code>b</code> 507 */ 508 public static String toString(final byte[] b) { 509 if (b == null) { 510 return null; 511 } 512 return toString(b, 0, b.length); 513 } 514 515 /** 516 * Joins two byte arrays together using a separator. 517 * @param b1 The first byte array. 518 * @param sep The separator to use. 519 * @param b2 The second byte array. 520 */ 521 public static String toString(final byte[] b1, String sep, final byte[] b2) { 522 return toString(b1, 0, b1.length) + sep + toString(b2, 0, b2.length); 523 } 524 525 /** 526 * This method will convert utf8 encoded bytes into a string. If the given byte array is null, 527 * this method will return null. 528 * @param b Presumed UTF-8 encoded byte array. 529 * @param off offset into array 530 * @return String made from <code>b</code> or null 531 */ 532 public static String toString(final byte[] b, int off) { 533 if (b == null) { 534 return null; 535 } 536 int len = b.length - off; 537 if (len <= 0) { 538 return ""; 539 } 540 try { 541 return new String(b, off, len, UTF8_CSN); 542 } catch (UnsupportedEncodingException e) { 543 // should never happen! 544 throw new IllegalArgumentException("UTF8 encoding is not supported", e); 545 } 546 } 547 548 /** 549 * This method will convert utf8 encoded bytes into a string. If the given byte array is null, 550 * this method will return null. 551 * @param b Presumed UTF-8 encoded byte array. 
552 * @param off offset into array 553 * @param len length of utf-8 sequence 554 * @return String made from <code>b</code> or null 555 */ 556 public static String toString(final byte[] b, int off, int len) { 557 if (b == null) { 558 return null; 559 } 560 if (len == 0) { 561 return ""; 562 } 563 try { 564 return new String(b, off, len, UTF8_CSN); 565 } catch (UnsupportedEncodingException e) { 566 // should never happen! 567 throw new IllegalArgumentException("UTF8 encoding is not supported", e); 568 } 569 } 570 571 /** 572 * Write a printable representation of a byte array. 573 * @param b byte array n * @see #toStringBinary(byte[], int, int) 574 */ 575 public static String toStringBinary(final byte[] b) { 576 if (b == null) return "null"; 577 return toStringBinary(b, 0, b.length); 578 } 579 580 /** 581 * Converts the given byte buffer to a printable representation, from the index 0 (inclusive) to 582 * the limit (exclusive), regardless of the current position. The position and the other index 583 * parameters are not changed. 584 * @param buf a byte buffer 585 * @return a string representation of the buffer's binary contents 586 * @see #toBytes(ByteBuffer) 587 * @see #getBytes(ByteBuffer) 588 */ 589 public static String toStringBinary(ByteBuffer buf) { 590 if (buf == null) return "null"; 591 if (buf.hasArray()) { 592 return toStringBinary(buf.array(), buf.arrayOffset(), buf.limit()); 593 } 594 return toStringBinary(toBytes(buf)); 595 } 596 597 private static final char[] HEX_CHARS_UPPER = 598 { '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'A', 'B', 'C', 'D', 'E', 'F' }; 599 600 /** 601 * Write a printable representation of a byte array. 
Non-printable characters are hex escaped in 602 * the format \\x%02X, eg: \x00 \x05 etc 603 * @param b array to write out 604 * @param off offset to start at 605 * @param len length to write 606 * @return string output 607 */ 608 public static String toStringBinary(final byte[] b, int off, int len) { 609 StringBuilder result = new StringBuilder(); 610 // Just in case we are passed a 'len' that is > buffer length... 611 if (off >= b.length) return result.toString(); 612 if (off + len > b.length) len = b.length - off; 613 for (int i = off; i < off + len; ++i) { 614 int ch = b[i] & 0xFF; 615 if (ch >= ' ' && ch <= '~' && ch != '\\') { 616 result.append((char) ch); 617 } else { 618 result.append("\\x"); 619 result.append(HEX_CHARS_UPPER[ch / 0x10]); 620 result.append(HEX_CHARS_UPPER[ch % 0x10]); 621 } 622 } 623 return result.toString(); 624 } 625 626 private static boolean isHexDigit(char c) { 627 return (c >= 'A' && c <= 'F') || (c >= '0' && c <= '9'); 628 } 629 630 /** 631 * Takes a ASCII digit in the range A-F0-9 and returns the corresponding integer/ordinal value. 632 * @param ch The hex digit. 633 * @return The converted hex value as a byte. 634 */ 635 public static byte toBinaryFromHex(byte ch) { 636 if (ch >= 'A' && ch <= 'F') return (byte) ((byte) 10 + (byte) (ch - 'A')); 637 // else 638 return (byte) (ch - '0'); 639 } 640 641 public static byte[] toBytesBinary(String in) { 642 // this may be bigger than we need, but let's be safe. 643 byte[] b = new byte[in.length()]; 644 int size = 0; 645 for (int i = 0; i < in.length(); ++i) { 646 char ch = in.charAt(i); 647 if (ch == '\\' && in.length() > i + 1 && in.charAt(i + 1) == 'x') { 648 // ok, take next 2 hex digits. 
649 char hd1 = in.charAt(i + 2); 650 char hd2 = in.charAt(i + 3); 651 652 // they need to be A-F0-9: 653 if (!isHexDigit(hd1) || !isHexDigit(hd2)) { 654 // bogus escape code, ignore: 655 continue; 656 } 657 // turn hex ASCII digit -> number 658 byte d = (byte) ((toBinaryFromHex((byte) hd1) << 4) + toBinaryFromHex((byte) hd2)); 659 660 b[size++] = d; 661 i += 3; // skip 3 662 } else { 663 b[size++] = (byte) ch; 664 } 665 } 666 // resize: 667 byte[] b2 = new byte[size]; 668 System.arraycopy(b, 0, b2, 0, size); 669 return b2; 670 } 671 672 /** 673 * Converts a string to a UTF-8 byte array. 674 * @param s string 675 * @return the byte array 676 */ 677 public static byte[] toBytes(String s) { 678 try { 679 return s.getBytes(UTF8_CSN); 680 } catch (UnsupportedEncodingException e) { 681 // should never happen! 682 throw new IllegalArgumentException("UTF8 decoding is not supported", e); 683 } 684 } 685 686 /** 687 * Convert a boolean to a byte array. True becomes -1 and false becomes 0. 688 * @param b value 689 * @return <code>b</code> encoded in a byte array. 690 */ 691 public static byte[] toBytes(final boolean b) { 692 return new byte[] { b ? (byte) -1 : (byte) 0 }; 693 } 694 695 /** 696 * Reverses {@link #toBytes(boolean)} 697 * @param b array 698 * @return True or false. 699 */ 700 public static boolean toBoolean(final byte[] b) { 701 if (b.length != 1) { 702 throw new IllegalArgumentException("Array has wrong size: " + b.length); 703 } 704 return b[0] != (byte) 0; 705 } 706 707 /** 708 * Convert a long value to a byte array using big-endian. 709 * @param val value to convert 710 * @return the byte array 711 */ 712 public static byte[] toBytes(long val) { 713 byte[] b = new byte[8]; 714 for (int i = 7; i > 0; i--) { 715 b[i] = (byte) val; 716 val >>>= 8; 717 } 718 b[0] = (byte) val; 719 return b; 720 } 721 722 /** 723 * Converts a byte array to a long value. 
Reverses {@link #toBytes(long)} 724 * @param bytes array 725 * @return the long value 726 */ 727 public static long toLong(byte[] bytes) { 728 return toLong(bytes, 0, SIZEOF_LONG); 729 } 730 731 /** 732 * Converts a byte array to a long value. Assumes there will be {@link #SIZEOF_LONG} bytes 733 * available. 734 * @param bytes bytes 735 * @param offset offset 736 * @return the long value 737 */ 738 public static long toLong(byte[] bytes, int offset) { 739 return toLong(bytes, offset, SIZEOF_LONG); 740 } 741 742 /** 743 * Converts a byte array to a long value. 744 * @param bytes array of bytes 745 * @param offset offset into array 746 * @param length length of data (must be {@link #SIZEOF_LONG}) 747 * @return the long value 748 * @throws IllegalArgumentException if length is not {@link #SIZEOF_LONG} or if there's not enough 749 * room in the array at the offset indicated. 750 */ 751 public static long toLong(byte[] bytes, int offset, final int length) { 752 if (length != SIZEOF_LONG || offset + length > bytes.length) { 753 throw explainWrongLengthOrOffset(bytes, offset, length, SIZEOF_LONG); 754 } 755 return ConverterHolder.BEST_CONVERTER.toLong(bytes, offset, length); 756 } 757 758 private static IllegalArgumentException explainWrongLengthOrOffset(final byte[] bytes, 759 final int offset, final int length, final int expectedLength) { 760 String reason; 761 if (length != expectedLength) { 762 reason = "Wrong length: " + length + ", expected " + expectedLength; 763 } else { 764 reason = "offset (" + offset + ") + length (" + length + ") exceed the" 765 + " capacity of the array: " + bytes.length; 766 } 767 return new IllegalArgumentException(reason); 768 } 769 770 /** 771 * Put a long value out to the specified byte array position. 
772 * @param bytes the byte array 773 * @param offset position in the array 774 * @param val long to write out 775 * @return incremented offset 776 * @throws IllegalArgumentException if the byte array given doesn't have enough room at the offset 777 * specified. 778 */ 779 public static int putLong(byte[] bytes, int offset, long val) { 780 if (bytes.length - offset < SIZEOF_LONG) { 781 throw new IllegalArgumentException("Not enough room to put a long at" + " offset " + offset 782 + " in a " + bytes.length + " byte array"); 783 } 784 return ConverterHolder.BEST_CONVERTER.putLong(bytes, offset, val); 785 } 786 787 /** 788 * Put a long value out to the specified byte array position (Unsafe). 789 * @param bytes the byte array 790 * @param offset position in the array 791 * @param val long to write out 792 * @return incremented offset 793 * @deprecated As of release 2.0.0, this will be removed in HBase 3.0.0. 794 */ 795 @Deprecated 796 public static int putLongUnsafe(byte[] bytes, int offset, long val) { 797 return UnsafeAccess.putLong(bytes, offset, val); 798 } 799 800 /** 801 * Put a float value out to the specified byte array position. Presumes float encoded as IEEE 754 802 * floating-point "single format" 803 * @param bytes byte array 804 * @return Float made from passed byte array. 805 */ 806 public static float toFloat(byte[] bytes) { 807 return toFloat(bytes, 0); 808 } 809 810 /** 811 * Put a float value out to the specified byte array position. Presumes float encoded as IEEE 754 812 * floating-point "single format" 813 * @param bytes array to convert 814 * @param offset offset into array 815 * @return Float made from passed byte array. 816 */ 817 public static float toFloat(byte[] bytes, int offset) { 818 return Float.intBitsToFloat(toInt(bytes, offset, SIZEOF_INT)); 819 } 820 821 /** 822 * Put a float value out to the specified byte array position. 
823 * @param bytes byte array 824 * @param offset offset to write to 825 * @param f float value 826 * @return New offset in <code>bytes</code> 827 */ 828 public static int putFloat(byte[] bytes, int offset, float f) { 829 return putInt(bytes, offset, Float.floatToRawIntBits(f)); 830 } 831 832 /** Return the float represented as byte[] */ 833 public static byte[] toBytes(final float f) { 834 // Encode it as int 835 return Bytes.toBytes(Float.floatToRawIntBits(f)); 836 } 837 838 /** Return double made from passed bytes. */ 839 public static double toDouble(final byte[] bytes) { 840 return toDouble(bytes, 0); 841 } 842 843 /** Return double made from passed bytes. */ 844 public static double toDouble(final byte[] bytes, final int offset) { 845 return Double.longBitsToDouble(toLong(bytes, offset, SIZEOF_LONG)); 846 } 847 848 /** 849 * Put a double value out to the specified byte array position as the IEEE 754 double format. 850 * @param bytes byte array 851 * @param offset offset to write to 852 * @param d value 853 * @return New offset into array <code>bytes</code> 854 */ 855 public static int putDouble(byte[] bytes, int offset, double d) { 856 return putLong(bytes, offset, Double.doubleToLongBits(d)); 857 } 858 859 /** 860 * Serialize a double as the IEEE 754 double format output. The resultant array will be 8 bytes 861 * long. 862 * @param d value 863 * @return the double represented as byte [] 864 */ 865 public static byte[] toBytes(final double d) { 866 // Encode it as a long 867 return Bytes.toBytes(Double.doubleToRawLongBits(d)); 868 } 869 870 /** 871 * Convert an int value to a byte array. Big-endian. Same as what DataOutputStream.writeInt does. 
872 * @param val value 873 * @return the byte array 874 */ 875 public static byte[] toBytes(int val) { 876 byte[] b = new byte[4]; 877 for (int i = 3; i > 0; i--) { 878 b[i] = (byte) val; 879 val >>>= 8; 880 } 881 b[0] = (byte) val; 882 return b; 883 } 884 885 /** 886 * Converts a byte array to an int value 887 * @param bytes byte array 888 * @return the int value 889 */ 890 public static int toInt(byte[] bytes) { 891 return toInt(bytes, 0, SIZEOF_INT); 892 } 893 894 /** 895 * Converts a byte array to an int value 896 * @param bytes byte array 897 * @param offset offset into array 898 * @return the int value 899 */ 900 public static int toInt(byte[] bytes, int offset) { 901 return toInt(bytes, offset, SIZEOF_INT); 902 } 903 904 /** 905 * Converts a byte array to an int value 906 * @param bytes byte array 907 * @param offset offset into array 908 * @param length length of int (has to be {@link #SIZEOF_INT}) 909 * @return the int value 910 * @throws IllegalArgumentException if length is not {@link #SIZEOF_INT} or if there's not enough 911 * room in the array at the offset indicated. 912 */ 913 public static int toInt(byte[] bytes, int offset, final int length) { 914 if (length != SIZEOF_INT || offset + length > bytes.length) { 915 throw explainWrongLengthOrOffset(bytes, offset, length, SIZEOF_INT); 916 } 917 return ConverterHolder.BEST_CONVERTER.toInt(bytes, offset, length); 918 } 919 920 /** 921 * Converts a byte array to an int value (Unsafe version) 922 * @param bytes byte array 923 * @param offset offset into array 924 * @return the int value 925 * @deprecated As of release 2.0.0, this will be removed in HBase 3.0.0. 
926 */ 927 @Deprecated 928 public static int toIntUnsafe(byte[] bytes, int offset) { 929 return UnsafeAccess.toInt(bytes, offset); 930 } 931 932 /** 933 * Converts a byte array to an short value (Unsafe version) 934 * @param bytes byte array 935 * @param offset offset into array 936 * @return the short value 937 * @deprecated As of release 2.0.0, this will be removed in HBase 3.0.0. 938 */ 939 @Deprecated 940 public static short toShortUnsafe(byte[] bytes, int offset) { 941 return UnsafeAccess.toShort(bytes, offset); 942 } 943 944 /** 945 * Converts a byte array to an long value (Unsafe version) 946 * @param bytes byte array 947 * @param offset offset into array 948 * @return the long value 949 * @deprecated As of release 2.0.0, this will be removed in HBase 3.0.0. 950 */ 951 @Deprecated 952 public static long toLongUnsafe(byte[] bytes, int offset) { 953 return UnsafeAccess.toLong(bytes, offset); 954 } 955 956 /** 957 * Converts a byte array to an int value 958 * @param bytes byte array 959 * @param offset offset into array 960 * @param length how many bytes should be considered for creating int 961 * @return the int value 962 * @throws IllegalArgumentException if there's not enough room in the array at the offset 963 * indicated. 964 */ 965 public static int readAsInt(byte[] bytes, int offset, final int length) { 966 if (offset + length > bytes.length) { 967 throw new IllegalArgumentException("offset (" + offset + ") + length (" + length 968 + ") exceed the" + " capacity of the array: " + bytes.length); 969 } 970 int n = 0; 971 for (int i = offset; i < (offset + length); i++) { 972 n <<= 8; 973 n ^= bytes[i] & 0xFF; 974 } 975 return n; 976 } 977 978 /** 979 * Put an int value out to the specified byte array position. 
980 * @param bytes the byte array 981 * @param offset position in the array 982 * @param val int to write out 983 * @return incremented offset 984 * @throws IllegalArgumentException if the byte array given doesn't have enough room at the offset 985 * specified. 986 */ 987 public static int putInt(byte[] bytes, int offset, int val) { 988 if (bytes.length - offset < SIZEOF_INT) { 989 throw new IllegalArgumentException("Not enough room to put an int at" + " offset " + offset 990 + " in a " + bytes.length + " byte array"); 991 } 992 return ConverterHolder.BEST_CONVERTER.putInt(bytes, offset, val); 993 } 994 995 /** 996 * Put an int value out to the specified byte array position (Unsafe). 997 * @param bytes the byte array 998 * @param offset position in the array 999 * @param val int to write out 1000 * @return incremented offset 1001 * @deprecated As of release 2.0.0, this will be removed in HBase 3.0.0. 1002 */ 1003 @Deprecated 1004 public static int putIntUnsafe(byte[] bytes, int offset, int val) { 1005 return UnsafeAccess.putInt(bytes, offset, val); 1006 } 1007 1008 /** 1009 * Convert a short value to a byte array of {@link #SIZEOF_SHORT} bytes long. 
1010 * @param val value 1011 * @return the byte array 1012 */ 1013 public static byte[] toBytes(short val) { 1014 byte[] b = new byte[SIZEOF_SHORT]; 1015 b[1] = (byte) val; 1016 val >>= 8; 1017 b[0] = (byte) val; 1018 return b; 1019 } 1020 1021 /** 1022 * Converts a byte array to a short value 1023 * @param bytes byte array 1024 * @return the short value 1025 */ 1026 public static short toShort(byte[] bytes) { 1027 return toShort(bytes, 0, SIZEOF_SHORT); 1028 } 1029 1030 /** 1031 * Converts a byte array to a short value 1032 * @param bytes byte array 1033 * @param offset offset into array 1034 * @return the short value 1035 */ 1036 public static short toShort(byte[] bytes, int offset) { 1037 return toShort(bytes, offset, SIZEOF_SHORT); 1038 } 1039 1040 /** 1041 * Converts a byte array to a short value 1042 * @param bytes byte array 1043 * @param offset offset into array 1044 * @param length length, has to be {@link #SIZEOF_SHORT} 1045 * @return the short value 1046 * @throws IllegalArgumentException if length is not {@link #SIZEOF_SHORT} or if there's not 1047 * enough room in the array at the offset indicated. 1048 */ 1049 public static short toShort(byte[] bytes, int offset, final int length) { 1050 if (length != SIZEOF_SHORT || offset + length > bytes.length) { 1051 throw explainWrongLengthOrOffset(bytes, offset, length, SIZEOF_SHORT); 1052 } 1053 return ConverterHolder.BEST_CONVERTER.toShort(bytes, offset, length); 1054 } 1055 1056 /** 1057 * Returns a new byte array, copied from the given {@code buf}, from the position (inclusive) to 1058 * the limit (exclusive). The position and the other index parameters are not changed. 1059 * @param buf a byte buffer 1060 * @return the byte array 1061 * @see #toBytes(ByteBuffer) 1062 */ 1063 public static byte[] getBytes(ByteBuffer buf) { 1064 return readBytes(buf.duplicate()); 1065 } 1066 1067 /** 1068 * Put a short value out to the specified byte array position. 
1069 * @param bytes the byte array 1070 * @param offset position in the array 1071 * @param val short to write out 1072 * @return incremented offset 1073 * @throws IllegalArgumentException if the byte array given doesn't have enough room at the offset 1074 * specified. 1075 */ 1076 public static int putShort(byte[] bytes, int offset, short val) { 1077 if (bytes.length - offset < SIZEOF_SHORT) { 1078 throw new IllegalArgumentException("Not enough room to put a short at" + " offset " + offset 1079 + " in a " + bytes.length + " byte array"); 1080 } 1081 return ConverterHolder.BEST_CONVERTER.putShort(bytes, offset, val); 1082 } 1083 1084 /** 1085 * Put a short value out to the specified byte array position (Unsafe). 1086 * @param bytes the byte array 1087 * @param offset position in the array 1088 * @param val short to write out 1089 * @return incremented offset 1090 * @deprecated As of release 2.0.0, this will be removed in HBase 3.0.0. 1091 */ 1092 @Deprecated 1093 public static int putShortUnsafe(byte[] bytes, int offset, short val) { 1094 return UnsafeAccess.putShort(bytes, offset, val); 1095 } 1096 1097 /** 1098 * Put an int value as short out to the specified byte array position. Only the lower 2 bytes of 1099 * the short will be put into the array. The caller of the API need to make sure they will not 1100 * loose the value by doing so. This is useful to store an unsigned short which is represented as 1101 * int in other parts. 1102 * @param bytes the byte array 1103 * @param offset position in the array 1104 * @param val value to write out 1105 * @return incremented offset 1106 * @throws IllegalArgumentException if the byte array given doesn't have enough room at the offset 1107 * specified. 
1108 */ 1109 public static int putAsShort(byte[] bytes, int offset, int val) { 1110 if (bytes.length - offset < SIZEOF_SHORT) { 1111 throw new IllegalArgumentException("Not enough room to put a short at" + " offset " + offset 1112 + " in a " + bytes.length + " byte array"); 1113 } 1114 bytes[offset + 1] = (byte) val; 1115 val >>= 8; 1116 bytes[offset] = (byte) val; 1117 return offset + SIZEOF_SHORT; 1118 } 1119 1120 /** Convert a BigDecimal value to a byte array */ 1121 public static byte[] toBytes(BigDecimal val) { 1122 byte[] valueBytes = val.unscaledValue().toByteArray(); 1123 byte[] result = new byte[valueBytes.length + SIZEOF_INT]; 1124 int offset = putInt(result, 0, val.scale()); 1125 putBytes(result, offset, valueBytes, 0, valueBytes.length); 1126 return result; 1127 } 1128 1129 /** Converts a byte array to a BigDecimal */ 1130 public static BigDecimal toBigDecimal(byte[] bytes) { 1131 return toBigDecimal(bytes, 0, bytes.length); 1132 } 1133 1134 /** Converts a byte array to a BigDecimal value */ 1135 public static BigDecimal toBigDecimal(byte[] bytes, int offset, final int length) { 1136 if (bytes == null || length < SIZEOF_INT + 1 || (offset + length > bytes.length)) { 1137 return null; 1138 } 1139 1140 int scale = toInt(bytes, offset); 1141 byte[] tcBytes = new byte[length - SIZEOF_INT]; 1142 System.arraycopy(bytes, offset + SIZEOF_INT, tcBytes, 0, length - SIZEOF_INT); 1143 return new BigDecimal(new BigInteger(tcBytes), scale); 1144 } 1145 1146 /** 1147 * Put a BigDecimal value out to the specified byte array position. 
1148 * @param bytes the byte array 1149 * @param offset position in the array 1150 * @param val BigDecimal to write out 1151 * @return incremented offset 1152 */ 1153 public static int putBigDecimal(byte[] bytes, int offset, BigDecimal val) { 1154 if (bytes == null) { 1155 return offset; 1156 } 1157 1158 byte[] valueBytes = val.unscaledValue().toByteArray(); 1159 byte[] result = new byte[valueBytes.length + SIZEOF_INT]; 1160 offset = putInt(result, offset, val.scale()); 1161 return putBytes(result, offset, valueBytes, 0, valueBytes.length); 1162 } 1163 1164 /** 1165 * Encode a long value as a variable length integer. 1166 * @param vint Integer to make a vint of. 1167 * @return Vint as bytes array. 1168 */ 1169 public static byte[] vintToBytes(final long vint) { 1170 long i = vint; 1171 int size = WritableUtils.getVIntSize(i); 1172 byte[] result = new byte[size]; 1173 int offset = 0; 1174 if (i >= -112 && i <= 127) { 1175 result[offset] = (byte) i; 1176 return result; 1177 } 1178 1179 int len = -112; 1180 if (i < 0) { 1181 i ^= -1L; // take one's complement' 1182 len = -120; 1183 } 1184 1185 long tmp = i; 1186 while (tmp != 0) { 1187 tmp = tmp >> 8; 1188 len--; 1189 } 1190 1191 result[offset++] = (byte) len; 1192 1193 len = (len < -120) ? -(len + 120) : -(len + 112); 1194 1195 for (int idx = len; idx != 0; idx--) { 1196 int shiftbits = (idx - 1) * 8; 1197 long mask = 0xFFL << shiftbits; 1198 result[offset++] = (byte) ((i & mask) >> shiftbits); 1199 } 1200 return result; 1201 } 1202 1203 /** 1204 * Reads a zero-compressed encoded long from input buffer and returns it. 1205 * @param buffer buffer to convert 1206 * @return vint bytes as an integer. 
1207 */ 1208 public static long bytesToVint(final byte[] buffer) { 1209 int offset = 0; 1210 byte firstByte = buffer[offset++]; 1211 int len = WritableUtils.decodeVIntSize(firstByte); 1212 if (len == 1) { 1213 return firstByte; 1214 } 1215 long i = 0; 1216 for (int idx = 0; idx < len - 1; idx++) { 1217 byte b = buffer[offset++]; 1218 i = i << 8; 1219 i = i | (b & 0xFF); 1220 } 1221 return (WritableUtils.isNegativeVInt(firstByte) ? ~i : i); 1222 } 1223 1224 /** 1225 * Reads a zero-compressed encoded long from input buffer and returns it. 1226 * @param buffer Binary array 1227 * @param offset Offset into array at which vint begins. 1228 * @throws java.io.IOException e 1229 * @return deserialized long from buffer. 1230 * @deprecated since 0.98.12. Use {@link #readAsVLong(byte[],int)} instead. 1231 * @see #readAsVLong(byte[], int) 1232 * @see <a href="https://issues.apache.org/jira/browse/HBASE-6919">HBASE-6919</a> 1233 */ 1234 @Deprecated 1235 public static long readVLong(final byte[] buffer, final int offset) throws IOException { 1236 return readAsVLong(buffer, offset); 1237 } 1238 1239 /** 1240 * Reads a zero-compressed encoded long from input buffer and returns it. 1241 * @param buffer Binary array 1242 * @param offset Offset into array at which vint begins. 1243 * @return deserialized long from buffer. 1244 */ 1245 public static long readAsVLong(final byte[] buffer, final int offset) { 1246 byte firstByte = buffer[offset]; 1247 int len = WritableUtils.decodeVIntSize(firstByte); 1248 if (len == 1) { 1249 return firstByte; 1250 } 1251 long i = 0; 1252 for (int idx = 0; idx < len - 1; idx++) { 1253 byte b = buffer[offset + 1 + idx]; 1254 i = i << 8; 1255 i = i | (b & 0xFF); 1256 } 1257 return (WritableUtils.isNegativeVInt(firstByte) ? ~i : i); 1258 } 1259 1260 /** 1261 * Lexicographically compare two arrays. 1262 * @param left left operand 1263 * @param right right operand 1264 * @return 0 if equal, < 0 if left is less than right, etc. 
1265 */ 1266 public static int compareTo(final byte[] left, final byte[] right) { 1267 return LexicographicalComparerHolder.BEST_COMPARER.compareTo(left, 0, 1268 left == null ? 0 : left.length, right, 0, right == null ? 0 : right.length); 1269 } 1270 1271 /** 1272 * Lexicographically compare two arrays. 1273 * @param buffer1 left operand 1274 * @param buffer2 right operand 1275 * @param offset1 Where to start comparing in the left buffer 1276 * @param offset2 Where to start comparing in the right buffer 1277 * @param length1 How much to compare from the left buffer 1278 * @param length2 How much to compare from the right buffer 1279 * @return 0 if equal, < 0 if left is less than right, etc. 1280 */ 1281 public static int compareTo(byte[] buffer1, int offset1, int length1, byte[] buffer2, int offset2, 1282 int length2) { 1283 return LexicographicalComparerHolder.BEST_COMPARER.compareTo(buffer1, offset1, length1, buffer2, 1284 offset2, length2); 1285 } 1286 1287 interface Comparer<T> { 1288 int compareTo(T buffer1, int offset1, int length1, T buffer2, int offset2, int length2); 1289 } 1290 1291 static abstract class Converter { 1292 abstract long toLong(byte[] bytes, int offset, int length); 1293 1294 abstract int putLong(byte[] bytes, int offset, long val); 1295 1296 abstract int toInt(byte[] bytes, int offset, final int length); 1297 1298 abstract int putInt(byte[] bytes, int offset, int val); 1299 1300 abstract short toShort(byte[] bytes, int offset, final int length); 1301 1302 abstract int putShort(byte[] bytes, int offset, short val); 1303 1304 } 1305 1306 @InterfaceAudience.Private 1307 static Comparer<byte[]> lexicographicalComparerJavaImpl() { 1308 return LexicographicalComparerHolder.PureJavaComparer.INSTANCE; 1309 } 1310 1311 static class ConverterHolder { 1312 static final String UNSAFE_CONVERTER_NAME = 1313 ConverterHolder.class.getName() + "$UnsafeConverter"; 1314 1315 static final Converter BEST_CONVERTER = getBestConverter(); 1316 1317 /** 1318 * 
Returns the Unsafe-using Converter, or falls back to the pure-Java implementation if unable 1319 * to do so. 1320 */ 1321 static Converter getBestConverter() { 1322 try { 1323 Class<?> theClass = Class.forName(UNSAFE_CONVERTER_NAME); 1324 1325 // yes, UnsafeComparer does implement Comparer<byte[]> 1326 @SuppressWarnings("unchecked") 1327 Converter converter = (Converter) theClass.getConstructor().newInstance(); 1328 return converter; 1329 } catch (Throwable t) { // ensure we really catch *everything* 1330 return PureJavaConverter.INSTANCE; 1331 } 1332 } 1333 1334 protected static final class PureJavaConverter extends Converter { 1335 static final PureJavaConverter INSTANCE = new PureJavaConverter(); 1336 1337 private PureJavaConverter() { 1338 } 1339 1340 @Override 1341 long toLong(byte[] bytes, int offset, int length) { 1342 long l = 0; 1343 for (int i = offset; i < offset + length; i++) { 1344 l <<= 8; 1345 l ^= bytes[i] & 0xFF; 1346 } 1347 return l; 1348 } 1349 1350 @Override 1351 int putLong(byte[] bytes, int offset, long val) { 1352 for (int i = offset + 7; i > offset; i--) { 1353 bytes[i] = (byte) val; 1354 val >>>= 8; 1355 } 1356 bytes[offset] = (byte) val; 1357 return offset + SIZEOF_LONG; 1358 } 1359 1360 @Override 1361 int toInt(byte[] bytes, int offset, int length) { 1362 int n = 0; 1363 for (int i = offset; i < (offset + length); i++) { 1364 n <<= 8; 1365 n ^= bytes[i] & 0xFF; 1366 } 1367 return n; 1368 } 1369 1370 @Override 1371 int putInt(byte[] bytes, int offset, int val) { 1372 for (int i = offset + 3; i > offset; i--) { 1373 bytes[i] = (byte) val; 1374 val >>>= 8; 1375 } 1376 bytes[offset] = (byte) val; 1377 return offset + SIZEOF_INT; 1378 } 1379 1380 @Override 1381 short toShort(byte[] bytes, int offset, int length) { 1382 short n = 0; 1383 n = (short) ((n ^ bytes[offset]) & 0xFF); 1384 n = (short) (n << 8); 1385 n ^= (short) (bytes[offset + 1] & 0xFF); 1386 return n; 1387 } 1388 1389 @Override 1390 int putShort(byte[] bytes, int offset, short 
val) { 1391 bytes[offset + 1] = (byte) val; 1392 val >>= 8; 1393 bytes[offset] = (byte) val; 1394 return offset + SIZEOF_SHORT; 1395 } 1396 } 1397 1398 protected static final class UnsafeConverter extends Converter { 1399 1400 public UnsafeConverter() { 1401 } 1402 1403 static { 1404 if (!UNSAFE_UNALIGNED) { 1405 // It doesn't matter what we throw; 1406 // it's swallowed in getBestComparer(). 1407 throw new Error(); 1408 } 1409 1410 // sanity check - this should never fail 1411 if (HBasePlatformDependent.arrayIndexScale(byte[].class) != 1) { 1412 throw new AssertionError(); 1413 } 1414 } 1415 1416 @Override 1417 long toLong(byte[] bytes, int offset, int length) { 1418 return UnsafeAccess.toLong(bytes, offset); 1419 } 1420 1421 @Override 1422 int putLong(byte[] bytes, int offset, long val) { 1423 return UnsafeAccess.putLong(bytes, offset, val); 1424 } 1425 1426 @Override 1427 int toInt(byte[] bytes, int offset, int length) { 1428 return UnsafeAccess.toInt(bytes, offset); 1429 } 1430 1431 @Override 1432 int putInt(byte[] bytes, int offset, int val) { 1433 return UnsafeAccess.putInt(bytes, offset, val); 1434 } 1435 1436 @Override 1437 short toShort(byte[] bytes, int offset, int length) { 1438 return UnsafeAccess.toShort(bytes, offset); 1439 } 1440 1441 @Override 1442 int putShort(byte[] bytes, int offset, short val) { 1443 return UnsafeAccess.putShort(bytes, offset, val); 1444 } 1445 } 1446 } 1447 1448 /** 1449 * Provides a lexicographical comparer implementation; either a Java implementation or a faster 1450 * implementation based on {@code Unsafe}. 1451 * <p> 1452 * Uses reflection to gracefully fall back to the Java implementation if {@code Unsafe} isn't 1453 * available. 
1454 */ 1455 @InterfaceAudience.Private 1456 static class LexicographicalComparerHolder { 1457 static final String UNSAFE_COMPARER_NAME = 1458 LexicographicalComparerHolder.class.getName() + "$UnsafeComparer"; 1459 1460 static final Comparer<byte[]> BEST_COMPARER = getBestComparer(); 1461 1462 /** 1463 * Returns the Unsafe-using Comparer, or falls back to the pure-Java implementation if unable to 1464 * do so. 1465 */ 1466 static Comparer<byte[]> getBestComparer() { 1467 try { 1468 Class<?> theClass = Class.forName(UNSAFE_COMPARER_NAME); 1469 1470 // yes, UnsafeComparer does implement Comparer<byte[]> 1471 @SuppressWarnings("unchecked") 1472 Comparer<byte[]> comparer = (Comparer<byte[]>) theClass.getEnumConstants()[0]; 1473 return comparer; 1474 } catch (Throwable t) { // ensure we really catch *everything* 1475 return lexicographicalComparerJavaImpl(); 1476 } 1477 } 1478 1479 enum PureJavaComparer implements Comparer<byte[]> { 1480 INSTANCE; 1481 1482 @Override 1483 public int compareTo(byte[] buffer1, int offset1, int length1, byte[] buffer2, int offset2, 1484 int length2) { 1485 // Short circuit equal case 1486 if (buffer1 == buffer2 && offset1 == offset2 && length1 == length2) { 1487 return 0; 1488 } 1489 // Bring WritableComparator code local 1490 int end1 = offset1 + length1; 1491 int end2 = offset2 + length2; 1492 for (int i = offset1, j = offset2; i < end1 && j < end2; i++, j++) { 1493 int a = (buffer1[i] & 0xff); 1494 int b = (buffer2[j] & 0xff); 1495 if (a != b) { 1496 return a - b; 1497 } 1498 } 1499 return length1 - length2; 1500 } 1501 } 1502 1503 @InterfaceAudience.Private 1504 enum UnsafeComparer implements Comparer<byte[]> { 1505 INSTANCE; 1506 1507 static { 1508 if (!UNSAFE_UNALIGNED) { 1509 // It doesn't matter what we throw; 1510 // it's swallowed in getBestComparer(). 
1511 throw new Error(); 1512 } 1513 1514 // sanity check - this should never fail 1515 if (HBasePlatformDependent.arrayIndexScale(byte[].class) != 1) { 1516 throw new AssertionError(); 1517 } 1518 } 1519 1520 /** 1521 * Lexicographically compare two arrays. 1522 * @param buffer1 left operand 1523 * @param buffer2 right operand 1524 * @param offset1 Where to start comparing in the left buffer 1525 * @param offset2 Where to start comparing in the right buffer 1526 * @param length1 How much to compare from the left buffer 1527 * @param length2 How much to compare from the right buffer 1528 * @return 0 if equal, < 0 if left is less than right, etc. 1529 */ 1530 @Override 1531 public int compareTo(byte[] buffer1, int offset1, int length1, byte[] buffer2, int offset2, 1532 int length2) { 1533 1534 // Short circuit equal case 1535 if (buffer1 == buffer2 && offset1 == offset2 && length1 == length2) { 1536 return 0; 1537 } 1538 final int stride = 8; 1539 final int minLength = Math.min(length1, length2); 1540 int strideLimit = minLength & ~(stride - 1); 1541 final long offset1Adj = offset1 + UnsafeAccess.BYTE_ARRAY_BASE_OFFSET; 1542 final long offset2Adj = offset2 + UnsafeAccess.BYTE_ARRAY_BASE_OFFSET; 1543 int i; 1544 1545 /* 1546 * Compare 8 bytes at a time. Benchmarking on x86 shows a stride of 8 bytes is no slower 1547 * than 4 bytes even on 32-bit. On the other hand, it is substantially faster on 64-bit. 1548 */ 1549 for (i = 0; i < strideLimit; i += stride) { 1550 long lw = HBasePlatformDependent.getLong(buffer1, offset1Adj + i); 1551 long rw = HBasePlatformDependent.getLong(buffer2, offset2Adj + i); 1552 if (lw != rw) { 1553 if (!UnsafeAccess.LITTLE_ENDIAN) { 1554 return ((lw + Long.MIN_VALUE) < (rw + Long.MIN_VALUE)) ? -1 : 1; 1555 } 1556 1557 /* 1558 * We want to compare only the first index where left[index] != right[index]. This 1559 * corresponds to the least significant nonzero byte in lw ^ rw, since lw and rw are 1560 * little-endian. 
Long.numberOfTrailingZeros(diff) tells us the least significant 1561 * nonzero bit, and zeroing out the first three bits of L.nTZ gives us the shift to get 1562 * that least significant nonzero byte. This comparison logic is based on UnsignedBytes 1563 * comparator from guava v21 1564 */ 1565 int n = Long.numberOfTrailingZeros(lw ^ rw) & ~0x7; 1566 return ((int) ((lw >>> n) & 0xFF)) - ((int) ((rw >>> n) & 0xFF)); 1567 } 1568 } 1569 1570 // The epilogue to cover the last (minLength % stride) elements. 1571 for (; i < minLength; i++) { 1572 int a = (buffer1[offset1 + i] & 0xFF); 1573 int b = (buffer2[offset2 + i] & 0xFF); 1574 if (a != b) { 1575 return a - b; 1576 } 1577 } 1578 return length1 - length2; 1579 } 1580 } 1581 } 1582 1583 /** 1584 * Lexicographically determine the equality of two arrays. 1585 * @param left left operand 1586 * @param right right operand 1587 * @return True if equal 1588 */ 1589 public static boolean equals(final byte[] left, final byte[] right) { 1590 // Could use Arrays.equals? 1591 // noinspection SimplifiableConditionalExpression 1592 if (left == right) return true; 1593 if (left == null || right == null) return false; 1594 if (left.length != right.length) return false; 1595 if (left.length == 0) return true; 1596 1597 // Since we're often comparing adjacent sorted data, 1598 // it's usual to have equal arrays except for the very last byte 1599 // so check that first 1600 if (left[left.length - 1] != right[right.length - 1]) return false; 1601 1602 return compareTo(left, right) == 0; 1603 } 1604 1605 /** 1606 * Lexicographically determine the equality of two arrays. 
1607 * @param left left operand 1608 * @param leftOffset offset into left operand 1609 * @param leftLen length of left operand 1610 * @param right right operand 1611 * @param rightOffset offset into right operand 1612 * @param rightLen length of right operand 1613 * @return True if equal 1614 */ 1615 public static boolean equals(final byte[] left, int leftOffset, int leftLen, final byte[] right, 1616 int rightOffset, int rightLen) { 1617 // short circuit case 1618 if (left == right && leftOffset == rightOffset && leftLen == rightLen) { 1619 return true; 1620 } 1621 // different lengths fast check 1622 if (leftLen != rightLen) { 1623 return false; 1624 } 1625 if (leftLen == 0) { 1626 return true; 1627 } 1628 1629 // Since we're often comparing adjacent sorted data, 1630 // it's usual to have equal arrays except for the very last byte 1631 // so check that first 1632 if (left[leftOffset + leftLen - 1] != right[rightOffset + rightLen - 1]) return false; 1633 1634 return LexicographicalComparerHolder.BEST_COMPARER.compareTo(left, leftOffset, leftLen, right, 1635 rightOffset, rightLen) == 0; 1636 } 1637 1638 /** 1639 * Lexicographically determine the equality of two byte[], one as ByteBuffer. 1640 * @param a left operand 1641 * @param buf right operand 1642 * @return True if equal 1643 */ 1644 public static boolean equals(byte[] a, ByteBuffer buf) { 1645 if (a == null) return buf == null; 1646 if (buf == null) return false; 1647 if (a.length != buf.remaining()) return false; 1648 1649 // Thou shalt not modify the original byte buffer in what should be read only operations. 1650 ByteBuffer b = buf.duplicate(); 1651 for (byte anA : a) { 1652 if (anA != b.get()) { 1653 return false; 1654 } 1655 } 1656 return true; 1657 } 1658 1659 /** 1660 * Return true if the byte array on the right is a prefix of the byte array on the left. 
1661 */ 1662 public static boolean startsWith(byte[] bytes, byte[] prefix) { 1663 return bytes != null && prefix != null && bytes.length >= prefix.length 1664 && LexicographicalComparerHolder.BEST_COMPARER.compareTo(bytes, 0, prefix.length, prefix, 0, 1665 prefix.length) == 0; 1666 } 1667 1668 /** 1669 * Calculate a hash code from a given byte array. 1670 * @param b bytes to hash 1671 * @return Runs {@link WritableComparator#hashBytes(byte[], int)} on the passed in array. This 1672 * method is what {@link org.apache.hadoop.io.Text} use calculating hash code. 1673 */ 1674 public static int hashCode(final byte[] b) { 1675 return hashCode(b, b.length); 1676 } 1677 1678 /** 1679 * Calculate a hash code from a given byte array. 1680 * @param b value 1681 * @param length length of the value 1682 * @return Runs {@link WritableComparator#hashBytes(byte[], int)} on the passed in array. This 1683 * method is what {@link org.apache.hadoop.io.Text} use calculating hash code. 1684 */ 1685 public static int hashCode(final byte[] b, final int length) { 1686 return WritableComparator.hashBytes(b, length); 1687 } 1688 1689 /** 1690 * Calculate a hash code from a given byte array suitable for use as a key in maps. 1691 * @param b bytes to hash 1692 * @return A hash of <code>b</code> as an Integer that can be used as key in Maps. 1693 */ 1694 public static Integer mapKey(final byte[] b) { 1695 return hashCode(b); 1696 } 1697 1698 /** 1699 * Calculate a hash code from a given byte array suitable for use as a key in maps. 1700 * @param b bytes to hash 1701 * @param length length to hash 1702 * @return A hash of <code>b</code> as an Integer that can be used as key in Maps. 1703 */ 1704 public static Integer mapKey(final byte[] b, final int length) { 1705 return hashCode(b, length); 1706 } 1707 1708 /** 1709 * Concatenate byte arrays. 1710 * @param a lower half 1711 * @param b upper half 1712 * @return New array that has a in lower half and b in upper half. 
1713 */ 1714 public static byte[] add(final byte[] a, final byte[] b) { 1715 return add(a, b, EMPTY_BYTE_ARRAY); 1716 } 1717 1718 /** 1719 * Concatenate byte arrays. 1720 * @param a first third 1721 * @param b second third 1722 * @param c third third 1723 * @return New array made from a, b and c 1724 */ 1725 public static byte[] add(final byte[] a, final byte[] b, final byte[] c) { 1726 byte[] result = new byte[a.length + b.length + c.length]; 1727 System.arraycopy(a, 0, result, 0, a.length); 1728 System.arraycopy(b, 0, result, a.length, b.length); 1729 System.arraycopy(c, 0, result, a.length + b.length, c.length); 1730 return result; 1731 } 1732 1733 /** 1734 * Concatenate byte arrays. 1735 * @param arrays all the arrays to concatenate together. 1736 * @return New array made from the concatenation of the given arrays. 1737 */ 1738 public static byte[] add(final byte[][] arrays) { 1739 int length = 0; 1740 for (int i = 0; i < arrays.length; i++) { 1741 length += arrays[i].length; 1742 } 1743 byte[] result = new byte[length]; 1744 int index = 0; 1745 for (int i = 0; i < arrays.length; i++) { 1746 System.arraycopy(arrays[i], 0, result, index, arrays[i].length); 1747 index += arrays[i].length; 1748 } 1749 return result; 1750 } 1751 1752 /** 1753 * Make a new byte array from a subset of bytes at the head of another. 1754 * @param a array 1755 * @param length amount of bytes to grab 1756 * @return First <code>length</code> bytes from <code>a</code> 1757 */ 1758 public static byte[] head(final byte[] a, final int length) { 1759 if (a.length < length) { 1760 return null; 1761 } 1762 byte[] result = new byte[length]; 1763 System.arraycopy(a, 0, result, 0, length); 1764 return result; 1765 } 1766 1767 /** 1768 * Make a new byte array from a subset of bytes at the tail of another. 
1769 * @param a array 1770 * @param length amount of bytes to snarf 1771 * @return Last <code>length</code> bytes from <code>a</code> 1772 */ 1773 public static byte[] tail(final byte[] a, final int length) { 1774 if (a.length < length) { 1775 return null; 1776 } 1777 byte[] result = new byte[length]; 1778 System.arraycopy(a, a.length - length, result, 0, length); 1779 return result; 1780 } 1781 1782 /** 1783 * Make a new byte array from a subset of bytes at the head of another, zero padded as desired. 1784 * @param a array 1785 * @param length new array size 1786 * @return Value in <code>a</code> plus <code>length</code> prepended 0 bytes 1787 */ 1788 public static byte[] padHead(final byte[] a, final int length) { 1789 byte[] padding = new byte[length]; 1790 for (int i = 0; i < length; i++) { 1791 padding[i] = 0; 1792 } 1793 return add(padding, a); 1794 } 1795 1796 /** 1797 * Make a new byte array from a subset of bytes at the tail of another, zero padded as desired. 1798 * @param a array 1799 * @param length new array size 1800 * @return Value in <code>a</code> plus <code>length</code> appended 0 bytes 1801 */ 1802 public static byte[] padTail(final byte[] a, final int length) { 1803 byte[] padding = new byte[length]; 1804 for (int i = 0; i < length; i++) { 1805 padding[i] = 0; 1806 } 1807 return add(a, padding); 1808 } 1809 1810 /** 1811 * Split passed range. Expensive operation relatively. Uses BigInteger math. Useful splitting 1812 * ranges for MapReduce jobs. 1813 * @param a Beginning of range 1814 * @param b End of range 1815 * @param num Number of times to split range. Pass 1 if you want to split the range in two; i.e. 1816 * one split. 1817 * @return Array of dividing values 1818 */ 1819 public static byte[][] split(final byte[] a, final byte[] b, final int num) { 1820 return split(a, b, false, num); 1821 } 1822 1823 /** 1824 * Split passed range. Expensive operation relatively. Uses BigInteger math. Useful splitting 1825 * ranges for MapReduce jobs. 
1826 * @param a Beginning of range 1827 * @param b End of range 1828 * @param inclusive Whether the end of range is prefix-inclusive or is considered an exclusive 1829 * boundary. Automatic splits are generally exclusive and manual splits with an 1830 * explicit range utilize an inclusive end of range. 1831 * @param num Number of times to split range. Pass 1 if you want to split the range in two; 1832 * i.e. one split. 1833 * @return Array of dividing values 1834 */ 1835 public static byte[][] split(final byte[] a, final byte[] b, boolean inclusive, final int num) { 1836 byte[][] ret = new byte[num + 2][]; 1837 int i = 0; 1838 Iterable<byte[]> iter = iterateOnSplits(a, b, inclusive, num); 1839 if (iter == null) return null; 1840 for (byte[] elem : iter) { 1841 ret[i++] = elem; 1842 } 1843 return ret; 1844 } 1845 1846 /** 1847 * Iterate over keys within the passed range, splitting at an [a,b) boundary. 1848 */ 1849 public static Iterable<byte[]> iterateOnSplits(final byte[] a, final byte[] b, final int num) { 1850 return iterateOnSplits(a, b, false, num); 1851 } 1852 1853 /** 1854 * Iterate over keys within the passed range. 
1855 */ 1856 public static Iterable<byte[]> iterateOnSplits(final byte[] a, final byte[] b, boolean inclusive, 1857 final int num) { 1858 byte[] aPadded; 1859 byte[] bPadded; 1860 if (a.length < b.length) { 1861 aPadded = padTail(a, b.length - a.length); 1862 bPadded = b; 1863 } else if (b.length < a.length) { 1864 aPadded = a; 1865 bPadded = padTail(b, a.length - b.length); 1866 } else { 1867 aPadded = a; 1868 bPadded = b; 1869 } 1870 if (compareTo(aPadded, bPadded) >= 0) { 1871 throw new IllegalArgumentException("b <= a"); 1872 } 1873 if (num <= 0) { 1874 throw new IllegalArgumentException("num cannot be <= 0"); 1875 } 1876 byte[] prependHeader = { 1, 0 }; 1877 final BigInteger startBI = new BigInteger(add(prependHeader, aPadded)); 1878 final BigInteger stopBI = new BigInteger(add(prependHeader, bPadded)); 1879 BigInteger diffBI = stopBI.subtract(startBI); 1880 if (inclusive) { 1881 diffBI = diffBI.add(BigInteger.ONE); 1882 } 1883 final BigInteger splitsBI = BigInteger.valueOf(num + 1); 1884 // when diffBI < splitBI, use an additional byte to increase diffBI 1885 if (diffBI.compareTo(splitsBI) < 0) { 1886 byte[] aPaddedAdditional = new byte[aPadded.length + 1]; 1887 byte[] bPaddedAdditional = new byte[bPadded.length + 1]; 1888 for (int i = 0; i < aPadded.length; i++) { 1889 aPaddedAdditional[i] = aPadded[i]; 1890 } 1891 for (int j = 0; j < bPadded.length; j++) { 1892 bPaddedAdditional[j] = bPadded[j]; 1893 } 1894 aPaddedAdditional[aPadded.length] = 0; 1895 bPaddedAdditional[bPadded.length] = 0; 1896 return iterateOnSplits(aPaddedAdditional, bPaddedAdditional, inclusive, num); 1897 } 1898 final BigInteger intervalBI; 1899 try { 1900 intervalBI = diffBI.divide(splitsBI); 1901 } catch (Exception e) { 1902 LOG.error("Exception caught during division", e); 1903 return null; 1904 } 1905 1906 final Iterator<byte[]> iterator = new Iterator<byte[]>() { 1907 private int i = -1; 1908 1909 @Override 1910 public boolean hasNext() { 1911 return i < num + 1; 1912 } 1913 1914 
@Override 1915 public byte[] next() { 1916 i++; 1917 if (i == 0) return a; 1918 if (i == num + 1) return b; 1919 1920 BigInteger curBI = startBI.add(intervalBI.multiply(BigInteger.valueOf(i))); 1921 byte[] padded = curBI.toByteArray(); 1922 if (padded[1] == 0) padded = tail(padded, padded.length - 2); 1923 else padded = tail(padded, padded.length - 1); 1924 return padded; 1925 } 1926 1927 @Override 1928 public void remove() { 1929 throw new UnsupportedOperationException(); 1930 } 1931 1932 }; 1933 1934 return new Iterable<byte[]>() { 1935 @Override 1936 public Iterator<byte[]> iterator() { 1937 return iterator; 1938 } 1939 }; 1940 } 1941 1942 /** 1943 * Calculate the hash code for a given range of bytes. 1944 * @param bytes array to hash 1945 * @param offset offset to start from 1946 * @param length length to hash 1947 */ 1948 public static int hashCode(byte[] bytes, int offset, int length) { 1949 int hash = 1; 1950 for (int i = offset; i < offset + length; i++) 1951 hash = (31 * hash) + bytes[i]; 1952 return hash; 1953 } 1954 1955 /** 1956 * Create an array of byte[] given an array of String. 1957 * @param t operands 1958 * @return Array of byte arrays made from passed array of Text 1959 */ 1960 public static byte[][] toByteArrays(final String[] t) { 1961 byte[][] result = new byte[t.length][]; 1962 for (int i = 0; i < t.length; i++) { 1963 result[i] = Bytes.toBytes(t[i]); 1964 } 1965 return result; 1966 } 1967 1968 /** 1969 * Create an array of byte[] given an array of String. 
1970 * @param t operands 1971 * @return Array of binary byte arrays made from passed array of binary strings 1972 */ 1973 public static byte[][] toBinaryByteArrays(final String[] t) { 1974 byte[][] result = new byte[t.length][]; 1975 for (int i = 0; i < t.length; i++) { 1976 result[i] = Bytes.toBytesBinary(t[i]); 1977 } 1978 return result; 1979 } 1980 1981 /** 1982 * Create a byte[][] where first and only entry is <code>column</code> 1983 * @param column operand 1984 * @return A byte array of a byte array where first and only entry is <code>column</code> 1985 */ 1986 public static byte[][] toByteArrays(final String column) { 1987 return toByteArrays(toBytes(column)); 1988 } 1989 1990 /** 1991 * Create a byte[][] where first and only entry is <code>column</code> 1992 * @param column operand 1993 * @return A byte array of a byte array where first and only entry is <code>column</code> 1994 */ 1995 public static byte[][] toByteArrays(final byte[] column) { 1996 byte[][] result = new byte[1][]; 1997 result[0] = column; 1998 return result; 1999 } 2000 2001 /** 2002 * Binary search for keys in indexes. 2003 * @param arr array of byte arrays to search for 2004 * @param key the key you want to find 2005 * @param offset the offset in the key you want to find 2006 * @param length the length of the key 2007 * @param comparator a comparator to compare. 2008 * @return zero-based index of the key, if the key is present in the array. Otherwise, a value -(i 2009 * + 1) such that the key is between arr[i - 1] and arr[i] non-inclusively, where i is in 2010 * [0, i], if we define arr[-1] = -Inf and arr[N] = Inf for an N-element array. The above 2011 * means that this function can return 2N + 1 different values ranging from -(N + 1) to N 2012 * - 1. 2013 * @deprecated since 2.0.0 and will be removed in 3.0.0. Use 2014 * {@link #binarySearch(byte[][], byte[], int, int)} instead. 
2015 * @see #binarySearch(byte[][], byte[], int, int) 2016 * @see <a href="https://issues.apache.org/jira/browse/HBASE-13450">HBASE-13450</a> 2017 */ 2018 @Deprecated 2019 public static int binarySearch(byte[][] arr, byte[] key, int offset, int length, 2020 RawComparator<?> comparator) { 2021 return binarySearch(arr, key, offset, length); 2022 } 2023 2024 /** 2025 * Binary search for keys in indexes using Bytes.BYTES_RAWCOMPARATOR. 2026 * @param arr array of byte arrays to search for 2027 * @param key the key you want to find 2028 * @param offset the offset in the key you want to find 2029 * @param length the length of the key 2030 * @return zero-based index of the key, if the key is present in the array. Otherwise, a value -(i 2031 * + 1) such that the key is between arr[i - 1] and arr[i] non-inclusively, where i is in 2032 * [0, i], if we define arr[-1] = -Inf and arr[N] = Inf for an N-element array. The above 2033 * means that this function can return 2N + 1 different values ranging from -(N + 1) to N 2034 * - 1. 2035 */ 2036 public static int binarySearch(byte[][] arr, byte[] key, int offset, int length) { 2037 int low = 0; 2038 int high = arr.length - 1; 2039 2040 while (low <= high) { 2041 int mid = low + ((high - low) >> 1); 2042 // we have to compare in this order, because the comparator order 2043 // has special logic when the 'left side' is a special key. 2044 int cmp = 2045 Bytes.BYTES_RAWCOMPARATOR.compare(key, offset, length, arr[mid], 0, arr[mid].length); 2046 // key lives above the midpoint 2047 if (cmp > 0) low = mid + 1; 2048 // key lives below the midpoint 2049 else if (cmp < 0) high = mid - 1; 2050 // BAM. how often does this really happen? 2051 else return mid; 2052 } 2053 return -(low + 1); 2054 } 2055 2056 /** 2057 * Binary search for keys in indexes. 2058 * @param arr array of byte arrays to search for 2059 * @param key the key you want to find 2060 * @param comparator a comparator to compare. 
2061 * @return zero-based index of the key, if the key is present in the array. Otherwise, a value -(i 2062 * + 1) such that the key is between arr[i - 1] and arr[i] non-inclusively, where i is in 2063 * [0, i], if we define arr[-1] = -Inf and arr[N] = Inf for an N-element array. The above 2064 * means that this function can return 2N + 1 different values ranging from -(N + 1) to N 2065 * - 1. 2066 * @return the index of the block 2067 * @deprecated since 2.0.0 and will be removed in 3.0.0. Use 2068 * {@link #binarySearch(Cell[], Cell, CellComparator)} instead. 2069 * @see #binarySearch(Cell[], Cell, CellComparator) 2070 * @see <a href="https://issues.apache.org/jira/browse/HBASE-13450">HBASE-13450</a> 2071 */ 2072 @Deprecated 2073 public static int binarySearch(byte[][] arr, Cell key, RawComparator<Cell> comparator) { 2074 int low = 0; 2075 int high = arr.length - 1; 2076 KeyValue.KeyOnlyKeyValue r = new KeyValue.KeyOnlyKeyValue(); 2077 while (low <= high) { 2078 int mid = low + ((high - low) >> 1); 2079 // we have to compare in this order, because the comparator order 2080 // has special logic when the 'left side' is a special key. 2081 r.setKey(arr[mid], 0, arr[mid].length); 2082 int cmp = comparator.compare(key, r); 2083 // key lives above the midpoint 2084 if (cmp > 0) low = mid + 1; 2085 // key lives below the midpoint 2086 else if (cmp < 0) high = mid - 1; 2087 // BAM. how often does this really happen? 2088 else return mid; 2089 } 2090 return -(low + 1); 2091 } 2092 2093 /** 2094 * Binary search for keys in indexes. 2095 * @param arr array of byte arrays to search for 2096 * @param key the key you want to find 2097 * @param comparator a comparator to compare. 2098 * @return zero-based index of the key, if the key is present in the array. Otherwise, a value -(i 2099 * + 1) such that the key is between arr[i - 1] and arr[i] non-inclusively, where i is in 2100 * [0, i], if we define arr[-1] = -Inf and arr[N] = Inf for an N-element array. 
The above 2101 * means that this function can return 2N + 1 different values ranging from -(N + 1) to N 2102 * - 1. 2103 * @return the index of the block 2104 */ 2105 public static int binarySearch(Cell[] arr, Cell key, CellComparator comparator) { 2106 int low = 0; 2107 int high = arr.length - 1; 2108 while (low <= high) { 2109 int mid = low + ((high - low) >> 1); 2110 // we have to compare in this order, because the comparator order 2111 // has special logic when the 'left side' is a special key. 2112 int cmp = comparator.compare(key, arr[mid]); 2113 // key lives above the midpoint 2114 if (cmp > 0) low = mid + 1; 2115 // key lives below the midpoint 2116 else if (cmp < 0) high = mid - 1; 2117 // BAM. how often does this really happen? 2118 else return mid; 2119 } 2120 return -(low + 1); 2121 } 2122 2123 /** 2124 * Bytewise binary increment/deincrement of long contained in byte array on given amount. 2125 * @param value - array of bytes containing long (length <= SIZEOF_LONG) 2126 * @param amount value will be incremented on (deincremented if negative) 2127 * @return array of bytes containing incremented long (length == SIZEOF_LONG) 2128 */ 2129 public static byte[] incrementBytes(byte[] value, long amount) { 2130 byte[] val = value; 2131 if (val.length < SIZEOF_LONG) { 2132 // Hopefully this doesn't happen too often. 
2133 byte[] newvalue; 2134 if (val[0] < 0) { 2135 newvalue = new byte[] { -1, -1, -1, -1, -1, -1, -1, -1 }; 2136 } else { 2137 newvalue = new byte[SIZEOF_LONG]; 2138 } 2139 System.arraycopy(val, 0, newvalue, newvalue.length - val.length, val.length); 2140 val = newvalue; 2141 } else if (val.length > SIZEOF_LONG) { 2142 throw new IllegalArgumentException("Increment Bytes - value too big: " + val.length); 2143 } 2144 if (amount == 0) return val; 2145 if (val[0] < 0) { 2146 return binaryIncrementNeg(val, amount); 2147 } 2148 return binaryIncrementPos(val, amount); 2149 } 2150 2151 /* increment/deincrement for positive value */ 2152 private static byte[] binaryIncrementPos(byte[] value, long amount) { 2153 long amo = amount; 2154 int sign = 1; 2155 if (amount < 0) { 2156 amo = -amount; 2157 sign = -1; 2158 } 2159 for (int i = 0; i < value.length; i++) { 2160 int cur = ((int) amo % 256) * sign; 2161 amo = (amo >> 8); 2162 int val = value[value.length - i - 1] & 0x0ff; 2163 int total = val + cur; 2164 if (total > 255) { 2165 amo += sign; 2166 total %= 256; 2167 } else if (total < 0) { 2168 amo -= sign; 2169 } 2170 value[value.length - i - 1] = (byte) total; 2171 if (amo == 0) return value; 2172 } 2173 return value; 2174 } 2175 2176 /* increment/deincrement for negative value */ 2177 private static byte[] binaryIncrementNeg(byte[] value, long amount) { 2178 long amo = amount; 2179 int sign = 1; 2180 if (amount < 0) { 2181 amo = -amount; 2182 sign = -1; 2183 } 2184 for (int i = 0; i < value.length; i++) { 2185 int cur = ((int) amo % 256) * sign; 2186 amo = (amo >> 8); 2187 int val = (~value[value.length - i - 1] & 0x0ff) + 1; 2188 int total = cur - val; 2189 if (total >= 0) { 2190 amo += sign; 2191 } else if (total < -256) { 2192 amo -= sign; 2193 total %= 256; 2194 } 2195 value[value.length - i - 1] = (byte) total; 2196 if (amo == 0) return value; 2197 } 2198 return value; 2199 } 2200 2201 /** 2202 * Writes a string as a fixed-size field, padded with zeros. 
2203 */ 2204 public static void writeStringFixedSize(final DataOutput out, String s, int size) 2205 throws IOException { 2206 byte[] b = toBytes(s); 2207 if (b.length > size) { 2208 throw new IOException("Trying to write " + b.length + " bytes (" + toStringBinary(b) 2209 + ") into a field of length " + size); 2210 } 2211 2212 out.writeBytes(s); 2213 for (int i = 0; i < size - s.length(); ++i) 2214 out.writeByte(0); 2215 } 2216 2217 /** 2218 * Reads a fixed-size field and interprets it as a string padded with zeros. 2219 */ 2220 public static String readStringFixedSize(final DataInput in, int size) throws IOException { 2221 byte[] b = new byte[size]; 2222 in.readFully(b); 2223 int n = b.length; 2224 while (n > 0 && b[n - 1] == 0) 2225 --n; 2226 2227 return toString(b, 0, n); 2228 } 2229 2230 /** 2231 * Copy the byte array given in parameter and return an instance of a new byte array with the same 2232 * length and the same content. 2233 * @param bytes the byte array to duplicate 2234 * @return a copy of the given byte array 2235 */ 2236 public static byte[] copy(byte[] bytes) { 2237 if (bytes == null) return null; 2238 byte[] result = new byte[bytes.length]; 2239 System.arraycopy(bytes, 0, result, 0, bytes.length); 2240 return result; 2241 } 2242 2243 /** 2244 * Copy the byte array given in parameter and return an instance of a new byte array with the same 2245 * length and the same content. 2246 * @param bytes the byte array to copy from 2247 * @return a copy of the given designated byte array nn 2248 */ 2249 public static byte[] copy(byte[] bytes, final int offset, final int length) { 2250 if (bytes == null) return null; 2251 byte[] result = new byte[length]; 2252 System.arraycopy(bytes, offset, result, 0, length); 2253 return result; 2254 } 2255 2256 /** 2257 * Search sorted array "a" for byte "key". I can't remember if I wrote this or copied it from 2258 * somewhere. (mcorgan) 2259 * @param a Array to search. Entries must be sorted and unique. 
2260 * @param fromIndex First index inclusive of "a" to include in the search. 2261 * @param toIndex Last index exclusive of "a" to include in the search. 2262 * @param key The byte to search for. 2263 * @return The index of key if found. If not found, return -(index + 1), where negative indicates 2264 * "not found" and the "index + 1" handles the "-0" case. 2265 */ 2266 public static int unsignedBinarySearch(byte[] a, int fromIndex, int toIndex, byte key) { 2267 int unsignedKey = key & 0xff; 2268 int low = fromIndex; 2269 int high = toIndex - 1; 2270 2271 while (low <= high) { 2272 int mid = low + ((high - low) >> 1); 2273 int midVal = a[mid] & 0xff; 2274 2275 if (midVal < unsignedKey) { 2276 low = mid + 1; 2277 } else if (midVal > unsignedKey) { 2278 high = mid - 1; 2279 } else { 2280 return mid; // key found 2281 } 2282 } 2283 return -(low + 1); // key not found. 2284 } 2285 2286 /** 2287 * Treat the byte[] as an unsigned series of bytes, most significant bits first. Start by adding 1 2288 * to the rightmost bit/byte and carry over all overflows to the more significant bits/bytes. 2289 * @param input The byte[] to increment. 2290 * @return The incremented copy of "in". May be same length or 1 byte longer. 
2291 */ 2292 public static byte[] unsignedCopyAndIncrement(final byte[] input) { 2293 byte[] copy = copy(input); 2294 if (copy == null) { 2295 throw new IllegalArgumentException("cannot increment null array"); 2296 } 2297 for (int i = copy.length - 1; i >= 0; --i) { 2298 if (copy[i] == -1) {// -1 is all 1-bits, which is the unsigned maximum 2299 copy[i] = 0; 2300 } else { 2301 ++copy[i]; 2302 return copy; 2303 } 2304 } 2305 // we maxed out the array 2306 byte[] out = new byte[copy.length + 1]; 2307 out[0] = 1; 2308 System.arraycopy(copy, 0, out, 1, copy.length); 2309 return out; 2310 } 2311 2312 public static boolean equals(List<byte[]> a, List<byte[]> b) { 2313 if (a == null) { 2314 if (b == null) { 2315 return true; 2316 } 2317 return false; 2318 } 2319 if (b == null) { 2320 return false; 2321 } 2322 if (a.size() != b.size()) { 2323 return false; 2324 } 2325 for (int i = 0; i < a.size(); ++i) { 2326 if (!Bytes.equals(a.get(i), b.get(i))) { 2327 return false; 2328 } 2329 } 2330 return true; 2331 } 2332 2333 public static boolean isSorted(Collection<byte[]> arrays) { 2334 if (!CollectionUtils.isEmpty(arrays)) { 2335 byte[] previous = new byte[0]; 2336 for (byte[] array : arrays) { 2337 if (Bytes.compareTo(previous, array) > 0) { 2338 return false; 2339 } 2340 previous = array; 2341 } 2342 } 2343 return true; 2344 } 2345 2346 public static List<byte[]> getUtf8ByteArrays(List<String> strings) { 2347 if (CollectionUtils.isEmpty(strings)) { 2348 return Collections.emptyList(); 2349 } 2350 List<byte[]> byteArrays = new ArrayList<>(strings.size()); 2351 strings.forEach(s -> byteArrays.add(Bytes.toBytes(s))); 2352 return byteArrays; 2353 } 2354 2355 /** 2356 * Returns the index of the first appearance of the value {@code target} in {@code array}. 
2357 * @param array an array of {@code byte} values, possibly empty 2358 * @param target a primitive {@code byte} value 2359 * @return the least index {@code i} for which {@code array[i] == target}, or {@code -1} if no 2360 * such index exists. 2361 */ 2362 public static int indexOf(byte[] array, byte target) { 2363 for (int i = 0; i < array.length; i++) { 2364 if (array[i] == target) { 2365 return i; 2366 } 2367 } 2368 return -1; 2369 } 2370 2371 /** 2372 * Returns the start position of the first occurrence of the specified {@code 2373 * target} within {@code array}, or {@code -1} if there is no such occurrence. 2374 * <p> 2375 * More formally, returns the lowest index {@code i} such that {@code 2376 * java.util.Arrays.copyOfRange(array, i, i + target.length)} contains exactly the same elements 2377 * as {@code target}. 2378 * @param array the array to search for the sequence {@code target} 2379 * @param target the array to search for as a sub-sequence of {@code array} 2380 */ 2381 public static int indexOf(byte[] array, byte[] target) { 2382 checkNotNull(array, "array"); 2383 checkNotNull(target, "target"); 2384 if (target.length == 0) { 2385 return 0; 2386 } 2387 2388 outer: for (int i = 0; i < array.length - target.length + 1; i++) { 2389 for (int j = 0; j < target.length; j++) { 2390 if (array[i + j] != target[j]) { 2391 continue outer; 2392 } 2393 } 2394 return i; 2395 } 2396 return -1; 2397 } 2398 2399 /** 2400 * Return true if target is present as an element anywhere in the given array. 2401 * @param array an array of {@code byte} values, possibly empty 2402 * @param target a primitive {@code byte} value 2403 * @return {@code true} if {@code target} is present as an element anywhere in {@code array}. 2404 */ 2405 public static boolean contains(byte[] array, byte target) { 2406 return indexOf(array, target) > -1; 2407 } 2408 2409 /** 2410 * Return true if target is present as an element anywhere in the given array. 
2411 * @param array an array of {@code byte} values, possibly empty 2412 * @param target an array of {@code byte} 2413 * @return {@code true} if {@code target} is present anywhere in {@code array} 2414 */ 2415 public static boolean contains(byte[] array, byte[] target) { 2416 return indexOf(array, target) > -1; 2417 } 2418 2419 /** 2420 * Fill given array with zeros. 2421 * @param b array which needs to be filled with zeros 2422 */ 2423 public static void zero(byte[] b) { 2424 zero(b, 0, b.length); 2425 } 2426 2427 /** 2428 * Fill given array with zeros at the specified position. nnn 2429 */ 2430 public static void zero(byte[] b, int offset, int length) { 2431 checkPositionIndex(offset, b.length, "offset"); 2432 checkArgument(length > 0, "length must be greater than 0"); 2433 checkPositionIndex(offset + length, b.length, "offset + length"); 2434 Arrays.fill(b, offset, offset + length, (byte) 0); 2435 } 2436 2437 // Pseudorandom random number generator, do not use SecureRandom here 2438 private static final Random RNG = new Random(); 2439 2440 /** 2441 * Fill given array with random bytes. 2442 * @param b array which needs to be filled with random bytes 2443 * <p> 2444 * If you want random bytes generated by a strong source of randomness use 2445 * {@link Bytes#secureRandom(byte[])}. 2446 * @param b array which needs to be filled with random bytes 2447 */ 2448 public static void random(byte[] b) { 2449 RNG.nextBytes(b); 2450 } 2451 2452 /** 2453 * Fill given array with random bytes at the specified position. 2454 * <p> 2455 * If you want random bytes generated by a strong source of randomness use 2456 * {@link Bytes#secureRandom(byte[], int, int)}. 
2457 * @param b array which needs to be filled with random bytes 2458 * @param offset staring offset in array 2459 * @param length number of bytes to fill 2460 */ 2461 public static void random(byte[] b, int offset, int length) { 2462 checkPositionIndex(offset, b.length, "offset"); 2463 checkArgument(length > 0, "length must be greater than 0"); 2464 checkPositionIndex(offset + length, b.length, "offset + length"); 2465 byte[] buf = new byte[length]; 2466 RNG.nextBytes(buf); 2467 System.arraycopy(buf, 0, b, offset, length); 2468 } 2469 2470 // Bytes.secureRandom may be used to create key material. 2471 private static final SecureRandom SECURE_RNG = new SecureRandom(); 2472 2473 /** 2474 * Fill given array with random bytes using a strong random number generator. 2475 * @param b array which needs to be filled with random bytes 2476 */ 2477 public static void secureRandom(byte[] b) { 2478 SECURE_RNG.nextBytes(b); 2479 } 2480 2481 /** 2482 * Fill given array with random bytes at the specified position using a strong random number 2483 * generator. 
2484 * @param b array which needs to be filled with random bytes 2485 * @param offset staring offset in array 2486 * @param length number of bytes to fill 2487 */ 2488 public static void secureRandom(byte[] b, int offset, int length) { 2489 checkPositionIndex(offset, b.length, "offset"); 2490 checkArgument(length > 0, "length must be greater than 0"); 2491 checkPositionIndex(offset + length, b.length, "offset + length"); 2492 byte[] buf = new byte[length]; 2493 SECURE_RNG.nextBytes(buf); 2494 System.arraycopy(buf, 0, b, offset, length); 2495 } 2496 2497 /** 2498 * Create a max byte array with the specified max byte count 2499 * @param maxByteCount the length of returned byte array 2500 * @return the created max byte array 2501 */ 2502 public static byte[] createMaxByteArray(int maxByteCount) { 2503 byte[] maxByteArray = new byte[maxByteCount]; 2504 for (int i = 0; i < maxByteArray.length; i++) { 2505 maxByteArray[i] = (byte) 0xff; 2506 } 2507 return maxByteArray; 2508 } 2509 2510 /** 2511 * Create a byte array which is multiple given bytes nn * @return byte array 2512 */ 2513 public static byte[] multiple(byte[] srcBytes, int multiNum) { 2514 if (multiNum <= 0) { 2515 return new byte[0]; 2516 } 2517 byte[] result = new byte[srcBytes.length * multiNum]; 2518 for (int i = 0; i < multiNum; i++) { 2519 System.arraycopy(srcBytes, 0, result, i * srcBytes.length, srcBytes.length); 2520 } 2521 return result; 2522 } 2523 2524 private static final char[] HEX_CHARS = 2525 { '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'a', 'b', 'c', 'd', 'e', 'f' }; 2526 2527 /** 2528 * Convert a byte range into a hex string 2529 */ 2530 public static String toHex(byte[] b, int offset, int length) { 2531 checkArgument(length <= Integer.MAX_VALUE / 2); 2532 int numChars = length * 2; 2533 char[] ch = new char[numChars]; 2534 for (int i = 0; i < numChars; i += 2) { 2535 byte d = b[offset + i / 2]; 2536 ch[i] = HEX_CHARS[(d >> 4) & 0x0F]; 2537 ch[i + 1] = HEX_CHARS[d & 0x0F]; 2538 } 2539 
return new String(ch); 2540 } 2541 2542 /** 2543 * Convert a byte array into a hex string 2544 */ 2545 public static String toHex(byte[] b) { 2546 return toHex(b, 0, b.length); 2547 } 2548 2549 private static int hexCharToNibble(char ch) { 2550 if (ch <= '9' && ch >= '0') { 2551 return ch - '0'; 2552 } else if (ch >= 'a' && ch <= 'f') { 2553 return ch - 'a' + 10; 2554 } else if (ch >= 'A' && ch <= 'F') { 2555 return ch - 'A' + 10; 2556 } 2557 throw new IllegalArgumentException("Invalid hex char: " + ch); 2558 } 2559 2560 private static byte hexCharsToByte(char c1, char c2) { 2561 return (byte) ((hexCharToNibble(c1) << 4) | hexCharToNibble(c2)); 2562 } 2563 2564 /** 2565 * Create a byte array from a string of hash digits. The length of the string must be a multiple 2566 * of 2 n 2567 */ 2568 public static byte[] fromHex(String hex) { 2569 checkArgument(hex.length() % 2 == 0, "length must be a multiple of 2"); 2570 int len = hex.length(); 2571 byte[] b = new byte[len / 2]; 2572 for (int i = 0; i < len; i += 2) { 2573 b[i / 2] = hexCharsToByte(hex.charAt(i), hex.charAt(i + 1)); 2574 } 2575 return b; 2576 } 2577 2578 /** 2579 * Find index of passed delimiter. 2580 * @return Index of delimiter having started from start of <code>b</code> moving rightward. 2581 */ 2582 public static int searchDelimiterIndex(final byte[] b, int offset, final int length, 2583 final int delimiter) { 2584 if (b == null) { 2585 throw new IllegalArgumentException("Passed buffer is null"); 2586 } 2587 int result = -1; 2588 for (int i = offset; i < length + offset; i++) { 2589 if (b[i] == delimiter) { 2590 result = i; 2591 break; 2592 } 2593 } 2594 return result; 2595 } 2596 2597 /** 2598 * Find index of passed delimiter walking from end of buffer backwards. 
* @return Index of delimiter
   */
  public static int searchDelimiterIndexInReverse(final byte[] b, final int offset,
    final int length, final int delimiter) {
    if (b == null) {
      throw new IllegalArgumentException("Passed buffer is null");
    }
    // Scan from the last byte of the range down to offset; -1 means the delimiter was not found.
    int i = (offset + length) - 1;
    while (i >= offset) {
      if (b[i] == delimiter) {
        return i;
      }
      i--;
    }
    return -1;
  }

  /**
   * Count how many leading bytes two array ranges have in common.
   * @param left        first array
   * @param right       second array
   * @param leftLength  number of bytes to consider in {@code left}
   * @param rightLength number of bytes to consider in {@code right}
   * @param leftOffset  start of the range in {@code left}
   * @param rightOffset start of the range in {@code right}
   * @return number of equal bytes at the start of both ranges, at most the shorter length
   */
  public static int findCommonPrefix(byte[] left, byte[] right, int leftLength, int rightLength,
    int leftOffset, int rightOffset) {
    final int limit = Math.min(leftLength, rightLength);
    int matched = 0;
    for (; matched < limit; matched++) {
      if (left[leftOffset + matched] != right[rightOffset + matched]) {
        break;
      }
    }
    return matched;
  }
}