001/* 002 * Licensed to the Apache Software Foundation (ASF) under one 003 * or more contributor license agreements. See the NOTICE file 004 * distributed with this work for additional information 005 * regarding copyright ownership. The ASF licenses this file 006 * to you under the Apache License, Version 2.0 (the 007 * "License"); you may not use this file except in compliance 008 * with the License. You may obtain a copy of the License at 009 * 010 * http://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, software 013 * distributed under the License is distributed on an "AS IS" BASIS, 014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 015 * See the License for the specific language governing permissions and 016 * limitations under the License. 017 */ 018package org.apache.hadoop.hbase.util; 019 020import static org.apache.hadoop.hbase.util.Order.ASCENDING; 021import static org.apache.hadoop.hbase.util.Order.DESCENDING; 022 023import java.math.BigDecimal; 024import java.math.MathContext; 025import java.math.RoundingMode; 026import java.nio.charset.Charset; 027import org.apache.yetus.audience.InterfaceAudience; 028 029/** 030 * Utility class that handles ordered byte arrays. That is, unlike {@link Bytes}, these methods 031 * produce byte arrays which maintain the sort order of the original values. 032 * <h3>Encoding Format summary</h3> 033 * <p> 034 * Each value is encoded as one or more bytes. The first byte of the encoding, its meaning, and a 035 * terse description of the bytes that follow is given by the following table: 036 * </p> 037 * <table summary="Encodings"> 038 * <tr> 039 * <th>Content Type</th> 040 * <th>Encoding</th> 041 * </tr> 042 * <tr> 043 * <td>NULL</td> 044 * <td>0x05</td> 045 * </tr> 046 * <tr> 047 * <td>negative infinity</td> 048 * <td>0x07</td> 049 * </tr> 050 * <tr> 051 * <td>negative large</td> 052 * <td>0x08, ~E, ~M</td> 053 * </tr> 054 * <tr> 055 * <td>negative medium</td> 056 * <td>0x13-E, ~M</td> 057 * </tr> 058 * <tr> 059 * <td>negative small</td> 060 * <td>0x14, -E, ~M</td> 061 * </tr> 062 * <tr> 063 * <td>zero</td> 064 * <td>0x15</td> 065 * </tr> 066 * <tr> 067 * <td>positive small</td> 068 * <td>0x16, ~-E, M</td> 069 * </tr> 070 * <tr> 071 * <td>positive medium</td> 072 * <td>0x17+E, M</td> 073 * </tr> 074 * <tr> 075 * <td>positive large</td> 076 * <td>0x22, E, M</td> 077 * </tr> 078 * <tr> 079 * <td>positive infinity</td> 080 * <td>0x23</td> 081 * </tr> 082 * <tr> 083 * <td>NaN</td> 084 * <td>0x25</td> 085 * </tr> 086 * <tr> 087 * <td>fixed-length 32-bit integer</td> 088 * <td>0x27, I</td> 089 * </tr> 090 * <tr> 091 * <td>fixed-length 64-bit integer</td> 092 * <td>0x28, I</td> 093 * </tr> 094 * <tr> 095 * <td>fixed-length 8-bit integer</td> 096 * <td>0x29</td> 097 * </tr> 098 * <tr> 099 * <td>fixed-length 16-bit integer</td> 100 * <td>0x2a</td> 101 * </tr> 102 * <tr> 103 * <td>fixed-length 32-bit float</td> 104 * <td>0x30, F</td> 105 * </tr> 106 * <tr> 107 * <td>fixed-length 64-bit float</td> 108 * <td>0x31, F</td> 109 * </tr> 110 * <tr> 111 * <td>TEXT</td> 112 * <td>0x33, T</td> 113 * </tr> 114 * <tr> 115 * <td>variable length BLOB</td> 116 * <td>0x35, B</td> 117 * </tr> 118 * <tr> 119 * <td>byte-for-byte BLOB</td> 120 * <td>0x36, X</td> 121 * </tr> 122 * </table> 123 * <h3>Null Encoding</h3> 124 * <p> 125 * Each value that is a NULL encodes as a single byte of 0x05. Since every other value encoding 126 * begins with a byte greater than 0x05, this forces NULL values to sort first. 127 * </p> 128 * <h3>Text Encoding</h3> 129 * <p> 130 * Each text value begins with a single byte of 0x33 and ends with a single byte of 0x00. There are 131 * zero or more intervening bytes that encode the text value. The intervening bytes are chosen so 132 * that the encoding will sort in the desired collating order. The intervening bytes may not contain 133 * a 0x00 character; the only 0x00 byte allowed in a text encoding is the final byte. 134 * </p> 135 * <p> 136 * The text encoding ends in 0x00 in order to ensure that when there are two strings where one is a 137 * prefix of the other that the shorter string will sort first. 138 * </p> 139 * <h3>Binary Encoding</h3> 140 * <p> 141 * There are two encoding strategies for binary fields, referred to as "BlobVar" and "BlobCopy". 142 * BlobVar is less efficient in both space and encoding time. It has no limitations on the range of 143 * encoded values. BlobCopy is a byte-for-byte copy of the input data followed by a termination 144 * byte. It is extremely fast to encode and decode. It carries the restriction of not allowing a 145 * 0x00 value in the input byte[] as this value is used as the termination byte. 146 * </p> 147 * <h4>BlobVar</h4> 148 * <p> 149 * "BlobVar" encodes the input byte[] in a manner similar to a variable length integer encoding. As 150 * with the other {@code OrderedBytes} encodings, the first encoded byte is used to indicate what 151 * kind of value follows. This header byte is 0x37 for BlobVar encoded values. As with the 152 * traditional varint encoding, the most significant bit of each subsequent encoded {@code byte} is 153 * used as a continuation marker. The 7 remaining bits contain the 7 most significant bits of the 154 * first unencoded byte. The next encoded byte starts with a continuation marker in the MSB. The 155 * least significant bit from the first unencoded byte follows, and the remaining 6 bits contain the 156 * 6 MSBs of the second unencoded byte. The encoding continues, encoding 7 bytes on to 8 encoded 157 * bytes. The MSB of the final encoded byte contains a termination marker rather than a continuation 158 * marker, and any remaining bits from the final input byte. Any trailing bits in the final encoded 159 * byte are zeros. 160 * </p> 161 * <h4>BlobCopy</h4> 162 * <p> 163 * "BlobCopy" is a simple byte-for-byte copy of the input data. It uses 0x38 as the header byte, and 164 * is terminated by 0x00 in the DESCENDING case. This alternative encoding is faster and more 165 * space-efficient, but it cannot accept values containing a 0x00 byte in DESCENDING order. 166 * </p> 167 * <h3>Variable-length Numeric Encoding</h3> 168 * <p> 169 * Numeric values must be coded so as to sort in numeric order. We assume that numeric values can be 170 * both integer and floating point values. Clients must be careful to use inspection methods for 171 * encoded values (such as {@link #isNumericInfinite(PositionedByteRange)} and 172 * {@link #isNumericNaN(PositionedByteRange)} to protect against decoding values into object which 173 * do not support these numeric concepts (such as {@link Long} and {@link BigDecimal}). 174 * </p> 175 * <p> 176 * Simplest cases first: If the numeric value is a NaN, then the encoding is a single byte of 0x25. 177 * This causes NaN values to sort after every other numeric value. 178 * </p> 179 * <p> 180 * If the numeric value is a negative infinity then the encoding is a single byte of 0x07. Since 181 * every other numeric value except NaN has a larger initial byte, this encoding ensures that 182 * negative infinity will sort prior to every other numeric value other than NaN. 183 * </p> 184 * <p> 185 * If the numeric value is a positive infinity then the encoding is a single byte of 0x23. Every 186 * other numeric value encoding begins with a smaller byte, ensuring that positive infinity always 187 * sorts last among numeric values. 0x23 is also smaller than 0x33, the initial byte of a text 188 * value, ensuring that every numeric value sorts before every text value. 189 * </p> 190 * <p> 191 * If the numeric value is exactly zero then it is encoded as a single byte of 0x15. Finite negative 192 * values will have initial bytes of 0x08 through 0x14 and finite positive values will have initial 193 * bytes of 0x16 through 0x22. 194 * </p> 195 * <p> 196 * For all numeric values, we compute a mantissa M and an exponent E. The mantissa is a base-100 197 * representation of the value. The exponent E determines where to put the decimal point. 198 * </p> 199 * <p> 200 * Each centimal digit of the mantissa is stored in a byte. If the value of the centimal digit is X 201 * (hence X≥0 and X≤99) then the byte value will be 2*X+1 for every byte of the mantissa, 202 * except for the last byte which will be 2*X+0. The mantissa must be the minimum number of bytes 203 * necessary to represent the value; trailing X==0 digits are omitted. This means that the mantissa 204 * will never contain a byte with the value 0x00. 205 * </p> 206 * <p> 207 * If we assume all digits of the mantissa occur to the right of the decimal point, then the 208 * exponent E is the power of one hundred by which one must multiply the mantissa to recover the 209 * original value. 210 * </p> 211 * <p> 212 * Values are classified as large, medium, or small according to the value of E. If E is 11 or more, 213 * the value is large. For E between 0 and 10, the value is medium. For E less than zero, the value 214 * is small. 215 * </p> 216 * <p> 217 * Large positive values are encoded as a single byte 0x22 followed by E as a varint and then M. 218 * Medium positive values are a single byte of 0x17+E followed by M. Small positive values are 219 * encoded as a single byte 0x16 followed by the ones-complement of the varint for -E followed by M. 220 * </p> 221 * <p> 222 * Small negative values are encoded as a single byte 0x14 followed by -E as a varint and then the 223 * ones-complement of M. Medium negative values are encoded as a byte 0x13-E followed by the 224 * ones-complement of M. Large negative values consist of the single byte 0x08 followed by the 225 * ones-complement of the varint encoding of E followed by the ones-complement of M. 226 * </p> 227 * <h3>Fixed-length Integer Encoding</h3> 228 * <p> 229 * All 4-byte integers are serialized to a 5-byte, fixed-width, sortable byte format. All 8-byte 230 * integers are serialized to the equivelant 9-byte format. Serialization is performed by writing a 231 * header byte, inverting the integer sign bit and writing the resulting bytes to the byte array in 232 * big endian order. 233 * </p> 234 * <h3>Fixed-length Floating Point Encoding</h3> 235 * <p> 236 * 32-bit and 64-bit floating point numbers are encoded to a 5-byte and 9-byte encoding format, 237 * respectively. The format is identical, save for the precision respected in each step of the 238 * operation. 239 * <p> 240 * This format ensures the following total ordering of floating point values: 241 * Float.NEGATIVE_INFINITY < -Float.MAX_VALUE < ... < -Float.MIN_VALUE < -0.0 < +0.0; 242 * < Float.MIN_VALUE < ... < Float.MAX_VALUE < Float.POSITIVE_INFINITY < Float.NaN 243 * </p> 244 * <p> 245 * Floating point numbers are encoded as specified in IEEE 754. A 32-bit single precision float 246 * consists of a sign bit, 8-bit unsigned exponent encoded in offset-127 notation, and a 23-bit 247 * significand. The format is described further in the 248 * <a href="http://en.wikipedia.org/wiki/Single_precision"> Single Precision Floating Point 249 * Wikipedia page</a> 250 * </p> 251 * <p> 252 * The value of a normal float is -1 <sup>sign bit</sup> × 2<sup>exponent - 127</sup> × 253 * 1.significand 254 * </p> 255 * <p> 256 * The IEE754 floating point format already preserves sort ordering for positive floating point 257 * numbers when the raw bytes are compared in most significant byte order. This is discussed further 258 * at <a href= "http://www.cygnus-software.com/papers/comparingfloats/comparingfloats.htm"> 259 * http://www.cygnus-software.com/papers/comparingfloats/comparingfloats.htm</a> 260 * </p> 261 * <p> 262 * Thus, we need only ensure that negative numbers sort in the the exact opposite order as positive 263 * numbers (so that say, negative infinity is less than negative 1), and that all negative numbers 264 * compare less than any positive number. To accomplish this, we invert the sign bit of all floating 265 * point numbers, and we also invert the exponent and significand bits if the floating point number 266 * was negative. 267 * </p> 268 * <p> 269 * More specifically, we first store the floating point bits into a 32-bit int {@code j} using 270 * {@link Float#floatToIntBits}. This method collapses all NaNs into a single, canonical NaN value 271 * but otherwise leaves the bits unchanged. We then compute 272 * </p> 273 * 274 * <pre> 275 * j ˆ= (j >> (Integer.SIZE - 1)) | Integer.MIN_SIZE 276 * </pre> 277 * <p> 278 * which inverts the sign bit and XOR's all other bits with the sign bit itself. Comparing the raw 279 * bytes of {@code j} in most significant byte order is equivalent to performing a single precision 280 * floating point comparison on the underlying bits (ignoring NaN comparisons, as NaNs don't compare 281 * equal to anything when performing floating point comparisons). 282 * </p> 283 * <p> 284 * The resulting integer is then converted into a byte array by serializing the integer one byte at 285 * a time in most significant byte order. The serialized integer is prefixed by a single header 286 * byte. All serialized values are 5 bytes in length. 287 * </p> 288 * <p> 289 * {@code OrderedBytes} encodings are heavily influenced by the 290 * <a href="http://sqlite.org/src4/doc/trunk/www/key_encoding.wiki">SQLite4 Key Encoding</a>. Slight 291 * deviations are make in the interest of order correctness and user extensibility. Fixed-width 292 * {@code Long} and {@link Double} encodings are based on implementations from the now defunct 293 * Orderly library. 294 * </p> 295 */ 296@InterfaceAudience.Public 297public class OrderedBytes { 298 299 /* 300 * These constants define header bytes used to identify encoded values. Note that the values here 301 * are not exhaustive as the Numeric format encodes portions of its value within the header byte. 302 * The values listed here are directly applied to persisted data -- DO NOT modify the values 303 * specified here. Instead, gaps are placed intentionally between values so that new 304 * implementations can be inserted into the total ordering enforced here. 305 */ 306 private static final byte NULL = 0x05; 307 // room for 1 expansion type 308 private static final byte NEG_INF = 0x07; 309 private static final byte NEG_LARGE = 0x08; 310 private static final byte NEG_MED_MIN = 0x09; 311 private static final byte NEG_MED_MAX = 0x13; 312 private static final byte NEG_SMALL = 0x14; 313 private static final byte ZERO = 0x15; 314 private static final byte POS_SMALL = 0x16; 315 private static final byte POS_MED_MIN = 0x17; 316 private static final byte POS_MED_MAX = 0x21; 317 private static final byte POS_LARGE = 0x22; 318 private static final byte POS_INF = 0x23; 319 // room for 2 expansion type 320 private static final byte NAN = 0x26; 321 // room for 2 expansion types 322 private static final byte FIXED_INT8 = 0x29; 323 private static final byte FIXED_INT16 = 0x2a; 324 private static final byte FIXED_INT32 = 0x2b; 325 private static final byte FIXED_INT64 = 0x2c; 326 // room for 3 expansion types 327 private static final byte FIXED_FLOAT32 = 0x30; 328 private static final byte FIXED_FLOAT64 = 0x31; 329 // room for 2 expansion type 330 private static final byte TEXT = 0x34; 331 // room for 2 expansion type 332 private static final byte BLOB_VAR = 0x37; 333 private static final byte BLOB_COPY = 0x38; 334 335 /* 336 * The following constant values are used by encoding implementations 337 */ 338 339 public static final Charset UTF8 = Charset.forName("UTF-8"); 340 private static final byte TERM = 0x00; 341 342 /** 343 * Max precision guaranteed to fit into a {@code long}. 344 */ 345 public static final int MAX_PRECISION = 31; 346 347 /** 348 * The context used to normalize {@link BigDecimal} values. 349 */ 350 public static final MathContext DEFAULT_MATH_CONTEXT = 351 new MathContext(MAX_PRECISION, RoundingMode.HALF_UP); 352 353 /** 354 * Creates the standard exception when the encoded header byte is unexpected for the decoding 355 * context. 356 * @param header value used in error message. 357 */ 358 private static IllegalArgumentException unexpectedHeader(byte header) { 359 throw new IllegalArgumentException( 360 "unexpected value in first byte: 0x" + Long.toHexString(header)); 361 } 362 363 /** 364 * Perform unsigned comparison between two long values. Conforms to the same interface as 365 * {@link org.apache.hadoop.hbase.CellComparator}. 366 */ 367 private static int unsignedCmp(long x1, long x2) { 368 int cmp; 369 if ((cmp = (x1 < x2 ? -1 : (x1 == x2 ? 0 : 1))) == 0) return 0; 370 // invert the result when either value is negative 371 if ((x1 < 0) != (x2 < 0)) return -cmp; 372 return cmp; 373 } 374 375 /** 376 * Write a 32-bit unsigned integer to {@code dst} as 4 big-endian bytes. 377 * @return number of bytes written. 378 */ 379 private static int putUint32(PositionedByteRange dst, int val) { 380 dst.put((byte) (val >>> 24)).put((byte) (val >>> 16)).put((byte) (val >>> 8)).put((byte) val); 381 return 4; 382 } 383 384 /** 385 * Encode an unsigned 64-bit unsigned integer {@code val} into {@code dst}. 386 * @param dst The destination to which encoded bytes are written. 387 * @param val The value to write. 388 * @param comp Compliment the encoded value when {@code comp} is true. 389 * @return number of bytes written. 390 */ 391 @InterfaceAudience.Private 392 static int putVaruint64(PositionedByteRange dst, long val, boolean comp) { 393 int w, y, len = 0; 394 final int offset = dst.getOffset(), start = dst.getPosition(); 395 byte[] a = dst.getBytes(); 396 Order ord = comp ? DESCENDING : ASCENDING; 397 if (-1 == unsignedCmp(val, 241L)) { 398 dst.put((byte) val); 399 len = dst.getPosition() - start; 400 ord.apply(a, offset + start, len); 401 return len; 402 } 403 if (-1 == unsignedCmp(val, 2288L)) { 404 y = (int) (val - 240); 405 dst.put((byte) (y / 256 + 241)).put((byte) (y % 256)); 406 len = dst.getPosition() - start; 407 ord.apply(a, offset + start, len); 408 return len; 409 } 410 if (-1 == unsignedCmp(val, 67824L)) { 411 y = (int) (val - 2288); 412 dst.put((byte) 249).put((byte) (y / 256)).put((byte) (y % 256)); 413 len = dst.getPosition() - start; 414 ord.apply(a, offset + start, len); 415 return len; 416 } 417 y = (int) val; 418 w = (int) (val >>> 32); 419 if (w == 0) { 420 if (-1 == unsignedCmp(y, 16777216L)) { 421 dst.put((byte) 250).put((byte) (y >>> 16)).put((byte) (y >>> 8)).put((byte) y); 422 len = dst.getPosition() - start; 423 ord.apply(a, offset + start, len); 424 return len; 425 } 426 dst.put((byte) 251); 427 putUint32(dst, y); 428 len = dst.getPosition() - start; 429 ord.apply(a, offset + start, len); 430 return len; 431 } 432 if (-1 == unsignedCmp(w, 256L)) { 433 dst.put((byte) 252).put((byte) w); 434 putUint32(dst, y); 435 len = dst.getPosition() - start; 436 ord.apply(a, offset + start, len); 437 return len; 438 } 439 if (-1 == unsignedCmp(w, 65536L)) { 440 dst.put((byte) 253).put((byte) (w >>> 8)).put((byte) w); 441 putUint32(dst, y); 442 len = dst.getPosition() - start; 443 ord.apply(a, offset + start, len); 444 return len; 445 } 446 if (-1 == unsignedCmp(w, 16777216L)) { 447 dst.put((byte) 254).put((byte) (w >>> 16)).put((byte) (w >>> 8)).put((byte) w); 448 putUint32(dst, y); 449 len = dst.getPosition() - start; 450 ord.apply(a, offset + start, len); 451 return len; 452 } 453 dst.put((byte) 255); 454 putUint32(dst, w); 455 putUint32(dst, y); 456 len = dst.getPosition() - start; 457 ord.apply(a, offset + start, len); 458 return len; 459 } 460 461 /** 462 * Inspect {@code src} for an encoded varuint64 for its length in bytes. Preserves the state of 463 * {@code src}. 464 * @param src source buffer 465 * @param comp if true, parse the compliment of the value. 466 * @return the number of bytes consumed by this value. 467 */ 468 @InterfaceAudience.Private 469 static int lengthVaruint64(PositionedByteRange src, boolean comp) { 470 int a0 = (comp ? DESCENDING : ASCENDING).apply(src.peek()) & 0xff; 471 if (a0 <= 240) return 1; 472 if (a0 <= 248) return 2; 473 if (a0 == 249) return 3; 474 if (a0 == 250) return 4; 475 if (a0 == 251) return 5; 476 if (a0 == 252) return 6; 477 if (a0 == 253) return 7; 478 if (a0 == 254) return 8; 479 if (a0 == 255) return 9; 480 throw unexpectedHeader(src.peek()); 481 } 482 483 /** 484 * Skip {@code src} over the encoded varuint64. 485 * @param src source buffer 486 * @param cmp if true, parse the compliment of the value. 487 * @return the number of bytes skipped. 488 */ 489 @InterfaceAudience.Private 490 static int skipVaruint64(PositionedByteRange src, boolean cmp) { 491 final int len = lengthVaruint64(src, cmp); 492 src.setPosition(src.getPosition() + len); 493 return len; 494 } 495 496 /** 497 * Decode a sequence of bytes in {@code src} as a varuint64. Compliment the encoded value when 498 * {@code comp} is true. 499 * @return the decoded value. 500 */ 501 @InterfaceAudience.Private 502 static long getVaruint64(PositionedByteRange src, boolean comp) { 503 assert src.getRemaining() >= lengthVaruint64(src, comp); 504 final long ret; 505 Order ord = comp ? DESCENDING : ASCENDING; 506 byte x = src.get(); 507 final int a0 = ord.apply(x) & 0xff, a1, a2, a3, a4, a5, a6, a7, a8; 508 if (-1 == unsignedCmp(a0, 241)) { 509 return a0; 510 } 511 x = src.get(); 512 a1 = ord.apply(x) & 0xff; 513 if (-1 == unsignedCmp(a0, 249)) { 514 return (a0 - 241L) * 256 + a1 + 240; 515 } 516 x = src.get(); 517 a2 = ord.apply(x) & 0xff; 518 if (a0 == 249) { 519 return 2288L + 256 * a1 + a2; 520 } 521 x = src.get(); 522 a3 = ord.apply(x) & 0xff; 523 if (a0 == 250) { 524 return ((long) a1 << 16L) | (a2 << 8) | a3; 525 } 526 x = src.get(); 527 a4 = ord.apply(x) & 0xff; 528 ret = (((long) a1) << 24) | (a2 << 16) | (a3 << 8) | a4; 529 if (a0 == 251) { 530 return ret; 531 } 532 x = src.get(); 533 a5 = ord.apply(x) & 0xff; 534 if (a0 == 252) { 535 return (ret << 8) | a5; 536 } 537 x = src.get(); 538 a6 = ord.apply(x) & 0xff; 539 if (a0 == 253) { 540 return (ret << 16) | (a5 << 8) | a6; 541 } 542 x = src.get(); 543 a7 = ord.apply(x) & 0xff; 544 if (a0 == 254) { 545 return (ret << 24) | (a5 << 16) | (a6 << 8) | a7; 546 } 547 x = src.get(); 548 a8 = ord.apply(x) & 0xff; 549 return (ret << 32) | (((long) a5) << 24) | (a6 << 16) | (a7 << 8) | a8; 550 } 551 552 /** 553 * Strip all trailing zeros to ensure that no digit will be zero and round using our default 554 * context to ensure precision doesn't exceed max allowed. From Phoenix's {@code NumberUtil}. 555 * @return new {@link BigDecimal} instance 556 */ 557 @InterfaceAudience.Private 558 static BigDecimal normalize(BigDecimal val) { 559 return null == val ? null : val.stripTrailingZeros().round(DEFAULT_MATH_CONTEXT); 560 } 561 562 /** 563 * Read significand digits from {@code src} according to the magnitude of {@code e}. 564 * @param src The source from which to read encoded digits. 565 * @param e The magnitude of the first digit read. 566 * @param comp Treat encoded bytes as compliments when {@code comp} is true. 567 * @return The decoded value. 568 * @throws IllegalArgumentException when read exceeds the remaining length of {@code src}. 569 */ 570 private static BigDecimal decodeSignificand(PositionedByteRange src, int e, boolean comp) { 571 // TODO: can this be made faster? 572 byte[] a = src.getBytes(); 573 final int start = src.getPosition(), offset = src.getOffset(), remaining = src.getRemaining(); 574 Order ord = comp ? DESCENDING : ASCENDING; 575 BigDecimal m; 576 StringBuilder sb = new StringBuilder(); 577 for (int i = 0;; i++) { 578 if (i > remaining) { 579 // we've exceeded this range's window 580 src.setPosition(start); 581 throw new IllegalArgumentException( 582 "Read exceeds range before termination byte found. offset: " + offset + " position: " 583 + (start + i)); 584 } 585 // one byte -> 2 digits 586 // base-100 digits are encoded as val * 2 + 1 except for the termination digit. 587 int twoDigits = (ord.apply(a[offset + start + i]) & 0xff) / 2; 588 sb.append(String.format("%02d", twoDigits)); 589 // detect termination digit 590 // Besides, as we will normalise the return value at last, 591 // we only need to decode at most MAX_PRECISION + 2 digits here. 592 if ((ord.apply(a[offset + start + i]) & 1) == 0 || sb.length() > MAX_PRECISION + 1) { 593 src.setPosition(start + i + 1); 594 break; 595 } 596 } 597 m = new BigDecimal(sb.toString()); 598 int stepsMoveLeft = sb.charAt(0) != '0' ? m.precision() : m.precision() + 1; 599 stepsMoveLeft -= e * 2; 600 601 return normalize(m.movePointLeft(stepsMoveLeft)); 602 } 603 604 /** 605 * Skip {@code src} over the significand bytes. 606 * @param src The source from which to read encoded digits. 607 * @param comp Treat encoded bytes as compliments when {@code comp} is true. 608 * @return the number of bytes skipped. 609 */ 610 private static int skipSignificand(PositionedByteRange src, boolean comp) { 611 byte[] a = src.getBytes(); 612 final int offset = src.getOffset(), start = src.getPosition(); 613 int i = src.getPosition(); 614 while (((comp ? DESCENDING : ASCENDING).apply(a[offset + i++]) & 1) != 0) 615 ; 616 src.setPosition(i); 617 return i - start; 618 } 619 620 /** 621 * <p> 622 * Encode the small magnitude floating point number {@code val} using the key encoding. The caller 623 * guarantees that 1.0 > abs(val) > 0.0. 624 * </p> 625 * <p> 626 * A floating point value is encoded as an integer exponent {@code E} and a mantissa {@code M}. 627 * The original value is equal to {@code (M * 100^E)}. {@code E} is set to the smallest value 628 * possible without making {@code M} greater than or equal to 1.0. 629 * </p> 630 * <p> 631 * For this routine, {@code E} will always be zero or negative, since the original value is less 632 * than one. The encoding written by this routine is the ones-complement of the varint of the 633 * negative of {@code E} followed by the mantissa: 634 * 635 * <pre> 636 * Encoding: ~-E M 637 * </pre> 638 * </p> 639 * @param dst The destination to which encoded digits are written. 640 * @param val The value to encode. 641 * @return the number of bytes written. 642 */ 643 private static int encodeNumericSmall(PositionedByteRange dst, BigDecimal val) { 644 // TODO: this can be done faster? 645 // assert 1.0 > abs(val) > 0.0 646 BigDecimal abs = val.abs(); 647 assert BigDecimal.ZERO.compareTo(abs) < 0 && BigDecimal.ONE.compareTo(abs) > 0; 648 byte[] a = dst.getBytes(); 649 boolean isNeg = val.signum() == -1; 650 final int offset = dst.getOffset(), start = dst.getPosition(); 651 652 if (isNeg) { /* Small negative number: 0x14, -E, ~M */ 653 dst.put(NEG_SMALL); 654 } else { /* Small positive number: 0x16, ~-E, M */ 655 dst.put(POS_SMALL); 656 } 657 658 // normalize abs(val) to determine E 659 int zerosBeforeFirstNonZero = abs.scale() - abs.precision(); 660 int lengthToMoveRight = 661 zerosBeforeFirstNonZero % 2 == 0 ? zerosBeforeFirstNonZero : zerosBeforeFirstNonZero - 1; 662 int e = lengthToMoveRight / 2; 663 abs = abs.movePointRight(lengthToMoveRight); 664 665 putVaruint64(dst, e, !isNeg); // encode appropriate E value. 666 667 // encode M by peeling off centimal digits, encoding x as 2x+1 668 int startM = dst.getPosition(); 669 encodeToCentimal(dst, abs); 670 // terminal digit should be 2x 671 a[offset + dst.getPosition() - 1] = (byte) (a[offset + dst.getPosition() - 1] & 0xfe); 672 if (isNeg) { 673 // negative values encoded as ~M 674 DESCENDING.apply(a, offset + startM, dst.getPosition() - startM); 675 } 676 return dst.getPosition() - start; 677 } 678 679 /** 680 * Encode the large magnitude floating point number {@code val} using the key encoding. The caller 681 * guarantees that {@code val} will be finite and abs(val) >= 1.0. 682 * <p> 683 * A floating point value is encoded as an integer exponent {@code E} and a mantissa {@code M}. 684 * The original value is equal to {@code (M * 100^E)}. {@code E} is set to the smallest value 685 * possible without making {@code M} greater than or equal to 1.0. 686 * </p> 687 * <p> 688 * Each centimal digit of the mantissa is stored in a byte. If the value of the centimal digit is 689 * {@code X} (hence {@code X>=0} and {@code X<=99}) then the byte value will be {@code 2*X+1} for 690 * every byte of the mantissa, except for the last byte which will be {@code 2*X+0}. The mantissa 691 * must be the minimum number of bytes necessary to represent the value; trailing {@code X==0} 692 * digits are omitted. This means that the mantissa will never contain a byte with the value 693 * {@code 0x00}. 694 * </p> 695 * <p> 696 * If {@code E > 10}, then this routine writes of {@code E} as a varint followed by the mantissa 697 * as described above. Otherwise, if {@code E <= 10}, this routine only writes the mantissa and 698 * leaves the {@code E} value to be encoded as part of the opening byte of the field by the 699 * calling function. 700 * 701 * <pre> 702 * Encoding: M (if E<=10) 703 * E M (if E>10) 704 * </pre> 705 * </p> 706 * @param dst The destination to which encoded digits are written. 707 * @param val The value to encode. 708 * @return the number of bytes written. 709 */ 710 private static int encodeNumericLarge(PositionedByteRange dst, BigDecimal val) { 711 // TODO: this can be done faster 712 BigDecimal abs = val.abs(); 713 byte[] a = dst.getBytes(); 714 boolean isNeg = val.signum() == -1; 715 final int start = dst.getPosition(), offset = dst.getOffset(); 716 717 if (isNeg) { /* Large negative number: 0x08, ~E, ~M */ 718 dst.put(NEG_LARGE); 719 } else { /* Large positive number: 0x22, E, M */ 720 dst.put(POS_LARGE); 721 } 722 723 // normalize abs(val) to determine E 724 int integerDigits = abs.precision() - abs.scale(); 725 int lengthToMoveLeft = integerDigits % 2 == 0 ? integerDigits : integerDigits + 1; 726 int e = lengthToMoveLeft / 2; 727 abs = abs.movePointLeft(lengthToMoveLeft); 728 729 // encode appropriate header byte and/or E value. 730 if (e > 10) { /* large number, write out {~,}E */ 731 putVaruint64(dst, e, isNeg); 732 } else { 733 if (isNeg) { /* Medium negative number: 0x13-E, ~M */ 734 dst.put(start, (byte) (NEG_MED_MAX - e)); 735 } else { /* Medium positive number: 0x17+E, M */ 736 dst.put(start, (byte) (POS_MED_MIN + e)); 737 } 738 } 739 740 // encode M by peeling off centimal digits, encoding x as 2x+1 741 int startM = dst.getPosition(); 742 encodeToCentimal(dst, abs); 743 // terminal digit should be 2x 744 a[offset + dst.getPosition() - 1] = (byte) (a[offset + dst.getPosition() - 1] & 0xfe); 745 if (isNeg) { 746 // negative values encoded as ~M 747 DESCENDING.apply(a, offset + startM, dst.getPosition() - startM); 748 } 749 return dst.getPosition() - start; 750 } 751 752 /** 753 * Encode a value val in [0.01, 1.0) into Centimals. Util function for 754 * {@link OrderedBytes#encodeNumericLarge(PositionedByteRange, BigDecimal)} and 755 * {@link OrderedBytes#encodeNumericSmall(PositionedByteRange, BigDecimal)} 756 * @param dst The destination to which encoded digits are written. 757 * @param val A BigDecimal after the normalization. The value must be in [0.01, 1.0). 758 */ 759 private static void encodeToCentimal(PositionedByteRange dst, BigDecimal val) { 760 // The input value val must be in [0.01, 1.0) 761 String stringOfAbs = val.stripTrailingZeros().toPlainString(); 762 String value = stringOfAbs.substring(stringOfAbs.indexOf('.') + 1); 763 int d; 764 765 // If the first float digit is 0, we will encode one digit more than MAX_PRECISION 766 // We encode at most MAX_PRECISION significant digits into centimals, 767 // because the input value, has been already normalized. 768 int maxPrecision = value.charAt(0) == '0' ? MAX_PRECISION + 1 : MAX_PRECISION; 769 maxPrecision = Math.min(maxPrecision, value.length()); 770 for (int i = 0; i < maxPrecision; i += 2) { 771 d = (value.charAt(i) - '0') * 10; 772 if (i + 1 < maxPrecision) { 773 d += (value.charAt(i + 1) - '0'); 774 } 775 dst.put((byte) (2 * d + 1)); 776 } 777 } 778 779 /** 780 * Encode a numerical value using the variable-length encoding. 781 * @param dst The destination to which encoded digits are written. 782 * @param val The value to encode. 783 * @param ord The {@link Order} to respect while encoding {@code val}. 784 * @return the number of bytes written. 785 */ 786 public static int encodeNumeric(PositionedByteRange dst, long val, Order ord) { 787 return encodeNumeric(dst, BigDecimal.valueOf(val), ord); 788 } 789 790 /** 791 * Encode a numerical value using the variable-length encoding. 792 * @param dst The destination to which encoded digits are written. 793 * @param val The value to encode. 794 * @param ord The {@link Order} to respect while encoding {@code val}. 795 * @return the number of bytes written. 796 */ 797 public static int encodeNumeric(PositionedByteRange dst, double val, Order ord) { 798 if (val == 0.0) { 799 dst.put(ord.apply(ZERO)); 800 return 1; 801 } 802 if (Double.isNaN(val)) { 803 dst.put(ord.apply(NAN)); 804 return 1; 805 } 806 if (val == Double.NEGATIVE_INFINITY) { 807 dst.put(ord.apply(NEG_INF)); 808 return 1; 809 } 810 if (val == Double.POSITIVE_INFINITY) { 811 dst.put(ord.apply(POS_INF)); 812 return 1; 813 } 814 return encodeNumeric(dst, BigDecimal.valueOf(val), ord); 815 } 816 817 /** 818 * Encode a numerical value using the variable-length encoding. If the number of significant 819 * digits of the value exceeds the {@link OrderedBytes#MAX_PRECISION}, the exceeding part will be 820 * lost. 821 * @param dst The destination to which encoded digits are written. 822 * @param val The value to encode. 823 * @param ord The {@link Order} to respect while encoding {@code val}. 824 * @return the number of bytes written. 825 */ 826 public static int encodeNumeric(PositionedByteRange dst, BigDecimal val, Order ord) { 827 final int len, offset = dst.getOffset(), start = dst.getPosition(); 828 if (null == val) { 829 return encodeNull(dst, ord); 830 } else if (BigDecimal.ZERO.compareTo(val) == 0) { 831 dst.put(ord.apply(ZERO)); 832 return 1; 833 } 834 BigDecimal abs = val.abs(); 835 if (BigDecimal.ONE.compareTo(abs) <= 0) { // abs(v) >= 1.0 836 len = encodeNumericLarge(dst, normalize(val)); 837 } else { // 1.0 > abs(v) >= 0.0 838 len = encodeNumericSmall(dst, normalize(val)); 839 } 840 ord.apply(dst.getBytes(), offset + start, len); 841 return len; 842 } 843 844 /** 845 * Decode a {@link BigDecimal} from {@code src}. Assumes {@code src} encodes a value in Numeric 846 * encoding and is within the valid range of {@link BigDecimal} values. {@link BigDecimal} does 847 * not support {@code NaN} or {@code Infinte} values. 848 * @see #decodeNumericAsDouble(PositionedByteRange) 849 */ 850 private static BigDecimal decodeNumericValue(PositionedByteRange src) { 851 final int e; 852 byte header = src.get(); 853 boolean dsc = -1 == Integer.signum(header); 854 header = dsc ? DESCENDING.apply(header) : header; 855 856 if (header == NULL) return null; 857 if (header == NEG_LARGE) { /* Large negative number: 0x08, ~E, ~M */ 858 e = (int) getVaruint64(src, !dsc); 859 return decodeSignificand(src, e, !dsc).negate(); 860 } 861 if (header >= NEG_MED_MIN && header <= NEG_MED_MAX) { 862 /* Medium negative number: 0x13-E, ~M */ 863 e = NEG_MED_MAX - header; 864 return decodeSignificand(src, e, !dsc).negate(); 865 } 866 if (header == NEG_SMALL) { /* Small negative number: 0x14, -E, ~M */ 867 e = (int) -getVaruint64(src, dsc); 868 return decodeSignificand(src, e, !dsc).negate(); 869 } 870 if (header == ZERO) { 871 return BigDecimal.ZERO; 872 } 873 if (header == POS_SMALL) { /* Small positive number: 0x16, ~-E, M */ 874 e = (int) -getVaruint64(src, !dsc); 875 return decodeSignificand(src, e, dsc); 876 } 877 if (header >= POS_MED_MIN && header <= POS_MED_MAX) { 878 /* Medium positive number: 0x17+E, M */ 879 e = header - POS_MED_MIN; 880 return decodeSignificand(src, e, dsc); 881 } 882 if (header == POS_LARGE) { /* Large positive number: 0x22, E, M */ 883 e = (int) getVaruint64(src, dsc); 884 return decodeSignificand(src, e, dsc); 885 } 886 throw unexpectedHeader(header); 887 } 888 889 /** 890 * Decode a primitive {@code double} value from the Numeric encoding. Numeric encoding is based on 891 * {@link BigDecimal}; in the event the encoded value is larger than can be represented in a 892 * {@code double}, this method performs an implicit narrowing conversion as described in 893 * {@link BigDecimal#doubleValue()}. 894 * @throws NullPointerException when the encoded value is {@code NULL}. 895 * @throws IllegalArgumentException when the encoded value is not a Numeric. 896 * @see #encodeNumeric(PositionedByteRange, double, Order) 897 * @see BigDecimal#doubleValue() 898 */ 899 public static double decodeNumericAsDouble(PositionedByteRange src) { 900 // TODO: should an encoded NULL value throw unexpectedHeader() instead? 901 if (isNull(src)) { 902 throw new NullPointerException("A null value cannot be decoded to a double."); 903 } 904 if (isNumericNaN(src)) { 905 src.get(); 906 return Double.NaN; 907 } 908 if (isNumericZero(src)) { 909 src.get(); 910 return Double.valueOf(0.0); 911 } 912 913 byte header = -1 == Integer.signum(src.peek()) ? DESCENDING.apply(src.peek()) : src.peek(); 914 915 if (header == NEG_INF) { 916 src.get(); 917 return Double.NEGATIVE_INFINITY; 918 } else if (header == POS_INF) { 919 src.get(); 920 return Double.POSITIVE_INFINITY; 921 } else { 922 return decodeNumericValue(src).doubleValue(); 923 } 924 } 925 926 /** 927 * Decode a primitive {@code long} value from the Numeric encoding. Numeric encoding is based on 928 * {@link BigDecimal}; in the event the encoded value is larger than can be represented in a 929 * {@code long}, this method performs an implicit narrowing conversion as described in 930 * {@link BigDecimal#doubleValue()}. 931 * @throws NullPointerException when the encoded value is {@code NULL}. 932 * @throws IllegalArgumentException when the encoded value is not a Numeric. 933 * @see #encodeNumeric(PositionedByteRange, long, Order) 934 * @see BigDecimal#longValue() 935 */ 936 public static long decodeNumericAsLong(PositionedByteRange src) { 937 // TODO: should an encoded NULL value throw unexpectedHeader() instead? 938 if (isNull(src)) throw new NullPointerException(); 939 if (!isNumeric(src)) throw unexpectedHeader(src.peek()); 940 if (isNumericNaN(src)) throw unexpectedHeader(src.peek()); 941 if (isNumericInfinite(src)) throw unexpectedHeader(src.peek()); 942 943 if (isNumericZero(src)) { 944 src.get(); 945 return Long.valueOf(0); 946 } 947 return decodeNumericValue(src).longValue(); 948 } 949 950 /** 951 * Decode a {@link BigDecimal} value from the variable-length encoding. 952 * @throws IllegalArgumentException when the encoded value is not a Numeric. 953 * @see #encodeNumeric(PositionedByteRange, BigDecimal, Order) 954 */ 955 public static BigDecimal decodeNumericAsBigDecimal(PositionedByteRange src) { 956 if (isNull(src)) { 957 src.get(); 958 return null; 959 } 960 if (!isNumeric(src)) throw unexpectedHeader(src.peek()); 961 if (isNumericNaN(src)) throw unexpectedHeader(src.peek()); 962 if (isNumericInfinite(src)) throw unexpectedHeader(src.peek()); 963 return decodeNumericValue(src); 964 } 965 966 /** 967 * Encode a String value. String encoding is 0x00-terminated and so it does not support 968 * {@code \u0000} codepoints in the value. 969 * @param dst The destination to which the encoded value is written. 970 * @param val The value to encode. 971 * @param ord The {@link Order} to respect while encoding {@code val}. 972 * @return the number of bytes written. 973 * @throws IllegalArgumentException when {@code val} contains a {@code \u0000}. 974 */ 975 public static int encodeString(PositionedByteRange dst, String val, Order ord) { 976 if (null == val) { 977 return encodeNull(dst, ord); 978 } 979 if (val.contains("\u0000")) 980 throw new IllegalArgumentException("Cannot encode String values containing '\\u0000'"); 981 final int offset = dst.getOffset(), start = dst.getPosition(); 982 dst.put(TEXT); 983 // TODO: is there no way to decode into dst directly? 984 dst.put(val.getBytes(UTF8)); 985 dst.put(TERM); 986 ord.apply(dst.getBytes(), offset + start, dst.getPosition() - start); 987 return dst.getPosition() - start; 988 } 989 990 /** 991 * Decode a String value. 992 */ 993 public static String decodeString(PositionedByteRange src) { 994 final byte header = src.get(); 995 if (header == NULL || header == DESCENDING.apply(NULL)) return null; 996 assert header == TEXT || header == DESCENDING.apply(TEXT); 997 Order ord = header == TEXT ? ASCENDING : DESCENDING; 998 byte[] a = src.getBytes(); 999 final int offset = src.getOffset(), start = src.getPosition(); 1000 final byte terminator = ord.apply(TERM); 1001 int rawStartPos = offset + start, rawTermPos = rawStartPos; 1002 for (; a[rawTermPos] != terminator; rawTermPos++) 1003 ; 1004 src.setPosition(rawTermPos - offset + 1); // advance position to TERM + 1 1005 if (DESCENDING == ord) { 1006 // make a copy so that we don't disturb encoded value with ord. 1007 byte[] copy = new byte[rawTermPos - rawStartPos]; 1008 System.arraycopy(a, rawStartPos, copy, 0, copy.length); 1009 ord.apply(copy); 1010 return new String(copy, UTF8); 1011 } else { 1012 return new String(a, rawStartPos, rawTermPos - rawStartPos, UTF8); 1013 } 1014 } 1015 1016 /** 1017 * Calculate the expected BlobVar encoded length based on unencoded length. 1018 */ 1019 public static int blobVarEncodedLength(int len) { 1020 if (0 == len) return 2; // 1-byte header + 1-byte terminator 1021 else return (int) Math.ceil((len * 8) // 8-bits per input byte 1022 / 7.0) // 7-bits of input data per encoded byte, rounded up 1023 + 1; // + 1-byte header 1024 } 1025 1026 /** 1027 * Calculate the expected BlobVar decoded length based on encoded length. 1028 */ 1029 @InterfaceAudience.Private 1030 static int blobVarDecodedLength(int len) { 1031 return ((len - 1) // 1-byte header 1032 * 7) // 7-bits of payload per encoded byte 1033 / 8; // 8-bits per byte 1034 } 1035 1036 /** 1037 * Encode a Blob value using a modified varint encoding scheme. 1038 * <p> 1039 * This format encodes a byte[] value such that no limitations on the input value are imposed. The 1040 * first byte encodes the encoding scheme that follows, {@link #BLOB_VAR}. Each encoded byte 1041 * thereafter consists of a header bit followed by 7 bits of payload. A header bit of '1' 1042 * indicates continuation of the encoding. A header bit of '0' indicates this byte contains the 1043 * last of the payload. An empty input value is encoded as the header byte immediately followed by 1044 * a termination byte {@code 0x00}. This is not ambiguous with the encoded value of {@code []}, 1045 * which results in {@code [0x80, 0x00]}. 1046 * </p> 1047 * @return the number of bytes written. 1048 */ 1049 public static int encodeBlobVar(PositionedByteRange dst, byte[] val, int voff, int vlen, 1050 Order ord) { 1051 if (null == val) { 1052 return encodeNull(dst, ord); 1053 } 1054 // Empty value is null-terminated. All other values are encoded as 7-bits per byte. 1055 assert dst.getRemaining() >= blobVarEncodedLength(vlen) : "buffer overflow expected."; 1056 final int offset = dst.getOffset(), start = dst.getPosition(); 1057 dst.put(BLOB_VAR); 1058 if (0 == vlen) { 1059 dst.put(TERM); 1060 } else { 1061 byte s = 1, t = 0; 1062 for (int i = voff; i < vlen; i++) { 1063 dst.put((byte) (0x80 | t | ((val[i] & 0xff) >>> s))); 1064 if (s < 7) { 1065 t = (byte) (val[i] << (7 - s)); 1066 s++; 1067 } else { 1068 dst.put((byte) (0x80 | val[i])); 1069 s = 1; 1070 t = 0; 1071 } 1072 } 1073 if (s > 1) { 1074 dst.put((byte) (0x7f & t)); 1075 } else { 1076 dst.getBytes()[offset + dst.getPosition() - 1] = 1077 (byte) (dst.getBytes()[offset + dst.getPosition() - 1] & 0x7f); 1078 } 1079 } 1080 ord.apply(dst.getBytes(), offset + start, dst.getPosition() - start); 1081 return dst.getPosition() - start; 1082 } 1083 1084 /** 1085 * Encode a blob value using a modified varint encoding scheme. 1086 * @return the number of bytes written. 1087 * @see #encodeBlobVar(PositionedByteRange, byte[], int, int, Order) 1088 */ 1089 public static int encodeBlobVar(PositionedByteRange dst, byte[] val, Order ord) { 1090 return encodeBlobVar(dst, val, 0, null != val ? val.length : 0, ord); 1091 } 1092 1093 /** 1094 * Decode a blob value that was encoded using BlobVar encoding. 1095 */ 1096 public static byte[] decodeBlobVar(PositionedByteRange src) { 1097 final byte header = src.get(); 1098 if (header == NULL || header == DESCENDING.apply(NULL)) { 1099 return null; 1100 } 1101 assert header == BLOB_VAR || header == DESCENDING.apply(BLOB_VAR); 1102 Order ord = BLOB_VAR == header ? ASCENDING : DESCENDING; 1103 if (src.peek() == ord.apply(TERM)) { 1104 // skip empty input buffer. 1105 src.get(); 1106 return new byte[0]; 1107 } 1108 final int offset = src.getOffset(), start = src.getPosition(); 1109 int end; 1110 byte[] a = src.getBytes(); 1111 for (end = start; (byte) (ord.apply(a[offset + end]) & 0x80) != TERM; end++) 1112 ; 1113 end++; // increment end to 1-past last byte 1114 // create ret buffer using length of encoded data + 1 (header byte) 1115 PositionedByteRange ret = 1116 new SimplePositionedMutableByteRange(blobVarDecodedLength(end - start + 1)); 1117 int s = 6; 1118 byte t = (byte) ((ord.apply(a[offset + start]) << 1) & 0xff); 1119 for (int i = start + 1; i < end; i++) { 1120 if (s == 7) { 1121 ret.put((byte) (t | (ord.apply(a[offset + i]) & 0x7f))); 1122 i++; 1123 // explicitly reset t -- clean up overflow buffer after decoding 1124 // a full cycle and retain assertion condition below. This happens 1125 t = 0; // when the LSB in the last encoded byte is 1. (HBASE-9893) 1126 } else { 1127 ret.put((byte) (t | ((ord.apply(a[offset + i]) & 0x7f) >>> s))); 1128 } 1129 if (i == end) break; 1130 t = (byte) ((ord.apply(a[offset + i]) << (8 - s)) & 0xff); 1131 s = s == 1 ? 7 : s - 1; 1132 } 1133 src.setPosition(end); 1134 assert t == 0 : "Unexpected bits remaining after decoding blob."; 1135 assert ret.getPosition() == ret.getLength() : "Allocated unnecessarily large return buffer."; 1136 return ret.getBytes(); 1137 } 1138 1139 /** 1140 * Encode a Blob value as a byte-for-byte copy. BlobCopy encoding in DESCENDING order is NULL 1141 * terminated so as to preserve proper sorting of {@code []} and so it does not support 1142 * {@code 0x00} in the value. 1143 * @return the number of bytes written. 1144 * @throws IllegalArgumentException when {@code ord} is DESCENDING and {@code val} contains a 1145 * {@code 0x00} byte. 1146 */ 1147 public static int encodeBlobCopy(PositionedByteRange dst, byte[] val, int voff, int vlen, 1148 Order ord) { 1149 if (null == val) { 1150 encodeNull(dst, ord); 1151 if (ASCENDING == ord) return 1; 1152 else { 1153 // DESCENDING ordered BlobCopy requires a termination bit to preserve 1154 // sort-order semantics of null values. 1155 dst.put(ord.apply(TERM)); 1156 return 2; 1157 } 1158 } 1159 // Blobs as final entry in a compound key are written unencoded. 1160 assert dst.getRemaining() >= vlen + (ASCENDING == ord ? 1 : 2); 1161 if (DESCENDING == ord) { 1162 for (int i = 0; i < vlen; i++) { 1163 if (TERM == val[voff + i]) { 1164 throw new IllegalArgumentException("0x00 bytes not permitted in value."); 1165 } 1166 } 1167 } 1168 final int offset = dst.getOffset(), start = dst.getPosition(); 1169 dst.put(BLOB_COPY); 1170 dst.put(val, voff, vlen); 1171 // DESCENDING ordered BlobCopy requires a termination bit to preserve 1172 // sort-order semantics of null values. 1173 if (DESCENDING == ord) dst.put(TERM); 1174 ord.apply(dst.getBytes(), offset + start, dst.getPosition() - start); 1175 return dst.getPosition() - start; 1176 } 1177 1178 /** 1179 * Encode a Blob value as a byte-for-byte copy. BlobCopy encoding in DESCENDING order is NULL 1180 * terminated so as to preserve proper sorting of {@code []} and so it does not support 1181 * {@code 0x00} in the value. 1182 * @return the number of bytes written. 1183 * @throws IllegalArgumentException when {@code ord} is DESCENDING and {@code val} contains a 1184 * {@code 0x00} byte. 1185 * @see #encodeBlobCopy(PositionedByteRange, byte[], int, int, Order) 1186 */ 1187 public static int encodeBlobCopy(PositionedByteRange dst, byte[] val, Order ord) { 1188 return encodeBlobCopy(dst, val, 0, null != val ? val.length : 0, ord); 1189 } 1190 1191 /** 1192 * Decode a Blob value, byte-for-byte copy. 1193 * @see #encodeBlobCopy(PositionedByteRange, byte[], int, int, Order) 1194 */ 1195 public static byte[] decodeBlobCopy(PositionedByteRange src) { 1196 byte header = src.get(); 1197 if (header == NULL || header == DESCENDING.apply(NULL)) { 1198 return null; 1199 } 1200 assert header == BLOB_COPY || header == DESCENDING.apply(BLOB_COPY); 1201 Order ord = header == BLOB_COPY ? ASCENDING : DESCENDING; 1202 final int length = src.getRemaining() - (ASCENDING == ord ? 0 : 1); 1203 byte[] ret = new byte[length]; 1204 src.get(ret); 1205 ord.apply(ret, 0, ret.length); 1206 // DESCENDING ordered BlobCopy requires a termination bit to preserve 1207 // sort-order semantics of null values. 1208 if (DESCENDING == ord) src.get(); 1209 return ret; 1210 } 1211 1212 /** 1213 * Encode a null value. 1214 * @param dst The destination to which encoded digits are written. 1215 * @param ord The {@link Order} to respect while encoding {@code val}. 1216 * @return the number of bytes written. 1217 */ 1218 public static int encodeNull(PositionedByteRange dst, Order ord) { 1219 dst.put(ord.apply(NULL)); 1220 return 1; 1221 } 1222 1223 /** 1224 * Encode an {@code int8} value using the fixed-length encoding. 1225 * @return the number of bytes written. 1226 * @see #encodeInt64(PositionedByteRange, long, Order) 1227 * @see #decodeInt8(PositionedByteRange) 1228 */ 1229 public static int encodeInt8(PositionedByteRange dst, byte val, Order ord) { 1230 final int offset = dst.getOffset(), start = dst.getPosition(); 1231 dst.put(FIXED_INT8).put((byte) (val ^ 0x80)); 1232 ord.apply(dst.getBytes(), offset + start, 2); 1233 return 2; 1234 } 1235 1236 /** 1237 * Decode an {@code int8} value. 1238 * @see #encodeInt8(PositionedByteRange, byte, Order) 1239 */ 1240 public static byte decodeInt8(PositionedByteRange src) { 1241 final byte header = src.get(); 1242 assert header == FIXED_INT8 || header == DESCENDING.apply(FIXED_INT8); 1243 Order ord = header == FIXED_INT8 ? ASCENDING : DESCENDING; 1244 return (byte) ((ord.apply(src.get()) ^ 0x80) & 0xff); 1245 } 1246 1247 /** 1248 * Encode an {@code int16} value using the fixed-length encoding. 1249 * @return the number of bytes written. 1250 * @see #encodeInt64(PositionedByteRange, long, Order) 1251 * @see #decodeInt16(PositionedByteRange) 1252 */ 1253 public static int encodeInt16(PositionedByteRange dst, short val, Order ord) { 1254 final int offset = dst.getOffset(), start = dst.getPosition(); 1255 dst.put(FIXED_INT16).put((byte) ((val >> 8) ^ 0x80)).put((byte) val); 1256 ord.apply(dst.getBytes(), offset + start, 3); 1257 return 3; 1258 } 1259 1260 /** 1261 * Decode an {@code int16} value. 1262 * @see #encodeInt16(PositionedByteRange, short, Order) 1263 */ 1264 public static short decodeInt16(PositionedByteRange src) { 1265 final byte header = src.get(); 1266 assert header == FIXED_INT16 || header == DESCENDING.apply(FIXED_INT16); 1267 Order ord = header == FIXED_INT16 ? ASCENDING : DESCENDING; 1268 short val = (short) ((ord.apply(src.get()) ^ 0x80) & 0xff); 1269 val = (short) ((val << 8) + (ord.apply(src.get()) & 0xff)); 1270 return val; 1271 } 1272 1273 /** 1274 * Encode an {@code int32} value using the fixed-length encoding. 1275 * @return the number of bytes written. 1276 * @see #encodeInt64(PositionedByteRange, long, Order) 1277 * @see #decodeInt32(PositionedByteRange) 1278 */ 1279 public static int encodeInt32(PositionedByteRange dst, int val, Order ord) { 1280 final int offset = dst.getOffset(), start = dst.getPosition(); 1281 dst.put(FIXED_INT32).put((byte) ((val >> 24) ^ 0x80)).put((byte) (val >> 16)) 1282 .put((byte) (val >> 8)).put((byte) val); 1283 ord.apply(dst.getBytes(), offset + start, 5); 1284 return 5; 1285 } 1286 1287 /** 1288 * Decode an {@code int32} value. 1289 * @see #encodeInt32(PositionedByteRange, int, Order) 1290 */ 1291 public static int decodeInt32(PositionedByteRange src) { 1292 final byte header = src.get(); 1293 assert header == FIXED_INT32 || header == DESCENDING.apply(FIXED_INT32); 1294 Order ord = header == FIXED_INT32 ? ASCENDING : DESCENDING; 1295 int val = (ord.apply(src.get()) ^ 0x80) & 0xff; 1296 for (int i = 1; i < 4; i++) { 1297 val = (val << 8) + (ord.apply(src.get()) & 0xff); 1298 } 1299 return val; 1300 } 1301 1302 /** 1303 * Encode an {@code int64} value using the fixed-length encoding. 1304 * <p> 1305 * This format ensures that all longs sort in their natural order, as they would sort when using 1306 * signed long comparison. 1307 * </p> 1308 * <p> 1309 * All Longs are serialized to an 8-byte, fixed-width sortable byte format. Serialization is 1310 * performed by inverting the integer sign bit and writing the resulting bytes to the byte array 1311 * in big endian order. The encoded value is prefixed by the {@link #FIXED_INT64} header byte. 1312 * This encoding is designed to handle java language primitives and so Null values are NOT 1313 * supported by this implementation. 1314 * </p> 1315 * <p> 1316 * For example: 1317 * </p> 1318 * 1319 * <pre> 1320 * Input: 0x0000000000000005 (5) 1321 * Result: 0x288000000000000005 1322 * 1323 * Input: 0xfffffffffffffffb (-4) 1324 * Result: 0x280000000000000004 1325 * 1326 * Input: 0x7fffffffffffffff (Long.MAX_VALUE) 1327 * Result: 0x28ffffffffffffffff 1328 * 1329 * Input: 0x8000000000000000 (Long.MIN_VALUE) 1330 * Result: 0x287fffffffffffffff 1331 * </pre> 1332 * <p> 1333 * This encoding format, and much of this documentation string, is based on Orderly's 1334 * {@code FixedIntWritableRowKey}. 1335 * </p> 1336 * @return the number of bytes written. 1337 * @see #decodeInt64(PositionedByteRange) 1338 */ 1339 public static int encodeInt64(PositionedByteRange dst, long val, Order ord) { 1340 final int offset = dst.getOffset(), start = dst.getPosition(); 1341 dst.put(FIXED_INT64).put((byte) ((val >> 56) ^ 0x80)).put((byte) (val >> 48)) 1342 .put((byte) (val >> 40)).put((byte) (val >> 32)).put((byte) (val >> 24)) 1343 .put((byte) (val >> 16)).put((byte) (val >> 8)).put((byte) val); 1344 ord.apply(dst.getBytes(), offset + start, 9); 1345 return 9; 1346 } 1347 1348 /** 1349 * Decode an {@code int64} value. 1350 * @see #encodeInt64(PositionedByteRange, long, Order) 1351 */ 1352 public static long decodeInt64(PositionedByteRange src) { 1353 final byte header = src.get(); 1354 assert header == FIXED_INT64 || header == DESCENDING.apply(FIXED_INT64); 1355 Order ord = header == FIXED_INT64 ? ASCENDING : DESCENDING; 1356 long val = (ord.apply(src.get()) ^ 0x80) & 0xff; 1357 for (int i = 1; i < 8; i++) { 1358 val = (val << 8) + (ord.apply(src.get()) & 0xff); 1359 } 1360 return val; 1361 } 1362 1363 /** 1364 * Encode a 32-bit floating point value using the fixed-length encoding. Encoding format is 1365 * described at length in {@link #encodeFloat64(PositionedByteRange, double, Order)}. 1366 * @return the number of bytes written. 1367 * @see #decodeFloat32(PositionedByteRange) 1368 * @see #encodeFloat64(PositionedByteRange, double, Order) 1369 */ 1370 public static int encodeFloat32(PositionedByteRange dst, float val, Order ord) { 1371 final int offset = dst.getOffset(), start = dst.getPosition(); 1372 int i = Float.floatToIntBits(val); 1373 i ^= ((i >> (Integer.SIZE - 1)) | Integer.MIN_VALUE); 1374 dst.put(FIXED_FLOAT32).put((byte) (i >> 24)).put((byte) (i >> 16)).put((byte) (i >> 8)) 1375 .put((byte) i); 1376 ord.apply(dst.getBytes(), offset + start, 5); 1377 return 5; 1378 } 1379 1380 /** 1381 * Decode a 32-bit floating point value using the fixed-length encoding. 1382 * @see #encodeFloat32(PositionedByteRange, float, Order) 1383 */ 1384 public static float decodeFloat32(PositionedByteRange src) { 1385 final byte header = src.get(); 1386 assert header == FIXED_FLOAT32 || header == DESCENDING.apply(FIXED_FLOAT32); 1387 Order ord = header == FIXED_FLOAT32 ? ASCENDING : DESCENDING; 1388 int val = ord.apply(src.get()) & 0xff; 1389 for (int i = 1; i < 4; i++) { 1390 val = (val << 8) + (ord.apply(src.get()) & 0xff); 1391 } 1392 val ^= (~val >> (Integer.SIZE - 1)) | Integer.MIN_VALUE; 1393 return Float.intBitsToFloat(val); 1394 } 1395 1396 /** 1397 * Encode a 64-bit floating point value using the fixed-length encoding. 1398 * <p> 1399 * This format ensures the following total ordering of floating point values: 1400 * Double.NEGATIVE_INFINITY < -Double.MAX_VALUE < ... < -Double.MIN_VALUE < -0.0 < 1401 * +0.0; < Double.MIN_VALUE < ... < Double.MAX_VALUE < Double.POSITIVE_INFINITY < 1402 * Double.NaN 1403 * </p> 1404 * <p> 1405 * Floating point numbers are encoded as specified in IEEE 754. A 64-bit double precision float 1406 * consists of a sign bit, 11-bit unsigned exponent encoded in offset-1023 notation, and a 52-bit 1407 * significand. The format is described further in the 1408 * <a href="http://en.wikipedia.org/wiki/Double_precision"> Double Precision Floating Point 1409 * Wikipedia page</a> 1410 * </p> 1411 * <p> 1412 * The value of a normal float is -1 <sup>sign bit</sup> × 2<sup>exponent - 1023</sup> 1413 * × 1.significand 1414 * </p> 1415 * <p> 1416 * The IEE754 floating point format already preserves sort ordering for positive floating point 1417 * numbers when the raw bytes are compared in most significant byte order. This is discussed 1418 * further at 1419 * <a href= "http://www.cygnus-software.com/papers/comparingfloats/comparingfloats.htm" > 1420 * http://www.cygnus-software.com/papers/comparingfloats/comparingfloats. htm</a> 1421 * </p> 1422 * <p> 1423 * Thus, we need only ensure that negative numbers sort in the the exact opposite order as 1424 * positive numbers (so that say, negative infinity is less than negative 1), and that all 1425 * negative numbers compare less than any positive number. To accomplish this, we invert the sign 1426 * bit of all floating point numbers, and we also invert the exponent and significand bits if the 1427 * floating point number was negative. 1428 * </p> 1429 * <p> 1430 * More specifically, we first store the floating point bits into a 64-bit long {@code l} using 1431 * {@link Double#doubleToLongBits}. This method collapses all NaNs into a single, canonical NaN 1432 * value but otherwise leaves the bits unchanged. We then compute 1433 * </p> 1434 * 1435 * <pre> 1436 * l ˆ= (l >> (Long.SIZE - 1)) | Long.MIN_SIZE 1437 * </pre> 1438 * <p> 1439 * which inverts the sign bit and XOR's all other bits with the sign bit itself. Comparing the raw 1440 * bytes of {@code l} in most significant byte order is equivalent to performing a double 1441 * precision floating point comparison on the underlying bits (ignoring NaN comparisons, as NaNs 1442 * don't compare equal to anything when performing floating point comparisons). 1443 * </p> 1444 * <p> 1445 * The resulting long integer is then converted into a byte array by serializing the long one byte 1446 * at a time in most significant byte order. The serialized integer is prefixed by a single header 1447 * byte. All serialized values are 9 bytes in length. 1448 * </p> 1449 * <p> 1450 * This encoding format, and much of this highly detailed documentation string, is based on 1451 * Orderly's {@code DoubleWritableRowKey}. 1452 * </p> 1453 * @return the number of bytes written. 1454 * @see #decodeFloat64(PositionedByteRange) 1455 */ 1456 public static int encodeFloat64(PositionedByteRange dst, double val, Order ord) { 1457 final int offset = dst.getOffset(), start = dst.getPosition(); 1458 long lng = Double.doubleToLongBits(val); 1459 lng ^= ((lng >> (Long.SIZE - 1)) | Long.MIN_VALUE); 1460 dst.put(FIXED_FLOAT64).put((byte) (lng >> 56)).put((byte) (lng >> 48)).put((byte) (lng >> 40)) 1461 .put((byte) (lng >> 32)).put((byte) (lng >> 24)).put((byte) (lng >> 16)) 1462 .put((byte) (lng >> 8)).put((byte) lng); 1463 ord.apply(dst.getBytes(), offset + start, 9); 1464 return 9; 1465 } 1466 1467 /** 1468 * Decode a 64-bit floating point value using the fixed-length encoding. 1469 * @see #encodeFloat64(PositionedByteRange, double, Order) 1470 */ 1471 public static double decodeFloat64(PositionedByteRange src) { 1472 final byte header = src.get(); 1473 assert header == FIXED_FLOAT64 || header == DESCENDING.apply(FIXED_FLOAT64); 1474 Order ord = header == FIXED_FLOAT64 ? ASCENDING : DESCENDING; 1475 long val = ord.apply(src.get()) & 0xff; 1476 for (int i = 1; i < 8; i++) { 1477 val = (val << 8) + (ord.apply(src.get()) & 0xff); 1478 } 1479 val ^= (~val >> (Long.SIZE - 1)) | Long.MIN_VALUE; 1480 return Double.longBitsToDouble(val); 1481 } 1482 1483 /** 1484 * Returns true when {@code src} appears to be positioned an encoded value, false otherwise. 1485 */ 1486 public static boolean isEncodedValue(PositionedByteRange src) { 1487 return isNull(src) || isNumeric(src) || isFixedInt8(src) || isFixedInt16(src) 1488 || isFixedInt32(src) || isFixedInt64(src) || isFixedFloat32(src) || isFixedFloat64(src) 1489 || isText(src) || isBlobCopy(src) || isBlobVar(src); 1490 } 1491 1492 /** 1493 * Return true when the next encoded value in {@code src} is null, false otherwise. 1494 */ 1495 public static boolean isNull(PositionedByteRange src) { 1496 return NULL == (-1 == Integer.signum(src.peek()) ? DESCENDING : ASCENDING).apply(src.peek()); 1497 } 1498 1499 /** 1500 * Return true when the next encoded value in {@code src} uses Numeric encoding, false otherwise. 1501 * {@code NaN}, {@code +/-Inf} are valid Numeric values. 1502 */ 1503 public static boolean isNumeric(PositionedByteRange src) { 1504 byte x = (-1 == Integer.signum(src.peek()) ? DESCENDING : ASCENDING).apply(src.peek()); 1505 return x >= NEG_INF && x <= NAN; 1506 } 1507 1508 /** 1509 * Return true when the next encoded value in {@code src} uses Numeric encoding and is 1510 * {@code Infinite}, false otherwise. 1511 */ 1512 public static boolean isNumericInfinite(PositionedByteRange src) { 1513 byte x = (-1 == Integer.signum(src.peek()) ? DESCENDING : ASCENDING).apply(src.peek()); 1514 return NEG_INF == x || POS_INF == x; 1515 } 1516 1517 /** 1518 * Return true when the next encoded value in {@code src} uses Numeric encoding and is 1519 * {@code NaN}, false otherwise. 1520 */ 1521 public static boolean isNumericNaN(PositionedByteRange src) { 1522 return NAN == (-1 == Integer.signum(src.peek()) ? DESCENDING : ASCENDING).apply(src.peek()); 1523 } 1524 1525 /** 1526 * Return true when the next encoded value in {@code src} uses Numeric encoding and is {@code 0}, 1527 * false otherwise. 1528 */ 1529 public static boolean isNumericZero(PositionedByteRange src) { 1530 return ZERO == (-1 == Integer.signum(src.peek()) ? DESCENDING : ASCENDING).apply(src.peek()); 1531 } 1532 1533 /** 1534 * Return true when the next encoded value in {@code src} uses fixed-width Int8 encoding, false 1535 * otherwise. 1536 */ 1537 public static boolean isFixedInt8(PositionedByteRange src) { 1538 return FIXED_INT8 1539 == (-1 == Integer.signum(src.peek()) ? DESCENDING : ASCENDING).apply(src.peek()); 1540 } 1541 1542 /** 1543 * Return true when the next encoded value in {@code src} uses fixed-width Int16 encoding, false 1544 * otherwise. 1545 */ 1546 public static boolean isFixedInt16(PositionedByteRange src) { 1547 return FIXED_INT16 1548 == (-1 == Integer.signum(src.peek()) ? DESCENDING : ASCENDING).apply(src.peek()); 1549 } 1550 1551 /** 1552 * Return true when the next encoded value in {@code src} uses fixed-width Int32 encoding, false 1553 * otherwise. 1554 */ 1555 public static boolean isFixedInt32(PositionedByteRange src) { 1556 return FIXED_INT32 1557 == (-1 == Integer.signum(src.peek()) ? DESCENDING : ASCENDING).apply(src.peek()); 1558 } 1559 1560 /** 1561 * Return true when the next encoded value in {@code src} uses fixed-width Int64 encoding, false 1562 * otherwise. 1563 */ 1564 public static boolean isFixedInt64(PositionedByteRange src) { 1565 return FIXED_INT64 1566 == (-1 == Integer.signum(src.peek()) ? DESCENDING : ASCENDING).apply(src.peek()); 1567 } 1568 1569 /** 1570 * Return true when the next encoded value in {@code src} uses fixed-width Float32 encoding, false 1571 * otherwise. 1572 */ 1573 public static boolean isFixedFloat32(PositionedByteRange src) { 1574 return FIXED_FLOAT32 1575 == (-1 == Integer.signum(src.peek()) ? DESCENDING : ASCENDING).apply(src.peek()); 1576 } 1577 1578 /** 1579 * Return true when the next encoded value in {@code src} uses fixed-width Float64 encoding, false 1580 * otherwise. 1581 */ 1582 public static boolean isFixedFloat64(PositionedByteRange src) { 1583 return FIXED_FLOAT64 1584 == (-1 == Integer.signum(src.peek()) ? DESCENDING : ASCENDING).apply(src.peek()); 1585 } 1586 1587 /** 1588 * Return true when the next encoded value in {@code src} uses Text encoding, false otherwise. 1589 */ 1590 public static boolean isText(PositionedByteRange src) { 1591 return TEXT == (-1 == Integer.signum(src.peek()) ? DESCENDING : ASCENDING).apply(src.peek()); 1592 } 1593 1594 /** 1595 * Return true when the next encoded value in {@code src} uses BlobVar encoding, false otherwise. 1596 */ 1597 public static boolean isBlobVar(PositionedByteRange src) { 1598 return BLOB_VAR 1599 == (-1 == Integer.signum(src.peek()) ? DESCENDING : ASCENDING).apply(src.peek()); 1600 } 1601 1602 /** 1603 * Return true when the next encoded value in {@code src} uses BlobCopy encoding, false otherwise. 1604 */ 1605 public static boolean isBlobCopy(PositionedByteRange src) { 1606 return BLOB_COPY 1607 == (-1 == Integer.signum(src.peek()) ? DESCENDING : ASCENDING).apply(src.peek()); 1608 } 1609 1610 /** 1611 * Skip {@code buff}'s position forward over one encoded value. 1612 * @return number of bytes skipped. 1613 */ 1614 public static int skip(PositionedByteRange src) { 1615 final int start = src.getPosition(); 1616 byte header = src.get(); 1617 Order ord = (-1 == Integer.signum(header)) ? DESCENDING : ASCENDING; 1618 header = ord.apply(header); 1619 1620 switch (header) { 1621 case NULL: 1622 case NEG_INF: 1623 return 1; 1624 case NEG_LARGE: /* Large negative number: 0x08, ~E, ~M */ 1625 skipVaruint64(src, DESCENDING != ord); 1626 skipSignificand(src, DESCENDING != ord); 1627 return src.getPosition() - start; 1628 case NEG_MED_MIN: /* Medium negative number: 0x13-E, ~M */ 1629 case NEG_MED_MIN + 0x01: 1630 case NEG_MED_MIN + 0x02: 1631 case NEG_MED_MIN + 0x03: 1632 case NEG_MED_MIN + 0x04: 1633 case NEG_MED_MIN + 0x05: 1634 case NEG_MED_MIN + 0x06: 1635 case NEG_MED_MIN + 0x07: 1636 case NEG_MED_MIN + 0x08: 1637 case NEG_MED_MIN + 0x09: 1638 case NEG_MED_MAX: 1639 skipSignificand(src, DESCENDING != ord); 1640 return src.getPosition() - start; 1641 case NEG_SMALL: /* Small negative number: 0x14, -E, ~M */ 1642 skipVaruint64(src, DESCENDING == ord); 1643 skipSignificand(src, DESCENDING != ord); 1644 return src.getPosition() - start; 1645 case ZERO: 1646 return 1; 1647 case POS_SMALL: /* Small positive number: 0x16, ~-E, M */ 1648 skipVaruint64(src, DESCENDING != ord); 1649 skipSignificand(src, DESCENDING == ord); 1650 return src.getPosition() - start; 1651 case POS_MED_MIN: /* Medium positive number: 0x17+E, M */ 1652 case POS_MED_MIN + 0x01: 1653 case POS_MED_MIN + 0x02: 1654 case POS_MED_MIN + 0x03: 1655 case POS_MED_MIN + 0x04: 1656 case POS_MED_MIN + 0x05: 1657 case POS_MED_MIN + 0x06: 1658 case POS_MED_MIN + 0x07: 1659 case POS_MED_MIN + 0x08: 1660 case POS_MED_MIN + 0x09: 1661 case POS_MED_MAX: 1662 skipSignificand(src, DESCENDING == ord); 1663 return src.getPosition() - start; 1664 case POS_LARGE: /* Large positive number: 0x22, E, M */ 1665 skipVaruint64(src, DESCENDING == ord); 1666 skipSignificand(src, DESCENDING == ord); 1667 return src.getPosition() - start; 1668 case POS_INF: 1669 return 1; 1670 case NAN: 1671 return 1; 1672 case FIXED_INT8: 1673 src.setPosition(src.getPosition() + 1); 1674 return src.getPosition() - start; 1675 case FIXED_INT16: 1676 src.setPosition(src.getPosition() + 2); 1677 return src.getPosition() - start; 1678 case FIXED_INT32: 1679 src.setPosition(src.getPosition() + 4); 1680 return src.getPosition() - start; 1681 case FIXED_INT64: 1682 src.setPosition(src.getPosition() + 8); 1683 return src.getPosition() - start; 1684 case FIXED_FLOAT32: 1685 src.setPosition(src.getPosition() + 4); 1686 return src.getPosition() - start; 1687 case FIXED_FLOAT64: 1688 src.setPosition(src.getPosition() + 8); 1689 return src.getPosition() - start; 1690 case TEXT: 1691 // for null-terminated values, skip to the end. 1692 do { 1693 header = ord.apply(src.get()); 1694 } while (header != TERM); 1695 return src.getPosition() - start; 1696 case BLOB_VAR: 1697 // read until we find a 0 in the MSB 1698 do { 1699 header = ord.apply(src.get()); 1700 } while ((byte) (header & 0x80) != TERM); 1701 return src.getPosition() - start; 1702 case BLOB_COPY: 1703 if (Order.DESCENDING == ord) { 1704 // if descending, read to termination byte. 1705 do { 1706 header = ord.apply(src.get()); 1707 } while (header != TERM); 1708 return src.getPosition() - start; 1709 } else { 1710 // otherwise, just skip to the end. 1711 src.setPosition(src.getLength()); 1712 return src.getPosition() - start; 1713 } 1714 default: 1715 throw unexpectedHeader(header); 1716 } 1717 } 1718 1719 /** 1720 * Return the number of encoded entries remaining in {@code buff}. The state of {@code buff} is 1721 * not modified through use of this method. 1722 */ 1723 public static int length(PositionedByteRange buff) { 1724 PositionedByteRange b = 1725 new SimplePositionedMutableByteRange(buff.getBytes(), buff.getOffset(), buff.getLength()); 1726 b.setPosition(buff.getPosition()); 1727 int cnt = 0; 1728 for (; isEncodedValue(b); skip(b), cnt++) 1729 ; 1730 return cnt; 1731 } 1732}