View Javadoc

1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one
3    * or more contributor license agreements.  See the NOTICE file
4    * distributed with this work for additional information
5    * regarding copyright ownership.  The ASF licenses this file
6    * to you under the Apache License, Version 2.0 (the
7    * "License"); you may not use this file except in compliance
8    * with the License.  You may obtain a copy of the License at
9    *
10   *     http://www.apache.org/licenses/LICENSE-2.0
11   *
12   * Unless required by applicable law or agreed to in writing, software
13   * distributed under the License is distributed on an "AS IS" BASIS,
14   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15   * See the License for the specific language governing permissions and
16   * limitations under the License.
17   */
18  
19  package org.apache.hadoop.hbase.util.vint;
20  
21  import java.io.IOException;
22  import java.io.OutputStream;
23  
24  import org.apache.hadoop.hbase.classification.InterfaceAudience;
25  
26  /**
27   * UFInt is an abbreviation for Unsigned Fixed-width Integer.
28   *
29   * This class converts between positive ints and 1-4 bytes that represent the int.  All input ints
30   * must be positive.  Max values stored in N bytes are:
31   *
32   * N=1: 2^8  =>           256
33   * N=2: 2^16 =>        65,536
34   * N=3: 2^24 =>    16,777,216
35   * N=4: 2^31 => 2,147,483,648 (Integer.MAX_VALUE)
36   *
37   * This was created to get most of the memory savings of a variable length integer when encoding
38   * an array of input integers, but to fix the number of bytes for each integer to the number needed
39   * to store the maximum integer in the array.  This enables a binary search to be performed on the
40   * array of encoded integers.
41   *
42   * PrefixTree nodes often store offsets into a block that can fit into 1 or 2 bytes.  Note that if
43   * the maximum value of an array of numbers needs 2 bytes, then it's likely that a majority of the
44   * numbers will also require 2 bytes.
45   *
46   * warnings:
47   *  * no input validation for max performance
48   *  * no negatives
49   */
50  @InterfaceAudience.Private
51  public class UFIntTool {
52  
53    private static final int NUM_BITS_IN_LONG = 64;
54  
55    public static long maxValueForNumBytes(int numBytes) {
56      return (1L << (numBytes * 8)) - 1;
57    }
58  
59    public static int numBytes(final long value) {
60      if (value == 0) {// 0 doesn't work with the formula below
61        return 1;
62      }
63      return (NUM_BITS_IN_LONG + 7 - Long.numberOfLeadingZeros(value)) / 8;
64    }
65  
66    public static byte[] getBytes(int outputWidth, final long value) {
67      byte[] bytes = new byte[outputWidth];
68      writeBytes(outputWidth, value, bytes, 0);
69      return bytes;
70    }
71  
72    public static void writeBytes(int outputWidth, final long value, byte[] bytes, int offset) {
73      bytes[offset + outputWidth - 1] = (byte) value;
74      for (int i = outputWidth - 2; i >= 0; --i) {
75        bytes[offset + i] = (byte) (value >>> (outputWidth - i - 1) * 8);
76      }
77    }
78  
79    private static final long[] MASKS = new long[] {
80      (long) 255,
81  	  (long) 255 << 8,
82  	  (long) 255 << 16,
83  	  (long) 255 << 24,
84  	  (long) 255 << 32,
85  	  (long) 255 << 40,
86  	  (long) 255 << 48,
87  	  (long) 255 << 56
88    };
89  
90    public static void writeBytes(int outputWidth, final long value, OutputStream os) throws IOException {
91      for (int i = outputWidth - 1; i >= 0; --i) {
92        os.write((byte) ((value & MASKS[i]) >>> (8 * i)));
93      }
94    }
95  
96    public static long fromBytes(final byte[] bytes) {
97      long value = 0;
98      value |= bytes[0] & 0xff;// these seem to do ok without casting the byte to int
99      for (int i = 1; i < bytes.length; ++i) {
100       value <<= 8;
101       value |= bytes[i] & 0xff;
102     }
103     return value;
104   }
105 
106   public static long fromBytes(final byte[] bytes, final int offset, final int width) {
107     long value = 0;
108     value |= bytes[0 + offset] & 0xff;// these seem to do ok without casting the byte to int
109     for (int i = 1; i < width; ++i) {
110       value <<= 8;
111       value |= bytes[i + offset] & 0xff;
112     }
113     return value;
114   }
115 
116 }