View Javadoc

1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one
3    * or more contributor license agreements.  See the NOTICE file
4    * distributed with this work for additional information
5    * regarding copyright ownership.  The ASF licenses this file
6    * to you under the Apache License, Version 2.0 (the
7    * "License"); you may not use this file except in compliance
8    * with the License.  You may obtain a copy of the License at
9    *
10   *     http://www.apache.org/licenses/LICENSE-2.0
11   *
12   * Unless required by applicable law or agreed to in writing, software
13   * distributed under the License is distributed on an "AS IS" BASIS,
14   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15   * See the License for the specific language governing permissions and
16   * limitations under the License.
17   */
18  package org.apache.hadoop.hbase.io.hfile;
19  
20  import java.io.ByteArrayOutputStream;
21  import java.io.IOException;
22  import java.nio.ByteBuffer;
23  
24  import org.apache.commons.logging.Log;
25  import org.apache.commons.logging.LogFactory;
26  import org.apache.hadoop.fs.ChecksumException;
27  import org.apache.hadoop.hbase.classification.InterfaceAudience;
28  import org.apache.hadoop.fs.Path;
29  import org.apache.hadoop.hbase.util.ChecksumType;
30  import org.apache.hadoop.util.DataChecksum;
31  
32  /**
33   * Utility methods to compute and validate checksums.
34   */
35  @InterfaceAudience.Private
36  public class ChecksumUtil {
37    public static final Log LOG = LogFactory.getLog(ChecksumUtil.class);
38  
39    /** This is used to reserve space in a byte buffer */
40    private static byte[] DUMMY_VALUE = new byte[128 * HFileBlock.CHECKSUM_SIZE];
41  
42    /**
43     * This is used by unit tests to make checksum failures throw an
44     * exception instead of returning null. Returning a null value from
45     * checksum validation will cause the higher layer to retry that
46     * read with hdfs-level checksums. Instead, we would like checksum
47     * failures to cause the entire unit test to fail.
48     */
49    private static boolean generateExceptions = false;
50  
51    /**
52     * Generates a checksum for all the data in indata. The checksum is
53     * written to outdata.
54     * @param indata input data stream
55     * @param startOffset starting offset in the indata stream from where to
56     *                    compute checkums from
57     * @param endOffset ending offset in the indata stream upto
58     *                   which checksums needs to be computed
59     * @param outdata the output buffer where checksum values are written
60     * @param outOffset the starting offset in the outdata where the
61     *                  checksum values are written
62     * @param checksumType type of checksum
63     * @param bytesPerChecksum number of bytes per checksum value
64     */
65    static void generateChecksums(byte[] indata, int startOffset, int endOffset,
66      byte[] outdata, int outOffset, ChecksumType checksumType,
67      int bytesPerChecksum) throws IOException {
68  
69      if (checksumType == ChecksumType.NULL) {
70        return; // No checksum for this block.
71      }
72  
73      DataChecksum checksum = DataChecksum.newDataChecksum(
74          checksumType.getDataChecksumType(), bytesPerChecksum);
75  
76      checksum.calculateChunkedSums(
77         ByteBuffer.wrap(indata, startOffset, endOffset - startOffset),
78         ByteBuffer.wrap(outdata, outOffset, outdata.length - outOffset));
79    }
80  
81    /**
82     * Validates that the data in the specified HFileBlock matches the checksum. Generates the
83     * checksums for the data and then validate that it matches those stored in the end of the data.
84     * @param buffer Contains the data in following order: HFileBlock header, data, checksums.
85     * @param path Path of the HFile to which the {@code data} belongs. Only used for logging.
86     * @param offset offset of the data being validated. Only used for logging.
87     * @param hdrSize Size of the block header in {@code data}. Only used for logging.
88     * @return True if checksum matches, else false.
89     */
90    static boolean validateChecksum(ByteBuffer buffer, Path path, long offset, int hdrSize)
91        throws IOException {
92      // A ChecksumType.NULL indicates that the caller is not interested in validating checksums,
93      // so we always return true.
94      ChecksumType cktype =
95          ChecksumType.codeToType(buffer.get(HFileBlock.Header.CHECKSUM_TYPE_INDEX));
96      if (cktype == ChecksumType.NULL) {
97        return true; // No checksum validations needed for this block.
98      }
99  
100     // read in the stored value of the checksum size from the header.
101     int bytesPerChecksum = buffer.getInt(HFileBlock.Header.BYTES_PER_CHECKSUM_INDEX);
102 
103     DataChecksum dataChecksum = DataChecksum.newDataChecksum(
104         cktype.getDataChecksumType(), bytesPerChecksum);
105     assert dataChecksum != null;
106     int onDiskDataSizeWithHeader =
107         buffer.getInt(HFileBlock.Header.ON_DISK_DATA_SIZE_WITH_HEADER_INDEX);
108     if (LOG.isTraceEnabled()) {
109       LOG.info("dataLength=" + buffer.capacity()
110           + ", sizeWithHeader=" + onDiskDataSizeWithHeader
111           + ", checksumType=" + cktype.getName()
112           + ", file=" + path.toString()
113           + ", offset=" + offset
114           + ", headerSize=" + hdrSize
115           + ", bytesPerChecksum=" + bytesPerChecksum);
116     }
117     try {
118       ByteBuffer data = (ByteBuffer) buffer.duplicate().position(0).limit(onDiskDataSizeWithHeader);
119       ByteBuffer checksums = (ByteBuffer) buffer.duplicate().position(onDiskDataSizeWithHeader)
120           .limit(buffer.capacity());
121       dataChecksum.verifyChunkedSums(data, checksums, path.toString(), 0);
122     } catch (ChecksumException e) {
123       return false;
124     }
125     return true;  // checksum is valid
126   }
127 
128   /**
129    * Returns the number of bytes needed to store the checksums for
130    * a specified data size
131    * @param datasize number of bytes of data
132    * @param bytesPerChecksum number of bytes in a checksum chunk
133    * @return The number of bytes needed to store the checksum values
134    */
135   static long numBytes(long datasize, int bytesPerChecksum) {
136     return numChunks(datasize, bytesPerChecksum) *
137                      HFileBlock.CHECKSUM_SIZE;
138   }
139 
140   /**
141    * Returns the number of checksum chunks needed to store the checksums for
142    * a specified data size
143    * @param datasize number of bytes of data
144    * @param bytesPerChecksum number of bytes in a checksum chunk
145    * @return The number of checksum chunks
146    */
147   static long numChunks(long datasize, int bytesPerChecksum) {
148     long numChunks = datasize/bytesPerChecksum;
149     if (datasize % bytesPerChecksum != 0) {
150       numChunks++;
151     }
152     return numChunks;
153   }
154 
155   /**
156    * Write dummy checksums to the end of the specified bytes array
157    * to reserve space for writing checksums later
158    * @param baos OutputStream to write dummy checkum values
159    * @param numBytes Number of bytes of data for which dummy checksums
160    *                 need to be generated
161    * @param bytesPerChecksum Number of bytes per checksum value
162    */
163   static void reserveSpaceForChecksums(ByteArrayOutputStream baos,
164     int numBytes, int bytesPerChecksum) throws IOException {
165     long numChunks = numChunks(numBytes, bytesPerChecksum);
166     long bytesLeft = numChunks * HFileBlock.CHECKSUM_SIZE;
167     while (bytesLeft > 0) {
168       long count = Math.min(bytesLeft, DUMMY_VALUE.length);
169       baos.write(DUMMY_VALUE, 0, (int)count);
170       bytesLeft -= count;
171     }
172   }
173 
174   /**
175    * Mechanism to throw an exception in case of hbase checksum
176    * failure. This is used by unit tests only.
177    * @param value Setting this to true will cause hbase checksum
178    *              verification failures to generate exceptions.
179    */
180   public static void generateExceptionForChecksumFailureForTest(boolean value) {
181     generateExceptions = value;
182   }
183 }
184