View Javadoc

1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one
3    * or more contributor license agreements.  See the NOTICE file
4    * distributed with this work for additional information
5    * regarding copyright ownership.  The ASF licenses this file
6    * to you under the Apache License, Version 2.0 (the
7    * "License"); you may not use this file except in compliance
8    * with the License.  You may obtain a copy of the License at
9    *
10   *     http://www.apache.org/licenses/LICENSE-2.0
11   *
12   * Unless required by applicable law or agreed to in writing, software
13   * distributed under the License is distributed on an "AS IS" BASIS,
14   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15   * See the License for the specific language governing permissions and
16   * limitations under the License.
17   */
18  package org.apache.hadoop.hbase.io.hfile;
19  
20  import java.io.ByteArrayOutputStream;
21  import java.io.IOException;
22  import java.nio.ByteBuffer;
23  
24  import org.apache.commons.logging.Log;
25  import org.apache.commons.logging.LogFactory;
26  import org.apache.hadoop.fs.ChecksumException;
27  import org.apache.hadoop.hbase.classification.InterfaceAudience;
28  import org.apache.hadoop.hbase.util.ChecksumType;
29  import org.apache.hadoop.util.DataChecksum;
30  
31  /**
32   * Utility methods to compute and validate checksums.
33   */
34  @InterfaceAudience.Private
35  public class ChecksumUtil {
36    public static final Log LOG = LogFactory.getLog(ChecksumUtil.class);
37  
38    /** This is used to reserve space in a byte buffer */
39    private static byte[] DUMMY_VALUE = new byte[128 * HFileBlock.CHECKSUM_SIZE];
40  
41    /**
42     * This is used by unit tests to make checksum failures throw an
43     * exception instead of returning null. Returning a null value from
44     * checksum validation will cause the higher layer to retry that
45     * read with hdfs-level checksums. Instead, we would like checksum
46     * failures to cause the entire unit test to fail.
47     */
48    private static boolean generateExceptions = false;
49  
50    /**
51     * Generates a checksum for all the data in indata. The checksum is
52     * written to outdata.
53     * @param indata input data stream
54     * @param startOffset starting offset in the indata stream from where to
55     *                    compute checkums from
56     * @param endOffset ending offset in the indata stream upto
57     *                   which checksums needs to be computed
58     * @param outdata the output buffer where checksum values are written
59     * @param outOffset the starting offset in the outdata where the
60     *                  checksum values are written
61     * @param checksumType type of checksum
62     * @param bytesPerChecksum number of bytes per checksum value
63     */
64    static void generateChecksums(byte[] indata, int startOffset, int endOffset,
65      byte[] outdata, int outOffset, ChecksumType checksumType,
66      int bytesPerChecksum) throws IOException {
67  
68      if (checksumType == ChecksumType.NULL) {
69        return; // No checksum for this block.
70      }
71  
72      DataChecksum checksum = DataChecksum.newDataChecksum(
73          checksumType.getDataChecksumType(), bytesPerChecksum);
74  
75      checksum.calculateChunkedSums(
76         ByteBuffer.wrap(indata, startOffset, endOffset - startOffset),
77         ByteBuffer.wrap(outdata, outOffset, outdata.length - outOffset));
78    }
79  
80    /**
81     * Validates that the data in the specified HFileBlock matches the checksum. Generates the
82     * checksums for the data and then validate that it matches those stored in the end of the data.
83     * @param buffer Contains the data in following order: HFileBlock header, data, checksums.
84     * @param pathName Path of the HFile to which the {@code data} belongs. Only used for logging.
85     * @param offset offset of the data being validated. Only used for logging.
86     * @param hdrSize Size of the block header in {@code data}. Only used for logging.
87     * @return True if checksum matches, else false.
88     */
89    static boolean validateChecksum(ByteBuffer buffer, String pathName, long offset, int hdrSize)
90        throws IOException {
91      // A ChecksumType.NULL indicates that the caller is not interested in validating checksums,
92      // so we always return true.
93      ChecksumType cktype =
94          ChecksumType.codeToType(buffer.get(HFileBlock.Header.CHECKSUM_TYPE_INDEX));
95      if (cktype == ChecksumType.NULL) {
96        return true; // No checksum validations needed for this block.
97      }
98
99      // read in the stored value of the checksum size from the header.
100     int bytesPerChecksum = buffer.getInt(HFileBlock.Header.BYTES_PER_CHECKSUM_INDEX);
101 
102     DataChecksum dataChecksum = DataChecksum.newDataChecksum(
103         cktype.getDataChecksumType(), bytesPerChecksum);
104     assert dataChecksum != null;
105     int onDiskDataSizeWithHeader =
106         buffer.getInt(HFileBlock.Header.ON_DISK_DATA_SIZE_WITH_HEADER_INDEX);
107     if (LOG.isTraceEnabled()) {
108       LOG.info("dataLength=" + buffer.capacity()
109           + ", sizeWithHeader=" + onDiskDataSizeWithHeader
110           + ", checksumType=" + cktype.getName()
111           + ", file=" + pathName
112           + ", offset=" + offset
113           + ", headerSize=" + hdrSize
114           + ", bytesPerChecksum=" + bytesPerChecksum);
115     }
116     try {
117       ByteBuffer data = (ByteBuffer) buffer.duplicate().position(0).limit(onDiskDataSizeWithHeader);
118       ByteBuffer checksums = (ByteBuffer) buffer.duplicate().position(onDiskDataSizeWithHeader)
119           .limit(buffer.capacity());
120       dataChecksum.verifyChunkedSums(data, checksums, pathName, 0);
121     } catch (ChecksumException e) {
122       return false;
123     }
124     return true;  // checksum is valid
125   }
126
127   /**
128    * Returns the number of bytes needed to store the checksums for
129    * a specified data size
130    * @param datasize number of bytes of data
131    * @param bytesPerChecksum number of bytes in a checksum chunk
132    * @return The number of bytes needed to store the checksum values
133    */
134   static long numBytes(long datasize, int bytesPerChecksum) {
135     return numChunks(datasize, bytesPerChecksum) * HFileBlock.CHECKSUM_SIZE;
136   }
137
138   /**
139    * Returns the number of checksum chunks needed to store the checksums for
140    * a specified data size
141    * @param datasize number of bytes of data
142    * @param bytesPerChecksum number of bytes in a checksum chunk
143    * @return The number of checksum chunks
144    */
145   static long numChunks(long datasize, int bytesPerChecksum) {
146     long numChunks = datasize/bytesPerChecksum;
147     if (datasize % bytesPerChecksum != 0) {
148       numChunks++;
149     }
150     return numChunks;
151   }
152
153   /**
154    * Write dummy checksums to the end of the specified bytes array
155    * to reserve space for writing checksums later
156    * @param baos OutputStream to write dummy checkum values
157    * @param numBytes Number of bytes of data for which dummy checksums
158    *                 need to be generated
159    * @param bytesPerChecksum Number of bytes per checksum value
160    */
161   static void reserveSpaceForChecksums(ByteArrayOutputStream baos,
162     int numBytes, int bytesPerChecksum) throws IOException {
163     long numChunks = numChunks(numBytes, bytesPerChecksum);
164     long bytesLeft = numChunks * HFileBlock.CHECKSUM_SIZE;
165     while (bytesLeft > 0) {
166       long count = Math.min(bytesLeft, DUMMY_VALUE.length);
167       baos.write(DUMMY_VALUE, 0, (int)count);
168       bytesLeft -= count;
169     }
170   }
171
172   /**
173    * Mechanism to throw an exception in case of hbase checksum
174    * failure. This is used by unit tests only.
175    * @param value Setting this to true will cause hbase checksum
176    *              verification failures to generate exceptions.
177    */
178   public static void generateExceptionForChecksumFailureForTest(boolean value) {
179     generateExceptions = value;
180   }
181 }
182