View Javadoc

1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one
3    * or more contributor license agreements.  See the NOTICE file
4    * distributed with this work for additional information
5    * regarding copyright ownership.  The ASF licenses this file
6    * to you under the Apache License, Version 2.0 (the
7    * "License"); you may not use this file except in compliance
8    * with the License.  You may obtain a copy of the License at
9    *
10   *     http://www.apache.org/licenses/LICENSE-2.0
11   *
12   * Unless required by applicable law or agreed to in writing, software
13   * distributed under the License is distributed on an "AS IS" BASIS,
14   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15   * See the License for the specific language governing permissions and
16   * limitations under the License.
17   */
18  package org.apache.hadoop.hbase.io.hfile;
19  
20  import java.io.ByteArrayOutputStream;
21  import java.io.IOException;
22  import java.nio.ByteBuffer;
23  import java.util.zip.Checksum;
24  
25  import org.apache.hadoop.fs.Path;
26  import org.apache.hadoop.io.DataOutputBuffer;
27  import org.apache.hadoop.hbase.HConstants;
28  import org.apache.hadoop.hbase.util.Bytes;
29  import org.apache.hadoop.hbase.util.ChecksumFactory;
30  import org.apache.hadoop.hbase.util.ChecksumType;
31  
32  /**
33   * Utility methods to compute and validate checksums.
34   */
35  public class ChecksumUtil {
36  
37    /** This is used to reserve space in a byte buffer */
38    private static byte[] DUMMY_VALUE = new byte[128 * HFileBlock.CHECKSUM_SIZE];
39  
40    /** 
41     * This is used by unit tests to make checksum failures throw an 
42     * exception instead of returning null. Returning a null value from 
43     * checksum validation will cause the higher layer to retry that 
44     * read with hdfs-level checksums. Instead, we would like checksum 
45     * failures to cause the entire unit test to fail.
46     */
47    private static boolean generateExceptions = false;
48  
49    /**
50     * Generates a checksum for all the data in indata. The checksum is
51     * written to outdata.
52     * @param indata input data stream
53     * @param startOffset starting offset in the indata stream from where to
54     *                    compute checkums from
55     * @param endOffset ending offset in the indata stream upto
56     *                   which checksums needs to be computed
57     * @param outData the output buffer where checksum values are written
58     * @param outOffset the starting offset in the outdata where the
59     *                  checksum values are written
60     * @param checksumType type of checksum
61     * @param bytesPerChecksum number of bytes per checksum value
62     */
63    static void generateChecksums(byte[] indata,
64      int startOffset, int endOffset, 
65      byte[] outdata, int outOffset,
66      ChecksumType checksumType,
67      int bytesPerChecksum) throws IOException {
68  
69      if (checksumType == ChecksumType.NULL) {
70        return; // No checkums for this block.
71      }
72  
73      Checksum checksum = checksumType.getChecksumObject();
74      int bytesLeft = endOffset - startOffset;
75      int chunkNum = 0;
76  
77      while (bytesLeft > 0) {
78        // generate the checksum for one chunk
79        checksum.reset();
80        int count = Math.min(bytesLeft, bytesPerChecksum);
81        checksum.update(indata, startOffset, count);
82  
83        // write the checksum value to the output buffer.
84        int cksumValue = (int)checksum.getValue();
85        outOffset = Bytes.putInt(outdata, outOffset, cksumValue);
86        chunkNum++;
87        startOffset += count;
88        bytesLeft -= count;
89      }
90    }
91  
92    /**
93     * Validates that the data in the specified HFileBlock matches the
94     * checksum.  Generates the checksum for the data and
95     * then validate that it matches the value stored in the header.
96     * If there is a checksum mismatch, then return false. Otherwise
97     * return true.
98     * The header is extracted from the specified HFileBlock while the
99     * data-to-be-verified is extracted from 'data'.
100    */
101   static boolean validateBlockChecksum(Path path, HFileBlock block, 
102     byte[] data, int hdrSize) throws IOException {
103 
104     // If this is an older version of the block that does not have
105     // checksums, then return false indicating that checksum verification
106     // did not succeed. Actually, this methiod should never be called
107     // when the minorVersion is 0, thus this is a defensive check for a
108     // cannot-happen case. Since this is a cannot-happen case, it is
109     // better to return false to indicate a checksum validation failure.
110     if (block.getMinorVersion() < HFileBlock.MINOR_VERSION_WITH_CHECKSUM) {
111       return false;
112     }
113 
114     // Get a checksum object based on the type of checksum that is
115     // set in the HFileBlock header. A ChecksumType.NULL indicates that 
116     // the caller is not interested in validating checksums, so we
117     // always return true.
118     ChecksumType cktype = ChecksumType.codeToType(block.getChecksumType());
119     if (cktype == ChecksumType.NULL) {
120       return true; // No checkums validations needed for this block.
121     }
122     Checksum checksumObject = cktype.getChecksumObject();
123     checksumObject.reset();
124 
125     // read in the stored value of the checksum size from the header.
126     int bytesPerChecksum = block.getBytesPerChecksum();
127 
128     // bytesPerChecksum is always larger than the size of the header
129     if (bytesPerChecksum < hdrSize) {
130       String msg = "Unsupported value of bytesPerChecksum. " +
131                    " Minimum is " + hdrSize + 
132                    " but the configured value is " + bytesPerChecksum;
133       HFile.LOG.warn(msg);
134       return false;   // cannot happen case, unable to verify checksum
135     }
136     // Extract the header and compute checksum for the header.
137     ByteBuffer hdr = block.getBufferWithHeader();
138     checksumObject.update(hdr.array(), hdr.arrayOffset(), hdrSize);
139 
140     int off = hdrSize;
141     int consumed = hdrSize;
142     int bytesLeft = block.getOnDiskDataSizeWithHeader() - off;
143     int cksumOffset = block.getOnDiskDataSizeWithHeader();
144     
145     // validate each chunk
146     while (bytesLeft > 0) {
147       int thisChunkSize = bytesPerChecksum - consumed;
148       int count = Math.min(bytesLeft, thisChunkSize);
149       checksumObject.update(data, off, count);
150 
151       int storedChecksum = Bytes.toInt(data, cksumOffset);
152       if (storedChecksum != (int)checksumObject.getValue()) {
153         String msg = "File " + path +
154                      " Stored checksum value of " + storedChecksum +
155                      " at offset " + cksumOffset +
156                      " does not match computed checksum " +
157                      checksumObject.getValue() +
158                      ", total data size " + data.length +
159                      " Checksum data range offset " + off + " len " + count +
160                      HFileBlock.toStringHeader(block.getBufferReadOnly());
161         HFile.LOG.warn(msg);
162         if (generateExceptions) {
163           throw new IOException(msg); // this is only for unit tests
164         } else {
165           return false;               // checksum validation failure
166         }
167       }
168       cksumOffset += HFileBlock.CHECKSUM_SIZE;
169       bytesLeft -= count; 
170       off += count;
171       consumed = 0;
172       checksumObject.reset();
173     }
174     return true; // checksum is valid
175   }
176 
177   /**
178    * Returns the number of bytes needed to store the checksums for
179    * a specified data size
180    * @param datasize number of bytes of data
181    * @param bytesPerChecksum number of bytes in a checksum chunk
182    * @return The number of bytes needed to store the checksum values
183    */
184   static long numBytes(long datasize, int bytesPerChecksum) {
185     return numChunks(datasize, bytesPerChecksum) * 
186                      HFileBlock.CHECKSUM_SIZE;
187   }
188 
189   /**
190    * Returns the number of checksum chunks needed to store the checksums for
191    * a specified data size
192    * @param datasize number of bytes of data
193    * @param bytesPerChecksum number of bytes in a checksum chunk
194    * @return The number of checksum chunks
195    */
196   static long numChunks(long datasize, int bytesPerChecksum) {
197     long numChunks = datasize/bytesPerChecksum;
198     if (datasize % bytesPerChecksum != 0) {
199       numChunks++;
200     }
201     return numChunks;
202   }
203 
204   /**
205    * Write dummy checksums to the end of the specified bytes array
206    * to reserve space for writing checksums later
207    * @param baos OutputStream to write dummy checkum values
208    * @param numBytes Number of bytes of data for which dummy checksums
209    *                 need to be generated
210    * @param bytesPerChecksum Number of bytes per checksum value
211    */
212   static void reserveSpaceForChecksums(ByteArrayOutputStream baos,
213     int numBytes, int bytesPerChecksum) throws IOException {
214     long numChunks = numChunks(numBytes, bytesPerChecksum);
215     long bytesLeft = numChunks * HFileBlock.CHECKSUM_SIZE;
216     while (bytesLeft > 0) {
217       long count = Math.min(bytesLeft, DUMMY_VALUE.length);
218       baos.write(DUMMY_VALUE, 0, (int)count);
219       bytesLeft -= count;
220     }
221   }
222 
223   /**
224    * Mechanism to throw an exception in case of hbase checksum
225    * failure. This is used by unit tests only.
226    * @param value Setting this to true will cause hbase checksum
227    *              verification failures to generate exceptions.
228    */
229   public static void generateExceptionForChecksumFailureForTest(boolean value) {
230     generateExceptions = value;
231   }
232 }
233