View Javadoc

1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one
3    * or more contributor license agreements.  See the NOTICE file
4    * distributed with this work for additional information
5    * regarding copyright ownership.  The ASF licenses this file
6    * to you under the Apache License, Version 2.0 (the
7    * "License"); you may not use this file except in compliance
8    * with the License.  You may obtain a copy of the License at
9    *
10   *     http://www.apache.org/licenses/LICENSE-2.0
11   *
12   * Unless required by applicable law or agreed to in writing, software
13   * distributed under the License is distributed on an "AS IS" BASIS,
14   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15   * See the License for the specific language governing permissions and
16   * limitations under the License.
17   */
18  package org.apache.hadoop.hbase.io.hfile;
19  
20  import java.io.ByteArrayOutputStream;
21  import java.io.IOException;
22  import java.nio.ByteBuffer;
23  import java.util.zip.Checksum;
24  
25  import org.apache.hadoop.fs.Path;
26  import org.apache.hadoop.hbase.util.Bytes;
27  import org.apache.hadoop.hbase.util.ChecksumType;
28  
29  /**
30   * Utility methods to compute and validate checksums.
31   */
32  public class ChecksumUtil {
33  
34    /** This is used to reserve space in a byte buffer */
35    private static byte[] DUMMY_VALUE = new byte[128 * HFileBlock.CHECKSUM_SIZE];
36  
37    /** 
38     * This is used by unit tests to make checksum failures throw an 
39     * exception instead of returning null. Returning a null value from 
40     * checksum validation will cause the higher layer to retry that 
41     * read with hdfs-level checksums. Instead, we would like checksum 
42     * failures to cause the entire unit test to fail.
43     */
44    private static boolean generateExceptions = false;
45  
46    /**
47     * Generates a checksum for all the data in indata. The checksum is
48     * written to outdata.
49     * @param indata input data stream
50     * @param startOffset starting offset in the indata stream from where to
51     *                    compute checkums from
52     * @param endOffset ending offset in the indata stream upto
53     *                   which checksums needs to be computed
54     * @param outdata the output buffer where checksum values are written
55     * @param outOffset the starting offset in the outdata where the
56     *                  checksum values are written
57     * @param checksumType type of checksum
58     * @param bytesPerChecksum number of bytes per checksum value
59     */
60    static void generateChecksums(byte[] indata,
61      int startOffset, int endOffset, 
62      byte[] outdata, int outOffset,
63      ChecksumType checksumType,
64      int bytesPerChecksum) throws IOException {
65  
66      if (checksumType == ChecksumType.NULL) {
67        return; // No checkums for this block.
68      }
69  
70      Checksum checksum = checksumType.getChecksumObject();
71      int bytesLeft = endOffset - startOffset;
72      int chunkNum = 0;
73  
74      while (bytesLeft > 0) {
75        // generate the checksum for one chunk
76        checksum.reset();
77        int count = Math.min(bytesLeft, bytesPerChecksum);
78        checksum.update(indata, startOffset, count);
79  
80        // write the checksum value to the output buffer.
81        int cksumValue = (int)checksum.getValue();
82        outOffset = Bytes.putInt(outdata, outOffset, cksumValue);
83        chunkNum++;
84        startOffset += count;
85        bytesLeft -= count;
86      }
87    }
88  
89    /**
90     * Validates that the data in the specified HFileBlock matches the
91     * checksum.  Generates the checksum for the data and
92     * then validate that it matches the value stored in the header.
93     * If there is a checksum mismatch, then return false. Otherwise
94     * return true.
95     * The header is extracted from the specified HFileBlock while the
96     * data-to-be-verified is extracted from 'data'.
97     */
98    static boolean validateBlockChecksum(Path path, HFileBlock block, 
99      byte[] data, int hdrSize) throws IOException {
100 
101     // If this is an older version of the block that does not have
102     // checksums, then return false indicating that checksum verification
103     // did not succeed. Actually, this methiod should never be called
104     // when the minorVersion is 0, thus this is a defensive check for a
105     // cannot-happen case. Since this is a cannot-happen case, it is
106     // better to return false to indicate a checksum validation failure.
107     if (!block.getHFileContext().isUseHBaseChecksum()) {
108       return false;
109     }
110 
111     // Get a checksum object based on the type of checksum that is
112     // set in the HFileBlock header. A ChecksumType.NULL indicates that 
113     // the caller is not interested in validating checksums, so we
114     // always return true.
115     ChecksumType cktype = ChecksumType.codeToType(block.getChecksumType());
116     if (cktype == ChecksumType.NULL) {
117       return true; // No checkums validations needed for this block.
118     }
119     Checksum checksumObject = cktype.getChecksumObject();
120     checksumObject.reset();
121 
122     // read in the stored value of the checksum size from the header.
123     int bytesPerChecksum = block.getBytesPerChecksum();
124 
125     // bytesPerChecksum is always larger than the size of the header
126     if (bytesPerChecksum < hdrSize) {
127       String msg = "Unsupported value of bytesPerChecksum. " +
128                    " Minimum is " + hdrSize + 
129                    " but the configured value is " + bytesPerChecksum;
130       HFile.LOG.warn(msg);
131       return false;   // cannot happen case, unable to verify checksum
132     }
133     // Extract the header and compute checksum for the header.
134     ByteBuffer hdr = block.getBufferWithHeader();
135     checksumObject.update(hdr.array(), hdr.arrayOffset(), hdrSize);
136 
137     int off = hdrSize;
138     int consumed = hdrSize;
139     int bytesLeft = block.getOnDiskDataSizeWithHeader() - off;
140     int cksumOffset = block.getOnDiskDataSizeWithHeader();
141     
142     // validate each chunk
143     while (bytesLeft > 0) {
144       int thisChunkSize = bytesPerChecksum - consumed;
145       int count = Math.min(bytesLeft, thisChunkSize);
146       checksumObject.update(data, off, count);
147 
148       int storedChecksum = Bytes.toInt(data, cksumOffset);
149       if (storedChecksum != (int)checksumObject.getValue()) {
150         String msg = "File " + path +
151                      " Stored checksum value of " + storedChecksum +
152                      " at offset " + cksumOffset +
153                      " does not match computed checksum " +
154                      checksumObject.getValue() +
155                      ", total data size " + data.length +
156                      " Checksum data range offset " + off + " len " + count +
157                      HFileBlock.toStringHeader(block.getBufferReadOnly());
158         HFile.LOG.warn(msg);
159         if (generateExceptions) {
160           throw new IOException(msg); // this is only for unit tests
161         } else {
162           return false;               // checksum validation failure
163         }
164       }
165       cksumOffset += HFileBlock.CHECKSUM_SIZE;
166       bytesLeft -= count; 
167       off += count;
168       consumed = 0;
169       checksumObject.reset();
170     }
171     return true; // checksum is valid
172   }
173 
174   /**
175    * Returns the number of bytes needed to store the checksums for
176    * a specified data size
177    * @param datasize number of bytes of data
178    * @param bytesPerChecksum number of bytes in a checksum chunk
179    * @return The number of bytes needed to store the checksum values
180    */
181   static long numBytes(long datasize, int bytesPerChecksum) {
182     return numChunks(datasize, bytesPerChecksum) * 
183                      HFileBlock.CHECKSUM_SIZE;
184   }
185 
186   /**
187    * Returns the number of checksum chunks needed to store the checksums for
188    * a specified data size
189    * @param datasize number of bytes of data
190    * @param bytesPerChecksum number of bytes in a checksum chunk
191    * @return The number of checksum chunks
192    */
193   static long numChunks(long datasize, int bytesPerChecksum) {
194     long numChunks = datasize/bytesPerChecksum;
195     if (datasize % bytesPerChecksum != 0) {
196       numChunks++;
197     }
198     return numChunks;
199   }
200 
201   /**
202    * Write dummy checksums to the end of the specified bytes array
203    * to reserve space for writing checksums later
204    * @param baos OutputStream to write dummy checkum values
205    * @param numBytes Number of bytes of data for which dummy checksums
206    *                 need to be generated
207    * @param bytesPerChecksum Number of bytes per checksum value
208    */
209   static void reserveSpaceForChecksums(ByteArrayOutputStream baos,
210     int numBytes, int bytesPerChecksum) throws IOException {
211     long numChunks = numChunks(numBytes, bytesPerChecksum);
212     long bytesLeft = numChunks * HFileBlock.CHECKSUM_SIZE;
213     while (bytesLeft > 0) {
214       long count = Math.min(bytesLeft, DUMMY_VALUE.length);
215       baos.write(DUMMY_VALUE, 0, (int)count);
216       bytesLeft -= count;
217     }
218   }
219 
220   /**
221    * Mechanism to throw an exception in case of hbase checksum
222    * failure. This is used by unit tests only.
223    * @param value Setting this to true will cause hbase checksum
224    *              verification failures to generate exceptions.
225    */
226   public static void generateExceptionForChecksumFailureForTest(boolean value) {
227     generateExceptions = value;
228   }
229 }
230