View Javadoc

1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one
3    * or more contributor license agreements.  See the NOTICE file
4    * distributed with this work for additional information
5    * regarding copyright ownership.  The ASF licenses this file
6    * to you under the Apache License, Version 2.0 (the
7    * "License"); you may not use this file except in compliance
8    * with the License.  You may obtain a copy of the License at
9    *
10   *     http://www.apache.org/licenses/LICENSE-2.0
11   *
12   * Unless required by applicable law or agreed to in writing, software
13   * distributed under the License is distributed on an "AS IS" BASIS,
14   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15   * See the License for the specific language governing permissions and
16   * limitations under the License.
17   */
18  package org.apache.hadoop.hbase.io.hfile;
19  
20  import java.io.ByteArrayOutputStream;
21  import java.io.IOException;
22  import java.nio.ByteBuffer;
23  import java.util.zip.Checksum;
24  
25  import org.apache.hadoop.fs.ChecksumException;
26  import org.apache.hadoop.hbase.classification.InterfaceAudience;
27  import org.apache.hadoop.fs.Path;
28  import org.apache.hadoop.hbase.util.ByteBufferUtils;
29  import org.apache.hadoop.hbase.util.Bytes;
30  import org.apache.hadoop.hbase.util.ChecksumType;
31  import org.apache.hadoop.util.DataChecksum;
32  
33  /**
34   * Utility methods to compute and validate checksums.
35   */
36  @InterfaceAudience.Private
37  public class ChecksumUtil {
38  
39    /** This is used to reserve space in a byte buffer */
40    private static byte[] DUMMY_VALUE = new byte[128 * HFileBlock.CHECKSUM_SIZE];
41  
42    /** 
43     * This is used by unit tests to make checksum failures throw an 
44     * exception instead of returning null. Returning a null value from 
45     * checksum validation will cause the higher layer to retry that 
46     * read with hdfs-level checksums. Instead, we would like checksum 
47     * failures to cause the entire unit test to fail.
48     */
49    private static boolean generateExceptions = false;
50  
51    /**
52     * Generates a checksum for all the data in indata. The checksum is
53     * written to outdata.
54     * @param indata input data stream
55     * @param startOffset starting offset in the indata stream from where to
56     *                    compute checkums from
57     * @param endOffset ending offset in the indata stream upto
58     *                   which checksums needs to be computed
59     * @param outdata the output buffer where checksum values are written
60     * @param outOffset the starting offset in the outdata where the
61     *                  checksum values are written
62     * @param checksumType type of checksum
63     * @param bytesPerChecksum number of bytes per checksum value
64     */
65    static void generateChecksums(byte[] indata,
66      int startOffset, int endOffset, 
67      byte[] outdata, int outOffset,
68      ChecksumType checksumType,
69      int bytesPerChecksum) throws IOException {
70  
71      if (checksumType == ChecksumType.NULL) {
72        return; // No checkums for this block.
73      }
74  
75      Checksum checksum = checksumType.getChecksumObject();
76      int bytesLeft = endOffset - startOffset;
77      int chunkNum = 0;
78  
79      while (bytesLeft > 0) {
80        // generate the checksum for one chunk
81        checksum.reset();
82        int count = Math.min(bytesLeft, bytesPerChecksum);
83        checksum.update(indata, startOffset, count);
84  
85        // write the checksum value to the output buffer.
86        int cksumValue = (int)checksum.getValue();
87        outOffset = Bytes.putInt(outdata, outOffset, cksumValue);
88        chunkNum++;
89        startOffset += count;
90        bytesLeft -= count;
91      }
92    }
93  
94    /**
95     * Validates that the data in the specified HFileBlock matches the checksum. Generates the
96     * checksums for the data and then validate that it matches those stored in the end of the data.
97     * @param buffer Contains the data in following order: HFileBlock header, data, checksums.
98     * @param path Path of the HFile to which the {@code data} belongs. Only used for logging.
99     * @param offset offset of the data being validated. Only used for logging.
100    * @param hdrSize Size of the block header in {@code data}. Only used for logging.
101    * @return True if checksum matches, else false.
102    */
103   static boolean validateChecksum(ByteBuffer buffer, Path path, long offset, int hdrSize)
104       throws IOException {
105     // A ChecksumType.NULL indicates that the caller is not interested in validating checksums,
106     // so we always return true.
107     ChecksumType cktype =
108         ChecksumType.codeToType(buffer.get(HFileBlock.Header.CHECKSUM_TYPE_INDEX));
109     if (cktype == ChecksumType.NULL) {
110       return true; // No checkums validations needed for this block.
111     }
112     // read in the stored value of the checksum size from the header.
113     int bytesPerChecksum = buffer.getInt(HFileBlock.Header.BYTES_PER_CHECKSUM_INDEX);
114     int onDiskDataSizeWithHeader =
115         buffer.getInt(HFileBlock.Header.ON_DISK_DATA_SIZE_WITH_HEADER_INDEX);
116 
117     if (HFile.LOG.isTraceEnabled()) {
118       HFile.LOG.info("dataLength=" + buffer.capacity()
119           + ", sizeWithHeader=" + onDiskDataSizeWithHeader
120           + ", checksumType=" + cktype.getName()
121           + ", file=" + path.toString()
122           + ", offset=" + offset
123           + ", headerSize=" + hdrSize
124           + ", bytesPerChecksum=" + bytesPerChecksum);
125     }
126     // bytesPerChecksum is always larger than the size of the header
127     if (bytesPerChecksum < hdrSize) {
128       String msg = "Unsupported value of bytesPerChecksum. " +
129                    " Minimum is " + hdrSize + 
130                    " but the configured value is " + bytesPerChecksum;
131       HFile.LOG.warn(msg);
132       return false;   // cannot happen case, unable to verify checksum
133     }
134     byte[] data;
135     if (buffer.hasArray()) {
136       data = buffer.array();
137     } else {
138       data = ByteBufferUtils.toBytes(buffer, 0);
139     }
140 
141     Checksum checksumObject = cktype.getChecksumObject();
142     checksumObject.reset();
143     // Extract the header and compute checksum for the header.
144     checksumObject.update(data, 0, hdrSize);
145 
146     int off = hdrSize;
147     int consumed = hdrSize;
148     int cksumOffset = onDiskDataSizeWithHeader;
149     int bytesLeft = cksumOffset - off;
150 
151     // validate each chunk
152     while (bytesLeft > 0) {
153       int thisChunkSize = bytesPerChecksum - consumed;
154       int count = Math.min(bytesLeft, thisChunkSize);
155       checksumObject.update(data, off, count);
156 
157       int storedChecksum = Bytes.toInt(data, cksumOffset);
158       if (storedChecksum != (int)checksumObject.getValue()) {
159         String msg = "File " + path +
160                      " Stored checksum value of " + storedChecksum +
161                      " at offset " + cksumOffset +
162                      " does not match computed checksum " +
163                      checksumObject.getValue() +
164                      ", total data size " + data.length +
165                      " Checksum data range offset " + off + " len " + count +
166                      HFileBlock.toStringHeader(buffer);
167         HFile.LOG.warn(msg);
168         if (generateExceptions) {
169           throw new IOException(msg); // this is only for unit tests
170         } else {
171           return false;               // checksum validation failure
172         }
173       }
174       cksumOffset += HFileBlock.CHECKSUM_SIZE;
175       bytesLeft -= count; 
176       off += count;
177       consumed = 0;
178       checksumObject.reset();
179     }
180     return true; // checksum is valid
181   }
182 
183   /**
184    * Returns the number of bytes needed to store the checksums for
185    * a specified data size
186    * @param datasize number of bytes of data
187    * @param bytesPerChecksum number of bytes in a checksum chunk
188    * @return The number of bytes needed to store the checksum values
189    */
190   static long numBytes(long datasize, int bytesPerChecksum) {
191     return numChunks(datasize, bytesPerChecksum) * 
192                      HFileBlock.CHECKSUM_SIZE;
193   }
194 
195   /**
196    * Returns the number of checksum chunks needed to store the checksums for
197    * a specified data size
198    * @param datasize number of bytes of data
199    * @param bytesPerChecksum number of bytes in a checksum chunk
200    * @return The number of checksum chunks
201    */
202   static long numChunks(long datasize, int bytesPerChecksum) {
203     long numChunks = datasize/bytesPerChecksum;
204     if (datasize % bytesPerChecksum != 0) {
205       numChunks++;
206     }
207     return numChunks;
208   }
209 
210   /**
211    * Write dummy checksums to the end of the specified bytes array
212    * to reserve space for writing checksums later
213    * @param baos OutputStream to write dummy checkum values
214    * @param numBytes Number of bytes of data for which dummy checksums
215    *                 need to be generated
216    * @param bytesPerChecksum Number of bytes per checksum value
217    */
218   static void reserveSpaceForChecksums(ByteArrayOutputStream baos,
219     int numBytes, int bytesPerChecksum) throws IOException {
220     long numChunks = numChunks(numBytes, bytesPerChecksum);
221     long bytesLeft = numChunks * HFileBlock.CHECKSUM_SIZE;
222     while (bytesLeft > 0) {
223       long count = Math.min(bytesLeft, DUMMY_VALUE.length);
224       baos.write(DUMMY_VALUE, 0, (int)count);
225       bytesLeft -= count;
226     }
227   }
228 
229   /**
230    * Mechanism to throw an exception in case of hbase checksum
231    * failure. This is used by unit tests only.
232    * @param value Setting this to true will cause hbase checksum
233    *              verification failures to generate exceptions.
234    */
235   public static void generateExceptionForChecksumFailureForTest(boolean value) {
236     generateExceptions = value;
237   }
238 }
239