View Javadoc

1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one
3    * or more contributor license agreements.  See the NOTICE file
4    * distributed with this work for additional information
5    * regarding copyright ownership.  The ASF licenses this file
6    * to you under the Apache License, Version 2.0 (the
7    * "License"); you may not use this file except in compliance
8    * with the License.  You may obtain a copy of the License at
9    *
10   *     http://www.apache.org/licenses/LICENSE-2.0
11   *
12   * Unless required by applicable law or agreed to in writing, software
13   * distributed under the License is distributed on an "AS IS" BASIS,
14   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15   * See the License for the specific language governing permissions and
16   * limitations under the License.
17   */
18  package org.apache.hadoop.hbase.io.hfile;
19  
20  import java.io.ByteArrayOutputStream;
21  import java.io.IOException;
22  import java.nio.ByteBuffer;
23  
24  import org.apache.commons.logging.Log;
25  import org.apache.commons.logging.LogFactory;
26  import org.apache.hadoop.fs.ChecksumException;
27  import org.apache.hadoop.hbase.classification.InterfaceAudience;
28  import org.apache.hadoop.hbase.util.ChecksumType;
29  import org.apache.hadoop.util.DataChecksum;
30  
31  /**
32   * Utility methods to compute and validate checksums.
33   */
34  @InterfaceAudience.Private
35  public class ChecksumUtil {
36    public static final Log LOG = LogFactory.getLog(ChecksumUtil.class);
37  
38    /** This is used to reserve space in a byte buffer */
39    private static byte[] DUMMY_VALUE = new byte[128 * HFileBlock.CHECKSUM_SIZE];
40  
41    /**
42     * This is used by unit tests to make checksum failures throw an
43     * exception instead of returning null. Returning a null value from
44     * checksum validation will cause the higher layer to retry that
45     * read with hdfs-level checksums. Instead, we would like checksum
46     * failures to cause the entire unit test to fail.
47     */
48    private static boolean generateExceptions = false;
49  
50    /**
51     * Generates a checksum for all the data in indata. The checksum is
52     * written to outdata.
53     * @param indata input data stream
54     * @param startOffset starting offset in the indata stream from where to
55     *                    compute checkums from
56     * @param endOffset ending offset in the indata stream upto
57     *                   which checksums needs to be computed
58     * @param outdata the output buffer where checksum values are written
59     * @param outOffset the starting offset in the outdata where the
60     *                  checksum values are written
61     * @param checksumType type of checksum
62     * @param bytesPerChecksum number of bytes per checksum value
63     */
64    static void generateChecksums(byte[] indata, int startOffset, int endOffset,
65      byte[] outdata, int outOffset, ChecksumType checksumType,
66      int bytesPerChecksum) throws IOException {
67  
68      if (checksumType == ChecksumType.NULL) {
69        return; // No checksum for this block.
70      }
71  
72      DataChecksum checksum = DataChecksum.newDataChecksum(
73          checksumType.getDataChecksumType(), bytesPerChecksum);
74  
75      checksum.calculateChunkedSums(
76         ByteBuffer.wrap(indata, startOffset, endOffset - startOffset),
77         ByteBuffer.wrap(outdata, outOffset, outdata.length - outOffset));
78    }
79  
80    /**
81     * Validates that the data in the specified HFileBlock matches the
82     * checksum.  Generates the checksum for the data and
83     * then validate that it matches the value stored in the header.
84     * If there is a checksum mismatch, then return false. Otherwise
85     * return true.
86     * The header is extracted from the specified HFileBlock while the
87     * data-to-be-verified is extracted from 'data'.
88     */
89    static boolean validateBlockChecksum(String pathName, long offset, HFileBlock block,
90      byte[] data, int hdrSize) throws IOException {
91  
92      // If this is an older version of the block that does not have
93      // checksums, then return false indicating that checksum verification
94      // did not succeed. Actually, this method should never be called
95      // when the minorVersion is 0, thus this is a defensive check for a
96      // cannot-happen case. Since this is a cannot-happen case, it is
97      // better to return false to indicate a checksum validation failure.
98      if (!block.getHFileContext().isUseHBaseChecksum()) {
99        return false;
100     }
101 
102     // Get a checksum object based on the type of checksum that is
103     // set in the HFileBlock header. A ChecksumType.NULL indicates that
104     // the caller is not interested in validating checksums, so we
105     // always return true.
106     ChecksumType cktype = ChecksumType.codeToType(block.getChecksumType());
107     if (cktype == ChecksumType.NULL) {
108       return true; // No checksum validations needed for this block.
109     }
110 
111     // read in the stored value of the checksum size from the header.
112     int bytesPerChecksum = block.getBytesPerChecksum();
113 
114     DataChecksum dataChecksum = DataChecksum.newDataChecksum(
115         cktype.getDataChecksumType(), bytesPerChecksum);
116     assert dataChecksum != null;
117     int sizeWithHeader =  block.getOnDiskDataSizeWithHeader();
118     if (LOG.isTraceEnabled()) {
119       LOG.info("dataLength=" + data.length
120           + ", sizeWithHeader=" + sizeWithHeader
121           + ", checksumType=" + cktype.getName()
122           + ", file=" + pathName
123           + ", offset=" + offset
124           + ", headerSize=" + hdrSize
125           + ", bytesPerChecksum=" + bytesPerChecksum);
126     }
127     try {
128       dataChecksum.verifyChunkedSums(ByteBuffer.wrap(data, 0, sizeWithHeader),
129           ByteBuffer.wrap(data, sizeWithHeader, data.length - sizeWithHeader), pathName, 0);
130     } catch (ChecksumException e) {
131       return false;
132     }
133     return true;  // checksum is valid
134   }
135 
136   /**
137    * Returns the number of bytes needed to store the checksums for
138    * a specified data size
139    * @param datasize number of bytes of data
140    * @param bytesPerChecksum number of bytes in a checksum chunk
141    * @return The number of bytes needed to store the checksum values
142    */
143   static long numBytes(long datasize, int bytesPerChecksum) {
144     return numChunks(datasize, bytesPerChecksum) * HFileBlock.CHECKSUM_SIZE;
145   }
146 
147   /**
148    * Returns the number of checksum chunks needed to store the checksums for
149    * a specified data size
150    * @param datasize number of bytes of data
151    * @param bytesPerChecksum number of bytes in a checksum chunk
152    * @return The number of checksum chunks
153    */
154   static long numChunks(long datasize, int bytesPerChecksum) {
155     long numChunks = datasize/bytesPerChecksum;
156     if (datasize % bytesPerChecksum != 0) {
157       numChunks++;
158     }
159     return numChunks;
160   }
161 
162   /**
163    * Write dummy checksums to the end of the specified bytes array
164    * to reserve space for writing checksums later
165    * @param baos OutputStream to write dummy checkum values
166    * @param numBytes Number of bytes of data for which dummy checksums
167    *                 need to be generated
168    * @param bytesPerChecksum Number of bytes per checksum value
169    */
170   static void reserveSpaceForChecksums(ByteArrayOutputStream baos,
171     int numBytes, int bytesPerChecksum) throws IOException {
172     long numChunks = numChunks(numBytes, bytesPerChecksum);
173     long bytesLeft = numChunks * HFileBlock.CHECKSUM_SIZE;
174     while (bytesLeft > 0) {
175       long count = Math.min(bytesLeft, DUMMY_VALUE.length);
176       baos.write(DUMMY_VALUE, 0, (int)count);
177       bytesLeft -= count;
178     }
179   }
180 
181   /**
182    * Mechanism to throw an exception in case of hbase checksum
183    * failure. This is used by unit tests only.
184    * @param value Setting this to true will cause hbase checksum
185    *              verification failures to generate exceptions.
186    */
187   public static void generateExceptionForChecksumFailureForTest(boolean value) {
188     generateExceptions = value;
189   }
190 }
191