1 /*
2 * Licensed to the Apache Software Foundation (ASF) under one
3 * or more contributor license agreements. See the NOTICE file
4 * distributed with this work for additional information
5 * regarding copyright ownership. The ASF licenses this file
6 * to you under the Apache License, Version 2.0 (the
7 * "License"); you may not use this file except in compliance
8 * with the License. You may obtain a copy of the License at
9 *
10 * http://www.apache.org/licenses/LICENSE-2.0
11 *
12 * Unless required by applicable law or agreed to in writing, software
13 * distributed under the License is distributed on an "AS IS" BASIS,
14 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 * See the License for the specific language governing permissions and
16 * limitations under the License.
17 */
18 package org.apache.hadoop.hbase.io.hfile;
19
20 import java.io.ByteArrayOutputStream;
21 import java.io.IOException;
22 import java.nio.ByteBuffer;
23 import java.util.zip.Checksum;
24
25 import org.apache.hadoop.fs.ChecksumException;
26 import org.apache.hadoop.hbase.classification.InterfaceAudience;
27 import org.apache.hadoop.fs.Path;
28 import org.apache.hadoop.hbase.util.ByteBufferUtils;
29 import org.apache.hadoop.hbase.util.Bytes;
30 import org.apache.hadoop.hbase.util.ChecksumType;
31 import org.apache.hadoop.util.DataChecksum;
32
33 /**
34 * Utility methods to compute and validate checksums.
35 */
36 @InterfaceAudience.Private
37 public class ChecksumUtil {
38
39 /** This is used to reserve space in a byte buffer */
40 private static byte[] DUMMY_VALUE = new byte[128 * HFileBlock.CHECKSUM_SIZE];
41
42 /**
43 * This is used by unit tests to make checksum failures throw an
44 * exception instead of returning null. Returning a null value from
45 * checksum validation will cause the higher layer to retry that
46 * read with hdfs-level checksums. Instead, we would like checksum
47 * failures to cause the entire unit test to fail.
48 */
49 private static boolean generateExceptions = false;
50
51 /**
52 * Generates a checksum for all the data in indata. The checksum is
53 * written to outdata.
54 * @param indata input data stream
55 * @param startOffset starting offset in the indata stream from where to
56 * compute checkums from
57 * @param endOffset ending offset in the indata stream upto
58 * which checksums needs to be computed
59 * @param outdata the output buffer where checksum values are written
60 * @param outOffset the starting offset in the outdata where the
61 * checksum values are written
62 * @param checksumType type of checksum
63 * @param bytesPerChecksum number of bytes per checksum value
64 */
65 static void generateChecksums(byte[] indata,
66 int startOffset, int endOffset,
67 byte[] outdata, int outOffset,
68 ChecksumType checksumType,
69 int bytesPerChecksum) throws IOException {
70
71 if (checksumType == ChecksumType.NULL) {
72 return; // No checkums for this block.
73 }
74
75 Checksum checksum = checksumType.getChecksumObject();
76 int bytesLeft = endOffset - startOffset;
77 int chunkNum = 0;
78
79 while (bytesLeft > 0) {
80 // generate the checksum for one chunk
81 checksum.reset();
82 int count = Math.min(bytesLeft, bytesPerChecksum);
83 checksum.update(indata, startOffset, count);
84
85 // write the checksum value to the output buffer.
86 int cksumValue = (int)checksum.getValue();
87 outOffset = Bytes.putInt(outdata, outOffset, cksumValue);
88 chunkNum++;
89 startOffset += count;
90 bytesLeft -= count;
91 }
92 }
93
94 /**
95 * Validates that the data in the specified HFileBlock matches the checksum. Generates the
96 * checksums for the data and then validate that it matches those stored in the end of the data.
97 * @param buffer Contains the data in following order: HFileBlock header, data, checksums.
98 * @param path Path of the HFile to which the {@code data} belongs. Only used for logging.
99 * @param offset offset of the data being validated. Only used for logging.
100 * @param hdrSize Size of the block header in {@code data}. Only used for logging.
101 * @return True if checksum matches, else false.
102 */
103 static boolean validateChecksum(ByteBuffer buffer, Path path, long offset, int hdrSize)
104 throws IOException {
105 // A ChecksumType.NULL indicates that the caller is not interested in validating checksums,
106 // so we always return true.
107 ChecksumType cktype =
108 ChecksumType.codeToType(buffer.get(HFileBlock.Header.CHECKSUM_TYPE_INDEX));
109 if (cktype == ChecksumType.NULL) {
110 return true; // No checkums validations needed for this block.
111 }
112 // read in the stored value of the checksum size from the header.
113 int bytesPerChecksum = buffer.getInt(HFileBlock.Header.BYTES_PER_CHECKSUM_INDEX);
114 int onDiskDataSizeWithHeader =
115 buffer.getInt(HFileBlock.Header.ON_DISK_DATA_SIZE_WITH_HEADER_INDEX);
116
117 if (HFile.LOG.isTraceEnabled()) {
118 HFile.LOG.info("dataLength=" + buffer.capacity()
119 + ", sizeWithHeader=" + onDiskDataSizeWithHeader
120 + ", checksumType=" + cktype.getName()
121 + ", file=" + path.toString()
122 + ", offset=" + offset
123 + ", headerSize=" + hdrSize
124 + ", bytesPerChecksum=" + bytesPerChecksum);
125 }
126 // bytesPerChecksum is always larger than the size of the header
127 if (bytesPerChecksum < hdrSize) {
128 String msg = "Unsupported value of bytesPerChecksum. " +
129 " Minimum is " + hdrSize +
130 " but the configured value is " + bytesPerChecksum;
131 HFile.LOG.warn(msg);
132 return false; // cannot happen case, unable to verify checksum
133 }
134 byte[] data;
135 if (buffer.hasArray()) {
136 data = buffer.array();
137 } else {
138 data = ByteBufferUtils.toBytes(buffer, 0);
139 }
140
141 Checksum checksumObject = cktype.getChecksumObject();
142 checksumObject.reset();
143 // Extract the header and compute checksum for the header.
144 checksumObject.update(data, 0, hdrSize);
145
146 int off = hdrSize;
147 int consumed = hdrSize;
148 int cksumOffset = onDiskDataSizeWithHeader;
149 int bytesLeft = cksumOffset - off;
150
151 // validate each chunk
152 while (bytesLeft > 0) {
153 int thisChunkSize = bytesPerChecksum - consumed;
154 int count = Math.min(bytesLeft, thisChunkSize);
155 checksumObject.update(data, off, count);
156
157 int storedChecksum = Bytes.toInt(data, cksumOffset);
158 if (storedChecksum != (int)checksumObject.getValue()) {
159 String msg = "File " + path +
160 " Stored checksum value of " + storedChecksum +
161 " at offset " + cksumOffset +
162 " does not match computed checksum " +
163 checksumObject.getValue() +
164 ", total data size " + data.length +
165 " Checksum data range offset " + off + " len " + count +
166 HFileBlock.toStringHeader(buffer);
167 HFile.LOG.warn(msg);
168 if (generateExceptions) {
169 throw new IOException(msg); // this is only for unit tests
170 } else {
171 return false; // checksum validation failure
172 }
173 }
174 cksumOffset += HFileBlock.CHECKSUM_SIZE;
175 bytesLeft -= count;
176 off += count;
177 consumed = 0;
178 checksumObject.reset();
179 }
180 return true; // checksum is valid
181 }
182
183 /**
184 * Returns the number of bytes needed to store the checksums for
185 * a specified data size
186 * @param datasize number of bytes of data
187 * @param bytesPerChecksum number of bytes in a checksum chunk
188 * @return The number of bytes needed to store the checksum values
189 */
190 static long numBytes(long datasize, int bytesPerChecksum) {
191 return numChunks(datasize, bytesPerChecksum) *
192 HFileBlock.CHECKSUM_SIZE;
193 }
194
195 /**
196 * Returns the number of checksum chunks needed to store the checksums for
197 * a specified data size
198 * @param datasize number of bytes of data
199 * @param bytesPerChecksum number of bytes in a checksum chunk
200 * @return The number of checksum chunks
201 */
202 static long numChunks(long datasize, int bytesPerChecksum) {
203 long numChunks = datasize/bytesPerChecksum;
204 if (datasize % bytesPerChecksum != 0) {
205 numChunks++;
206 }
207 return numChunks;
208 }
209
210 /**
211 * Write dummy checksums to the end of the specified bytes array
212 * to reserve space for writing checksums later
213 * @param baos OutputStream to write dummy checkum values
214 * @param numBytes Number of bytes of data for which dummy checksums
215 * need to be generated
216 * @param bytesPerChecksum Number of bytes per checksum value
217 */
218 static void reserveSpaceForChecksums(ByteArrayOutputStream baos,
219 int numBytes, int bytesPerChecksum) throws IOException {
220 long numChunks = numChunks(numBytes, bytesPerChecksum);
221 long bytesLeft = numChunks * HFileBlock.CHECKSUM_SIZE;
222 while (bytesLeft > 0) {
223 long count = Math.min(bytesLeft, DUMMY_VALUE.length);
224 baos.write(DUMMY_VALUE, 0, (int)count);
225 bytesLeft -= count;
226 }
227 }
228
229 /**
230 * Mechanism to throw an exception in case of hbase checksum
231 * failure. This is used by unit tests only.
232 * @param value Setting this to true will cause hbase checksum
233 * verification failures to generate exceptions.
234 */
235 public static void generateExceptionForChecksumFailureForTest(boolean value) {
236 generateExceptions = value;
237 }
238 }
239