001/* 002 * Licensed to the Apache Software Foundation (ASF) under one 003 * or more contributor license agreements. See the NOTICE file 004 * distributed with this work for additional information 005 * regarding copyright ownership. The ASF licenses this file 006 * to you under the Apache License, Version 2.0 (the 007 * "License"); you may not use this file except in compliance 008 * with the License. You may obtain a copy of the License at 009 * 010 * http://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, software 013 * distributed under the License is distributed on an "AS IS" BASIS, 014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 015 * See the License for the specific language governing permissions and 016 * limitations under the License. 017 */ 018package org.apache.hadoop.hbase.io.hfile; 019 020import java.io.ByteArrayOutputStream; 021import java.io.IOException; 022import java.nio.ByteBuffer; 023 024import org.apache.hadoop.fs.ChecksumException; 025import org.apache.yetus.audience.InterfaceAudience; 026import org.slf4j.Logger; 027import org.slf4j.LoggerFactory; 028import org.apache.hadoop.hbase.util.ChecksumType; 029import org.apache.hadoop.util.DataChecksum; 030 031/** 032 * Utility methods to compute and validate checksums. 033 */ 034@InterfaceAudience.Private 035public class ChecksumUtil { 036 public static final Logger LOG = LoggerFactory.getLogger(ChecksumUtil.class); 037 038 /** This is used to reserve space in a byte buffer */ 039 private static byte[] DUMMY_VALUE = new byte[128 * HFileBlock.CHECKSUM_SIZE]; 040 041 /** 042 * This is used by unit tests to make checksum failures throw an 043 * exception instead of returning null. Returning a null value from 044 * checksum validation will cause the higher layer to retry that 045 * read with hdfs-level checksums. Instead, we would like checksum 046 * failures to cause the entire unit test to fail. 047 */ 048 private static boolean generateExceptions = false; 049 050 /** 051 * Generates a checksum for all the data in indata. The checksum is 052 * written to outdata. 053 * @param indata input data stream 054 * @param startOffset starting offset in the indata stream from where to 055 * compute checkums from 056 * @param endOffset ending offset in the indata stream upto 057 * which checksums needs to be computed 058 * @param outdata the output buffer where checksum values are written 059 * @param outOffset the starting offset in the outdata where the 060 * checksum values are written 061 * @param checksumType type of checksum 062 * @param bytesPerChecksum number of bytes per checksum value 063 */ 064 static void generateChecksums(byte[] indata, int startOffset, int endOffset, 065 byte[] outdata, int outOffset, ChecksumType checksumType, 066 int bytesPerChecksum) throws IOException { 067 068 if (checksumType == ChecksumType.NULL) { 069 return; // No checksum for this block. 070 } 071 072 DataChecksum checksum = DataChecksum.newDataChecksum( 073 checksumType.getDataChecksumType(), bytesPerChecksum); 074 075 checksum.calculateChunkedSums( 076 ByteBuffer.wrap(indata, startOffset, endOffset - startOffset), 077 ByteBuffer.wrap(outdata, outOffset, outdata.length - outOffset)); 078 } 079 080 /** 081 * Validates that the data in the specified HFileBlock matches the checksum. Generates the 082 * checksums for the data and then validate that it matches those stored in the end of the data. 083 * @param buffer Contains the data in following order: HFileBlock header, data, checksums. 084 * @param pathName Path of the HFile to which the {@code data} belongs. Only used for logging. 085 * @param offset offset of the data being validated. Only used for logging. 086 * @param hdrSize Size of the block header in {@code data}. Only used for logging. 087 * @return True if checksum matches, else false. 088 */ 089 static boolean validateChecksum(ByteBuffer buffer, String pathName, long offset, int hdrSize) 090 throws IOException { 091 // A ChecksumType.NULL indicates that the caller is not interested in validating checksums, 092 // so we always return true. 093 ChecksumType cktype = 094 ChecksumType.codeToType(buffer.get(HFileBlock.Header.CHECKSUM_TYPE_INDEX)); 095 if (cktype == ChecksumType.NULL) { 096 return true; // No checksum validations needed for this block. 097 } 098 099 // read in the stored value of the checksum size from the header. 100 int bytesPerChecksum = buffer.getInt(HFileBlock.Header.BYTES_PER_CHECKSUM_INDEX); 101 102 DataChecksum dataChecksum = DataChecksum.newDataChecksum( 103 cktype.getDataChecksumType(), bytesPerChecksum); 104 assert dataChecksum != null; 105 int onDiskDataSizeWithHeader = 106 buffer.getInt(HFileBlock.Header.ON_DISK_DATA_SIZE_WITH_HEADER_INDEX); 107 if (LOG.isTraceEnabled()) { 108 LOG.info("dataLength=" + buffer.capacity() 109 + ", sizeWithHeader=" + onDiskDataSizeWithHeader 110 + ", checksumType=" + cktype.getName() 111 + ", file=" + pathName 112 + ", offset=" + offset 113 + ", headerSize=" + hdrSize 114 + ", bytesPerChecksum=" + bytesPerChecksum); 115 } 116 try { 117 ByteBuffer data = (ByteBuffer) buffer.duplicate().position(0).limit(onDiskDataSizeWithHeader); 118 ByteBuffer checksums = (ByteBuffer) buffer.duplicate().position(onDiskDataSizeWithHeader) 119 .limit(buffer.capacity()); 120 dataChecksum.verifyChunkedSums(data, checksums, pathName, 0); 121 } catch (ChecksumException e) { 122 return false; 123 } 124 return true; // checksum is valid 125 } 126 127 /** 128 * Returns the number of bytes needed to store the checksums for 129 * a specified data size 130 * @param datasize number of bytes of data 131 * @param bytesPerChecksum number of bytes in a checksum chunk 132 * @return The number of bytes needed to store the checksum values 133 */ 134 static long numBytes(long datasize, int bytesPerChecksum) { 135 return numChunks(datasize, bytesPerChecksum) * HFileBlock.CHECKSUM_SIZE; 136 } 137 138 /** 139 * Returns the number of checksum chunks needed to store the checksums for 140 * a specified data size 141 * @param datasize number of bytes of data 142 * @param bytesPerChecksum number of bytes in a checksum chunk 143 * @return The number of checksum chunks 144 */ 145 static long numChunks(long datasize, int bytesPerChecksum) { 146 long numChunks = datasize/bytesPerChecksum; 147 if (datasize % bytesPerChecksum != 0) { 148 numChunks++; 149 } 150 return numChunks; 151 } 152 153 /** 154 * Write dummy checksums to the end of the specified bytes array 155 * to reserve space for writing checksums later 156 * @param baos OutputStream to write dummy checkum values 157 * @param numBytes Number of bytes of data for which dummy checksums 158 * need to be generated 159 * @param bytesPerChecksum Number of bytes per checksum value 160 */ 161 static void reserveSpaceForChecksums(ByteArrayOutputStream baos, 162 int numBytes, int bytesPerChecksum) throws IOException { 163 long numChunks = numChunks(numBytes, bytesPerChecksum); 164 long bytesLeft = numChunks * HFileBlock.CHECKSUM_SIZE; 165 while (bytesLeft > 0) { 166 long count = Math.min(bytesLeft, DUMMY_VALUE.length); 167 baos.write(DUMMY_VALUE, 0, (int)count); 168 bytesLeft -= count; 169 } 170 } 171 172 /** 173 * Mechanism to throw an exception in case of hbase checksum 174 * failure. This is used by unit tests only. 175 * @param value Setting this to true will cause hbase checksum 176 * verification failures to generate exceptions. 177 */ 178 public static void generateExceptionForChecksumFailureForTest(boolean value) { 179 generateExceptions = value; 180 } 181} 182