001/*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018package org.apache.hadoop.hbase.io.hfile;
019
020import java.io.ByteArrayOutputStream;
021import java.io.IOException;
022import java.nio.ByteBuffer;
023
024import org.apache.hadoop.fs.ChecksumException;
025import org.apache.yetus.audience.InterfaceAudience;
026import org.slf4j.Logger;
027import org.slf4j.LoggerFactory;
028import org.apache.hadoop.hbase.util.ChecksumType;
029import org.apache.hadoop.util.DataChecksum;
030
031/**
032 * Utility methods to compute and validate checksums.
033 */
034@InterfaceAudience.Private
035public class ChecksumUtil {
036  public static final Logger LOG = LoggerFactory.getLogger(ChecksumUtil.class);
037
038  /** This is used to reserve space in a byte buffer */
039  private static byte[] DUMMY_VALUE = new byte[128 * HFileBlock.CHECKSUM_SIZE];
040
041  /**
042   * This is used by unit tests to make checksum failures throw an
043   * exception instead of returning null. Returning a null value from
044   * checksum validation will cause the higher layer to retry that
045   * read with hdfs-level checksums. Instead, we would like checksum
046   * failures to cause the entire unit test to fail.
047   */
048  private static boolean generateExceptions = false;
049
050  /**
051   * Generates a checksum for all the data in indata. The checksum is
052   * written to outdata.
053   * @param indata input data stream
054   * @param startOffset starting offset in the indata stream from where to
055   *                    compute checkums from
056   * @param endOffset ending offset in the indata stream upto
057   *                   which checksums needs to be computed
058   * @param outdata the output buffer where checksum values are written
059   * @param outOffset the starting offset in the outdata where the
060   *                  checksum values are written
061   * @param checksumType type of checksum
062   * @param bytesPerChecksum number of bytes per checksum value
063   */
064  static void generateChecksums(byte[] indata, int startOffset, int endOffset,
065    byte[] outdata, int outOffset, ChecksumType checksumType,
066    int bytesPerChecksum) throws IOException {
067
068    if (checksumType == ChecksumType.NULL) {
069      return; // No checksum for this block.
070    }
071
072    DataChecksum checksum = DataChecksum.newDataChecksum(
073        checksumType.getDataChecksumType(), bytesPerChecksum);
074
075    checksum.calculateChunkedSums(
076       ByteBuffer.wrap(indata, startOffset, endOffset - startOffset),
077       ByteBuffer.wrap(outdata, outOffset, outdata.length - outOffset));
078  }
079
080  /**
081   * Validates that the data in the specified HFileBlock matches the checksum. Generates the
082   * checksums for the data and then validate that it matches those stored in the end of the data.
083   * @param buffer Contains the data in following order: HFileBlock header, data, checksums.
084   * @param pathName Path of the HFile to which the {@code data} belongs. Only used for logging.
085   * @param offset offset of the data being validated. Only used for logging.
086   * @param hdrSize Size of the block header in {@code data}. Only used for logging.
087   * @return True if checksum matches, else false.
088   */
089  static boolean validateChecksum(ByteBuffer buffer, String pathName, long offset, int hdrSize)
090      throws IOException {
091    // A ChecksumType.NULL indicates that the caller is not interested in validating checksums,
092    // so we always return true.
093    ChecksumType cktype =
094        ChecksumType.codeToType(buffer.get(HFileBlock.Header.CHECKSUM_TYPE_INDEX));
095    if (cktype == ChecksumType.NULL) {
096      return true; // No checksum validations needed for this block.
097    }
098
099    // read in the stored value of the checksum size from the header.
100    int bytesPerChecksum = buffer.getInt(HFileBlock.Header.BYTES_PER_CHECKSUM_INDEX);
101
102    DataChecksum dataChecksum = DataChecksum.newDataChecksum(
103        cktype.getDataChecksumType(), bytesPerChecksum);
104    assert dataChecksum != null;
105    int onDiskDataSizeWithHeader =
106        buffer.getInt(HFileBlock.Header.ON_DISK_DATA_SIZE_WITH_HEADER_INDEX);
107    if (LOG.isTraceEnabled()) {
108      LOG.info("dataLength=" + buffer.capacity()
109          + ", sizeWithHeader=" + onDiskDataSizeWithHeader
110          + ", checksumType=" + cktype.getName()
111          + ", file=" + pathName
112          + ", offset=" + offset
113          + ", headerSize=" + hdrSize
114          + ", bytesPerChecksum=" + bytesPerChecksum);
115    }
116    try {
117      ByteBuffer data = (ByteBuffer) buffer.duplicate().position(0).limit(onDiskDataSizeWithHeader);
118      ByteBuffer checksums = (ByteBuffer) buffer.duplicate().position(onDiskDataSizeWithHeader)
119          .limit(buffer.capacity());
120      dataChecksum.verifyChunkedSums(data, checksums, pathName, 0);
121    } catch (ChecksumException e) {
122      return false;
123    }
124    return true;  // checksum is valid
125  }
126
127  /**
128   * Returns the number of bytes needed to store the checksums for
129   * a specified data size
130   * @param datasize number of bytes of data
131   * @param bytesPerChecksum number of bytes in a checksum chunk
132   * @return The number of bytes needed to store the checksum values
133   */
134  static long numBytes(long datasize, int bytesPerChecksum) {
135    return numChunks(datasize, bytesPerChecksum) * HFileBlock.CHECKSUM_SIZE;
136  }
137
138  /**
139   * Returns the number of checksum chunks needed to store the checksums for
140   * a specified data size
141   * @param datasize number of bytes of data
142   * @param bytesPerChecksum number of bytes in a checksum chunk
143   * @return The number of checksum chunks
144   */
145  static long numChunks(long datasize, int bytesPerChecksum) {
146    long numChunks = datasize/bytesPerChecksum;
147    if (datasize % bytesPerChecksum != 0) {
148      numChunks++;
149    }
150    return numChunks;
151  }
152
153  /**
154   * Write dummy checksums to the end of the specified bytes array
155   * to reserve space for writing checksums later
156   * @param baos OutputStream to write dummy checkum values
157   * @param numBytes Number of bytes of data for which dummy checksums
158   *                 need to be generated
159   * @param bytesPerChecksum Number of bytes per checksum value
160   */
161  static void reserveSpaceForChecksums(ByteArrayOutputStream baos,
162    int numBytes, int bytesPerChecksum) throws IOException {
163    long numChunks = numChunks(numBytes, bytesPerChecksum);
164    long bytesLeft = numChunks * HFileBlock.CHECKSUM_SIZE;
165    while (bytesLeft > 0) {
166      long count = Math.min(bytesLeft, DUMMY_VALUE.length);
167      baos.write(DUMMY_VALUE, 0, (int)count);
168      bytesLeft -= count;
169    }
170  }
171
172  /**
173   * Mechanism to throw an exception in case of hbase checksum
174   * failure. This is used by unit tests only.
175   * @param value Setting this to true will cause hbase checksum
176   *              verification failures to generate exceptions.
177   */
178  public static void generateExceptionForChecksumFailureForTest(boolean value) {
179    generateExceptions = value;
180  }
181}
182