/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.io.compress;

import static org.junit.Assert.assertTrue;

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.util.Arrays;
import java.util.Random;
import org.apache.hadoop.conf.Configurable;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
import org.apache.hadoop.hbase.util.RandomDistribution;
import org.apache.hadoop.io.IOUtils;
import org.apache.hadoop.io.compress.CompressionCodec;
import org.apache.hadoop.io.compress.CompressionInputStream;
import org.apache.hadoop.io.compress.CompressionOutputStream;
import org.apache.hadoop.io.compress.Compressor;
import org.apache.hadoop.io.compress.Decompressor;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

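/**
 * Base class for compression codec round-trip tests: input is compressed and then decompressed
 * through the codec under test, and the result is verified against the original bytes. A minimal
 * sketch of a concrete subclass is shown below; {@code MyCodec} is a hypothetical codec standing
 * in for whichever codec a subclass actually tests.
 *
 * <pre>
 * public class TestMyCodecCompression extends CompressionTestBase {
 *   &#64;Test
 *   public void testSmall() throws Exception {
 *     MyCodec codec = new MyCodec();
 *     codec.setConf(new Configuration()); // codecs must be Configurable (see codecTest)
 *     codecSmallTest(codec);
 *   }
 * }
 * </pre>
 */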
@SuppressWarnings("checkstyle:innerassignment")
public class CompressionTestBase {

  protected static final Logger LOG = LoggerFactory.getLogger(CompressionTestBase.class);

  protected static final int LARGE_SIZE = 10 * 1024 * 1024;
  protected static final int VERY_LARGE_SIZE = 100 * 1024 * 1024;
  protected static final int BLOCK_SIZE = 4096;

  protected static final byte[] SMALL_INPUT;
  static {
    // Runs of 'A'..'Q' whose lengths are the first 17 Fibonacci numbers:
    // 1, 1, 2, 3, 5, 8, 13, 21, 34, 55, 89, 144, 233, 377, 610, 987, 1597 (4180 bytes in total)
    SMALL_INPUT =
      new byte[1 + 1 + 2 + 3 + 5 + 8 + 13 + 21 + 34 + 55 + 89 + 144 + 233 + 377 + 610 + 987 + 1597];
    int off = 0;
    Arrays.fill(SMALL_INPUT, off, (off += 1), (byte) 'A');
    Arrays.fill(SMALL_INPUT, off, (off += 1), (byte) 'B');
    Arrays.fill(SMALL_INPUT, off, (off += 2), (byte) 'C');
    Arrays.fill(SMALL_INPUT, off, (off += 3), (byte) 'D');
    Arrays.fill(SMALL_INPUT, off, (off += 5), (byte) 'E');
    Arrays.fill(SMALL_INPUT, off, (off += 8), (byte) 'F');
    Arrays.fill(SMALL_INPUT, off, (off += 13), (byte) 'G');
    Arrays.fill(SMALL_INPUT, off, (off += 21), (byte) 'H');
    Arrays.fill(SMALL_INPUT, off, (off += 34), (byte) 'I');
    Arrays.fill(SMALL_INPUT, off, (off += 55), (byte) 'J');
    Arrays.fill(SMALL_INPUT, off, (off += 89), (byte) 'K');
    Arrays.fill(SMALL_INPUT, off, (off += 144), (byte) 'L');
    Arrays.fill(SMALL_INPUT, off, (off += 233), (byte) 'M');
    Arrays.fill(SMALL_INPUT, off, (off += 377), (byte) 'N');
    Arrays.fill(SMALL_INPUT, off, (off += 610), (byte) 'O');
    Arrays.fill(SMALL_INPUT, off, (off += 987), (byte) 'P');
    Arrays.fill(SMALL_INPUT, off, (off += 1597), (byte) 'Q');
  }

  protected void codecTest(final CompressionCodec codec, final byte[][] input) throws Exception {
    codecTest(codec, input, null);
  }

  protected void codecTest(final CompressionCodec codec, final byte[][] input,
    final Integer expectedCompressedSize) throws Exception {
    // We do this in Compression.java
    ((Configurable) codec).getConf().setInt("io.file.buffer.size", 32 * 1024);
    // Compress
    long start = EnvironmentEdgeManager.currentTime();
    Compressor compressor = codec.createCompressor();
    ByteArrayOutputStream baos = new ByteArrayOutputStream();
    CompressionOutputStream out = codec.createOutputStream(baos, compressor);
    int inLen = 0;
    for (int i = 0; i < input.length; i++) {
      out.write(input[i]);
      inLen += input[i].length;
    }
    out.close();
    long end = EnvironmentEdgeManager.currentTime();
    final byte[] compressed = baos.toByteArray();
    LOG.info("{} compressed {} bytes to {} bytes in {} ms", codec.getClass().getSimpleName(), inLen,
      compressed.length, end - start);
    if (expectedCompressedSize != null) {
      assertTrue("Expected compressed size does not match: (expected=" + expectedCompressedSize
        + ", actual=" + compressed.length + ")", expectedCompressedSize == compressed.length);
    }
    // Decompress
    final byte[] plain = new byte[inLen];
    Decompressor decompressor = codec.createDecompressor();
    CompressionInputStream in =
      codec.createInputStream(new ByteArrayInputStream(compressed), decompressor);
    start = EnvironmentEdgeManager.currentTime();
    IOUtils.readFully(in, plain, 0, plain.length);
    in.close();
    end = EnvironmentEdgeManager.currentTime();
    LOG.info("{} decompressed {} bytes to {} bytes in {} ms", codec.getClass().getSimpleName(),
      compressed.length, plain.length, end - start);
    // Decompressed bytes should equal the original
    int offset = 0;
    for (int i = 0; i < input.length; i++) {
      assertTrue("Comparison failed at offset " + offset,
        Bytes.compareTo(plain, offset, input[i].length, input[i], 0, input[i].length) == 0);
      offset += input[i].length;
    }
  }
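
  /*
   * A usage sketch (illustrative only): the three-argument overload can pin down the compressed
   * size when a codec's output is deterministic, e.g.
   *
   *   codecTest(codec, new byte[][] { SMALL_INPUT }, expectedSize);
   *
   * where expectedSize is a value previously observed for the codec under test.
   */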

  /**
   * Test with one smallish input buffer.
   */
  protected void codecSmallTest(final CompressionCodec codec) throws Exception {
    codecTest(codec, new byte[][] { SMALL_INPUT });
  }

  /**
   * Test with a large input (10MB) divided into blocks of 4KB. Bytes are drawn from a Zipf
   * distribution; a larger sigma skews the distribution and yields more compressible data.
   */
  protected void codecLargeTest(final CompressionCodec codec, final double sigma) throws Exception {
    RandomDistribution.DiscreteRNG rng =
      new RandomDistribution.Zipf(new Random(), 0, Byte.MAX_VALUE, sigma);
    final byte[][] input = new byte[LARGE_SIZE / BLOCK_SIZE][BLOCK_SIZE];
    fill(rng, input);
    codecTest(codec, input);
  }

  /**
   * Test with a very large input (100MB) as a single input buffer.
   */
  protected void codecVeryLargeTest(final CompressionCodec codec, final double sigma)
    throws Exception {
    RandomDistribution.DiscreteRNG rng =
      new RandomDistribution.Zipf(new Random(), 0, Byte.MAX_VALUE, sigma);
    final byte[][] input = new byte[1][VERY_LARGE_SIZE];
    fill(rng, input);
    codecTest(codec, input);
  }
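
  /*
   * A sketch of how a subclass might exercise a codec across compressibility levels (the sigma
   * values here are illustrative, not prescribed by this class):
   *
   *   codecLargeTest(codec, 1.1);   // near-uniform bytes, hard to compress
   *   codecLargeTest(codec, 2);     // moderately compressible
   *   codecVeryLargeTest(codec, 3); // highly compressible, single 100MB buffer
   */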

  protected static void fill(RandomDistribution.DiscreteRNG rng, byte[][] input) {
    for (int i = 0; i < input.length; i++) {
      fill(rng, input[i]);
    }
  }

  protected static void fill(RandomDistribution.DiscreteRNG rng, byte[] input) {
    for (int i = 0; i < input.length; i++) {
      input[i] = (byte) rng.nextInt();
    }
  }

}