001/*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018package org.apache.hadoop.hbase.io.compress;
019
020import static org.junit.jupiter.api.Assertions.assertEquals;
021
022import java.io.ByteArrayInputStream;
023import java.io.ByteArrayOutputStream;
024import java.util.Arrays;
025import java.util.Random;
026import org.apache.hadoop.conf.Configurable;
027import org.apache.hadoop.hbase.util.Bytes;
028import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
029import org.apache.hadoop.hbase.util.RandomDistribution;
030import org.apache.hadoop.io.IOUtils;
031import org.apache.hadoop.io.compress.CompressionCodec;
032import org.apache.hadoop.io.compress.CompressionInputStream;
033import org.apache.hadoop.io.compress.CompressionOutputStream;
034import org.apache.hadoop.io.compress.Compressor;
035import org.apache.hadoop.io.compress.Decompressor;
036import org.slf4j.Logger;
037import org.slf4j.LoggerFactory;
038
039@SuppressWarnings("checkstyle:innerassignment")
040public class CompressionTestBase {
041
042  protected static final Logger LOG = LoggerFactory.getLogger(CompressionTestBase.class);
043
044  protected static final int LARGE_SIZE = 10 * 1024 * 1024;
045  protected static final int VERY_LARGE_SIZE = 100 * 1024 * 1024;
046  protected static final int BLOCK_SIZE = 4096;
047
048  protected static final byte[] SMALL_INPUT;
049  static {
050    // 1, 1, 2, 3, 5, 8, 13, 21, 34, 55, 89, 144, 233, 377, 610, 987, 1597
051    SMALL_INPUT =
052      new byte[1 + 1 + 2 + 3 + 5 + 8 + 13 + 21 + 34 + 55 + 89 + 144 + 233 + 377 + 610 + 987 + 1597];
053    int off = 0;
054    Arrays.fill(SMALL_INPUT, off, (off += 1), (byte) 'A');
055    Arrays.fill(SMALL_INPUT, off, (off += 1), (byte) 'B');
056    Arrays.fill(SMALL_INPUT, off, (off += 2), (byte) 'C');
057    Arrays.fill(SMALL_INPUT, off, (off += 3), (byte) 'D');
058    Arrays.fill(SMALL_INPUT, off, (off += 5), (byte) 'E');
059    Arrays.fill(SMALL_INPUT, off, (off += 8), (byte) 'F');
060    Arrays.fill(SMALL_INPUT, off, (off += 13), (byte) 'G');
061    Arrays.fill(SMALL_INPUT, off, (off += 21), (byte) 'H');
062    Arrays.fill(SMALL_INPUT, off, (off += 34), (byte) 'I');
063    Arrays.fill(SMALL_INPUT, off, (off += 55), (byte) 'J');
064    Arrays.fill(SMALL_INPUT, off, (off += 89), (byte) 'K');
065    Arrays.fill(SMALL_INPUT, off, (off += 144), (byte) 'L');
066    Arrays.fill(SMALL_INPUT, off, (off += 233), (byte) 'M');
067    Arrays.fill(SMALL_INPUT, off, (off += 377), (byte) 'N');
068    Arrays.fill(SMALL_INPUT, off, (off += 610), (byte) 'O');
069    Arrays.fill(SMALL_INPUT, off, (off += 987), (byte) 'P');
070    Arrays.fill(SMALL_INPUT, off, (off += 1597), (byte) 'Q');
071  }
072
073  protected void codecTest(final CompressionCodec codec, final byte[][] input) throws Exception {
074    codecTest(codec, input, null);
075  }
076
077  protected void codecTest(final CompressionCodec codec, final byte[][] input,
078    final Integer expectedCompressedSize) throws Exception {
079    // We do this in Compression.java
080    ((Configurable) codec).getConf().setInt("io.file.buffer.size", 32 * 1024);
081    // Compress
082    long start = EnvironmentEdgeManager.currentTime();
083    Compressor compressor = codec.createCompressor();
084    ByteArrayOutputStream baos = new ByteArrayOutputStream();
085    CompressionOutputStream out = codec.createOutputStream(baos, compressor);
086    int inLen = 0;
087    for (int i = 0; i < input.length; i++) {
088      out.write(input[i]);
089      inLen += input[i].length;
090    }
091    out.close();
092    long end = EnvironmentEdgeManager.currentTime();
093    final byte[] compressed = baos.toByteArray();
094    LOG.info("{} compressed {} bytes to {} bytes in {} ms", codec.getClass().getSimpleName(), inLen,
095      compressed.length, end - start);
096    if (expectedCompressedSize != null) {
097      assertEquals(expectedCompressedSize, compressed.length,
098        "Expected compressed size does not match: (expected=" + expectedCompressedSize + ", actual="
099          + compressed.length + ")");
100    }
101    // Decompress
102    final byte[] plain = new byte[inLen];
103    Decompressor decompressor = codec.createDecompressor();
104    CompressionInputStream in =
105      codec.createInputStream(new ByteArrayInputStream(compressed), decompressor);
106    start = EnvironmentEdgeManager.currentTime();
107    IOUtils.readFully(in, plain, 0, plain.length);
108    in.close();
109    end = EnvironmentEdgeManager.currentTime();
110    LOG.info("{} decompressed {} bytes to {} bytes in {} ms", codec.getClass().getSimpleName(),
111      compressed.length, plain.length, end - start);
112    // Decompressed bytes should equal the original
113    int offset = 0;
114    for (int i = 0; i < input.length; i++) {
115      assertEquals(0, Bytes.compareTo(plain, offset, input[i].length, input[i], 0, input[i].length),
116        "Comparison failed at offset " + offset);
117      offset += input[i].length;
118    }
119  }
120
121  /**
122   * Test with one smallish input buffer
123   */
124  protected void codecSmallTest(final CompressionCodec codec) throws Exception {
125    codecTest(codec, new byte[][] { SMALL_INPUT });
126  }
127
128  /**
129   * Test with a large input (1MB) divided into blocks of 4KB.
130   */
131  protected void codecLargeTest(final CompressionCodec codec, final double sigma) throws Exception {
132    RandomDistribution.DiscreteRNG rng =
133      new RandomDistribution.Zipf(new Random(), 0, Byte.MAX_VALUE, sigma);
134    final byte[][] input = new byte[LARGE_SIZE / BLOCK_SIZE][BLOCK_SIZE];
135    fill(rng, input);
136    codecTest(codec, input);
137  }
138
139  /**
140   * Test with a very large input (100MB) as a single input buffer.
141   */
142  protected void codecVeryLargeTest(final CompressionCodec codec, final double sigma)
143    throws Exception {
144    RandomDistribution.DiscreteRNG rng =
145      new RandomDistribution.Zipf(new Random(), 0, Byte.MAX_VALUE, sigma);
146    final byte[][] input = new byte[1][VERY_LARGE_SIZE];
147    fill(rng, input);
148    codecTest(codec, input);
149  }
150
151  protected static void fill(RandomDistribution.DiscreteRNG rng, byte[][] input) {
152    for (int i = 0; i < input.length; i++) {
153      fill(rng, input[i]);
154    }
155  }
156
157  protected static void fill(RandomDistribution.DiscreteRNG rng, byte[] input) {
158    for (int i = 0; i < input.length; i++) {
159      input[i] = (byte) rng.nextInt();
160    }
161  }
162
163}