001/*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018package org.apache.hadoop.hbase.io.compress.zstd;
019
020import static org.junit.jupiter.api.Assertions.assertNotNull;
021import static org.junit.jupiter.api.Assertions.assertTrue;
022
023import java.io.File;
024import java.io.FileOutputStream;
025import java.io.IOException;
026import java.util.Random;
027import org.apache.hadoop.conf.Configuration;
028import org.apache.hadoop.fs.CommonConfigurationKeys;
029import org.apache.hadoop.hbase.io.compress.CompressionTestBase;
030import org.apache.hadoop.hbase.io.compress.DictionaryCache;
031import org.apache.hadoop.hbase.testclassification.SmallTests;
032import org.apache.hadoop.hbase.util.RandomDistribution;
033import org.junit.jupiter.api.BeforeAll;
034import org.junit.jupiter.api.Tag;
035import org.junit.jupiter.api.Test;
036
037@Tag(SmallTests.TAG)
038public class TestZstdDictionary extends CompressionTestBase {
039
040  private static final String DICTIONARY_PATH = DictionaryCache.RESOURCE_SCHEME + "zstd.test.dict";
041  // zstd.test.data compressed with zstd.test.dict at level 3 with a default buffer size of 262144
042  // will produce a result of 359909 bytes
043  private static final int EXPECTED_COMPRESSED_SIZE = 359909;
044  private static byte[] TEST_DATA;
045
046  @BeforeAll
047  public static void setUp() throws Exception {
048    Configuration conf = new Configuration();
049    TEST_DATA = DictionaryCache.loadFromResource(conf,
050      DictionaryCache.RESOURCE_SCHEME + "zstd.test.data", /* maxSize */ 1024 * 1024);
051    assertNotNull(TEST_DATA, "Failed to load test data");
052  }
053
054  @Test
055  public void test() throws Exception {
056    Configuration conf = new Configuration();
057    conf.setInt(CommonConfigurationKeys.IO_COMPRESSION_CODEC_ZSTD_LEVEL_KEY, 3);
058    conf.set(ZstdCodec.ZSTD_DICTIONARY_KEY, DICTIONARY_PATH);
059    ZstdCodec codec = new ZstdCodec();
060    codec.setConf(conf);
061    codecTest(codec, new byte[][] { TEST_DATA }, EXPECTED_COMPRESSED_SIZE);
062    // Assert that the dictionary was actually loaded
063    assertTrue(DictionaryCache.contains(DICTIONARY_PATH), "Dictionary was not loaded by codec");
064  }
065
066  //
067  // For generating the test data in src/test/resources/
068  //
069  public static void main(String[] args) throws IOException {
070    // Write 1000 1k blocks for training to the specified file
071    // Train with:
072    // zstd --train -B1024 -o <dictionary_file> <input_file>
073    if (args.length < 1) {
074      System.err.println("Usage: TestZstdCodec <outFile>");
075      System.exit(-1);
076    }
077    final RandomDistribution.DiscreteRNG rng =
078      new RandomDistribution.Zipf(new Random(), 0, Byte.MAX_VALUE, 2);
079    final File outFile = new File(args[0]);
080    final byte[] buffer = new byte[1024];
081    System.out.println("Generating " + outFile);
082    try (FileOutputStream os = new FileOutputStream(outFile)) {
083      for (int i = 0; i < 1000; i++) {
084        fill(rng, buffer);
085        os.write(buffer);
086      }
087    }
088    System.out.println("Done");
089  }
090}