001/*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018package org.apache.hadoop.hbase.io.compress.zstd;
019
020import static org.junit.Assert.assertNotNull;
021import static org.junit.Assert.assertTrue;
022
023import java.io.File;
024import java.io.FileOutputStream;
025import java.io.IOException;
026import java.util.Random;
027import org.apache.hadoop.conf.Configuration;
028import org.apache.hadoop.fs.CommonConfigurationKeys;
029import org.apache.hadoop.hbase.HBaseClassTestRule;
030import org.apache.hadoop.hbase.io.compress.CompressionTestBase;
031import org.apache.hadoop.hbase.io.compress.DictionaryCache;
032import org.apache.hadoop.hbase.testclassification.SmallTests;
033import org.apache.hadoop.hbase.util.RandomDistribution;
034import org.junit.BeforeClass;
035import org.junit.ClassRule;
036import org.junit.Test;
037import org.junit.experimental.categories.Category;
038
039@Category(SmallTests.class)
040public class TestZstdDictionary extends CompressionTestBase {
041
042  @ClassRule
043  public static final HBaseClassTestRule CLASS_RULE =
044    HBaseClassTestRule.forClass(TestZstdDictionary.class);
045
046  private static final String DICTIONARY_PATH = DictionaryCache.RESOURCE_SCHEME + "zstd.test.dict";
047  // zstd.test.data compressed with zstd.test.dict at level 3 with a default buffer size of 262144
048  // will produce a result of 359909 bytes
049  private static final int EXPECTED_COMPRESSED_SIZE = 359909;
050
051  private static byte[] TEST_DATA;
052
053  @BeforeClass
054  public static void setUp() throws Exception {
055    Configuration conf = new Configuration();
056    TEST_DATA = DictionaryCache.loadFromResource(conf,
057      DictionaryCache.RESOURCE_SCHEME + "zstd.test.data", /* maxSize */ 1024 * 1024);
058    assertNotNull("Failed to load test data", TEST_DATA);
059  }
060
061  @Test
062  public void test() throws Exception {
063    Configuration conf = new Configuration();
064    conf.setInt(CommonConfigurationKeys.IO_COMPRESSION_CODEC_ZSTD_LEVEL_KEY, 3);
065    conf.set(ZstdCodec.ZSTD_DICTIONARY_KEY, DICTIONARY_PATH);
066    ZstdCodec codec = new ZstdCodec();
067    codec.setConf(conf);
068    codecTest(codec, new byte[][] { TEST_DATA }, EXPECTED_COMPRESSED_SIZE);
069    // Assert that the dictionary was actually loaded
070    assertTrue("Dictionary was not loaded by codec", DictionaryCache.contains(DICTIONARY_PATH));
071  }
072
073  //
074  // For generating the test data in src/test/resources/
075  //
076
077  public static void main(String[] args) throws IOException {
078    // Write 1000 1k blocks for training to the specified file
079    // Train with:
080    // zstd --train -B1024 -o <dictionary_file> <input_file>
081    if (args.length < 1) {
082      System.err.println("Usage: TestZstdCodec <outFile>");
083      System.exit(-1);
084    }
085    final RandomDistribution.DiscreteRNG rng =
086      new RandomDistribution.Zipf(new Random(), 0, Byte.MAX_VALUE, 2);
087    final File outFile = new File(args[0]);
088    final byte[] buffer = new byte[1024];
089    System.out.println("Generating " + outFile);
090    try (FileOutputStream os = new FileOutputStream(outFile)) {
091      for (int i = 0; i < 1000; i++) {
092        fill(rng, buffer);
093        os.write(buffer);
094      }
095    }
096    System.out.println("Done");
097  }
098
099}