001/* 002 * Licensed to the Apache Software Foundation (ASF) under one 003 * or more contributor license agreements. See the NOTICE file 004 * distributed with this work for additional information 005 * regarding copyright ownership. The ASF licenses this file 006 * to you under the Apache License, Version 2.0 (the 007 * "License"); you may not use this file except in compliance 008 * with the License. You may obtain a copy of the License at 009 * 010 * http://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, software 013 * distributed under the License is distributed on an "AS IS" BASIS, 014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 015 * See the License for the specific language governing permissions and 016 * limitations under the License. 017 */ 018package org.apache.hadoop.hbase.io.compress.zstd; 019 020import static org.junit.jupiter.api.Assertions.assertNotNull; 021import static org.junit.jupiter.api.Assertions.assertTrue; 022 023import java.io.File; 024import java.io.FileOutputStream; 025import java.io.IOException; 026import java.util.Random; 027import org.apache.hadoop.conf.Configuration; 028import org.apache.hadoop.fs.CommonConfigurationKeys; 029import org.apache.hadoop.hbase.io.compress.CompressionTestBase; 030import org.apache.hadoop.hbase.io.compress.DictionaryCache; 031import org.apache.hadoop.hbase.testclassification.SmallTests; 032import org.apache.hadoop.hbase.util.RandomDistribution; 033import org.junit.jupiter.api.BeforeAll; 034import org.junit.jupiter.api.Tag; 035import org.junit.jupiter.api.Test; 036 037@Tag(SmallTests.TAG) 038public class TestZstdDictionary extends CompressionTestBase { 039 040 private static final String DICTIONARY_PATH = DictionaryCache.RESOURCE_SCHEME + "zstd.test.dict"; 041 // zstd.test.data compressed with zstd.test.dict at level 3 with a default buffer size of 262144 042 // will produce a result of 359909 bytes 043 private static final int EXPECTED_COMPRESSED_SIZE = 359909; 044 private static byte[] TEST_DATA; 045 046 @BeforeAll 047 public static void setUp() throws Exception { 048 Configuration conf = new Configuration(); 049 TEST_DATA = DictionaryCache.loadFromResource(conf, 050 DictionaryCache.RESOURCE_SCHEME + "zstd.test.data", /* maxSize */ 1024 * 1024); 051 assertNotNull(TEST_DATA, "Failed to load test data"); 052 } 053 054 @Test 055 public void test() throws Exception { 056 Configuration conf = new Configuration(); 057 conf.setInt(CommonConfigurationKeys.IO_COMPRESSION_CODEC_ZSTD_LEVEL_KEY, 3); 058 conf.set(ZstdCodec.ZSTD_DICTIONARY_KEY, DICTIONARY_PATH); 059 ZstdCodec codec = new ZstdCodec(); 060 codec.setConf(conf); 061 codecTest(codec, new byte[][] { TEST_DATA }, EXPECTED_COMPRESSED_SIZE); 062 // Assert that the dictionary was actually loaded 063 assertTrue(DictionaryCache.contains(DICTIONARY_PATH), "Dictionary was not loaded by codec"); 064 } 065 066 // 067 // For generating the test data in src/test/resources/ 068 // 069 public static void main(String[] args) throws IOException { 070 // Write 1000 1k blocks for training to the specified file 071 // Train with: 072 // zstd --train -B1024 -o <dictionary_file> <input_file> 073 if (args.length < 1) { 074 System.err.println("Usage: TestZstdCodec <outFile>"); 075 System.exit(-1); 076 } 077 final RandomDistribution.DiscreteRNG rng = 078 new RandomDistribution.Zipf(new Random(), 0, Byte.MAX_VALUE, 2); 079 final File outFile = new File(args[0]); 080 final byte[] buffer = new byte[1024]; 081 System.out.println("Generating " + outFile); 082 try (FileOutputStream os = new FileOutputStream(outFile)) { 083 for (int i = 0; i < 1000; i++) { 084 fill(rng, buffer); 085 os.write(buffer); 086 } 087 } 088 System.out.println("Done"); 089 } 090}