public class DataBlockEncodingTool extends Object
| Modifier and Type | Class and Description |
|---|---|
| private static class | DataBlockEncodingTool.Manipulation |
| Modifier and Type | Field and Description |
|---|---|
| private static int | benchmarkNOmit |
| private static int | benchmarkNTimes |
| private static double | BYTES_IN_MB |
| private List<org.apache.hadoop.hbase.io.encoding.EncodedDataBlock> | codecs |
| private org.apache.hadoop.hbase.io.compress.Compression.Algorithm | compressionAlgorithm |
| private String | compressionAlgorithmName |
| private org.apache.hadoop.io.compress.Compressor | compressor |
| private org.apache.hadoop.conf.Configuration | conf |
| private org.apache.hadoop.io.compress.Decompressor | decompressor |
| private static int | DEFAULT_BENCHMARK_N_OMIT — How many first runs should not be included in the benchmark. |
| private static int | DEFAULT_BENCHMARK_N_TIMES — How many times to run the benchmark. |
| private static org.apache.hadoop.hbase.io.compress.Compression.Algorithm | DEFAULT_COMPRESSION — Compression algorithm to use if not specified on the command line |
| private static DecimalFormat | DELIMITED_DECIMAL_FORMAT |
| private static boolean | includesMemstoreTS |
| private static String | INT_FORMAT |
| private static org.slf4j.Logger | LOG |
| private static double | MB_SEC_COEF |
| private static double | NS_IN_SEC |
| private static String | OPT_BENCHMARK_N_OMIT — Number of first runs of every benchmark to omit from statistics |
| private static String | OPT_BENCHMARK_N_TIMES — Number of times to run each benchmark |
| private static String | OPT_COMPRESSION_ALGORITHM — What compression algorithm to test |
| private static String | OPT_HFILE_NAME — HFile name to be used in benchmark |
| private static String | OPT_KV_LIMIT — Maximum number of key/value pairs to process in a single benchmark run |
| private static String | OPT_MEASURE_THROUGHPUT — Whether to run a benchmark to measure read throughput |
| private static String | OPT_OMIT_CORRECTNESS_TEST — If this is specified, no correctness testing will be done |
| private static String | PCT_FORMAT |
| private byte[] | rawKVs |
| private long | totalCFLength |
| private long | totalKeyLength |
| private long | totalKeyRedundancyLength |
| private long | totalPrefixLength |
| private long | totalValueLength |
| private static boolean | USE_TAG |
| private boolean | useHBaseChecksum |
| Constructor and Description |
|---|
| DataBlockEncodingTool(org.apache.hadoop.conf.Configuration conf, String compressionAlgorithmName) |
| Modifier and Type | Method and Description |
|---|---|
| void | benchmarkAlgorithm(org.apache.hadoop.hbase.io.compress.Compression.Algorithm algorithm, String name, byte[] buffer, int offset, int length) — Check decompress performance of a given algorithm and print it. |
| void | benchmarkCodecs() — Benchmark codec's speed. |
| private void | benchmarkDefaultCompression(int totalSize, byte[] rawBuffer) |
| private int | benchmarkEncoder(int previousTotalSize, org.apache.hadoop.hbase.io.encoding.EncodedDataBlock codec) — Benchmark compression/decompression throughput. |
| void | checkStatistics(org.apache.hadoop.hbase.regionserver.KeyValueScanner scanner, int kvLimit) — Check statistics for given HFile for different data block encoders. |
| void | displayStatistics() — Display statistics of different compression algorithms. |
| static void | main(String[] args) — A command line interface to benchmarks. |
| private void | outputSavings(String caption, long part, long whole) |
| private static void | outputTuple(String caption, String format, Object... values) |
| private void | outputTuplePct(String caption, long size) |
| private static void | printBenchmarkResult(int totalSize, List<Long> durationsInNanoSec, DataBlockEncodingTool.Manipulation manipulation) |
| private static void | printUsage(org.apache.hbase.thirdparty.org.apache.commons.cli.Options options) |
| static void | testCodecs(org.apache.hadoop.conf.Configuration conf, int kvLimit, String hfilePath, String compressionName, boolean doBenchmark, boolean doVerify) — Test a data block encoder on the given HFile. |
| boolean | verifyCodecs(org.apache.hadoop.hbase.regionserver.KeyValueScanner scanner, int kvLimit) — Verify if all data block encoders are working properly. |
private static final org.slf4j.Logger LOG
private static final boolean includesMemstoreTS
private static final int DEFAULT_BENCHMARK_N_TIMES
How many times to run the benchmark. See also DEFAULT_BENCHMARK_N_OMIT.
private static final int DEFAULT_BENCHMARK_N_OMIT
private static final String OPT_HFILE_NAME
private static final String OPT_KV_LIMIT
private static final String OPT_MEASURE_THROUGHPUT
private static final String OPT_OMIT_CORRECTNESS_TEST
private static final String OPT_COMPRESSION_ALGORITHM
private static final String OPT_BENCHMARK_N_TIMES
private static final String OPT_BENCHMARK_N_OMIT
private static final org.apache.hadoop.hbase.io.compress.Compression.Algorithm DEFAULT_COMPRESSION
private static final DecimalFormat DELIMITED_DECIMAL_FORMAT
private static final String PCT_FORMAT
private static final String INT_FORMAT
private static int benchmarkNTimes
private static int benchmarkNOmit
private final org.apache.hadoop.conf.Configuration conf
private long totalPrefixLength
private long totalKeyLength
private long totalValueLength
private long totalKeyRedundancyLength
private long totalCFLength
private byte[] rawKVs
private boolean useHBaseChecksum
private final String compressionAlgorithmName
private final org.apache.hadoop.hbase.io.compress.Compression.Algorithm compressionAlgorithm
private final org.apache.hadoop.io.compress.Compressor compressor
private final org.apache.hadoop.io.compress.Decompressor decompressor
private static boolean USE_TAG
private static final double BYTES_IN_MB
private static final double NS_IN_SEC
private static final double MB_SEC_COEF
public DataBlockEncodingTool(org.apache.hadoop.conf.Configuration conf, String compressionAlgorithmName)
Parameters:
compressionAlgorithmName - What kind of algorithm should be used as baseline for comparison (e.g. lzo, gz).

public void checkStatistics(org.apache.hadoop.hbase.regionserver.KeyValueScanner scanner, int kvLimit) throws IOException
Parameters:
scanner - Scanner over the file which will be compressed.
kvLimit - Maximum number of KeyValues which will be processed.
Throws:
IOException - thrown if scanner is invalid

public boolean verifyCodecs(org.apache.hadoop.hbase.regionserver.KeyValueScanner scanner, int kvLimit) throws IOException
Parameters:
scanner - Scanner over the file which was compressed.
kvLimit - Maximum number of KeyValues which will be processed.
Throws:
IOException - thrown if scanner is invalid

public void benchmarkCodecs() throws IOException
Throws:
IOException
private int benchmarkEncoder(int previousTotalSize, org.apache.hadoop.hbase.io.encoding.EncodedDataBlock codec)
Parameters:
previousTotalSize - Total size used for verification. Use -1 if unknown.
codec - Tested encoder.

private void benchmarkDefaultCompression(int totalSize, byte[] rawBuffer) throws IOException
Throws:
IOException
public void benchmarkAlgorithm(org.apache.hadoop.hbase.io.compress.Compression.Algorithm algorithm, String name, byte[] buffer, int offset, int length) throws IOException
Parameters:
algorithm - Compression algorithm.
name - Name of algorithm.
buffer - Buffer to be compressed.
offset - Position of the beginning of the data.
length - Length of data in buffer.
Throws:
IOException
private static void printBenchmarkResult(int totalSize, List<Long> durationsInNanoSec, DataBlockEncodingTool.Manipulation manipulation)
private static void outputTuple(String caption, String format, Object... values)
public void displayStatistics() throws IOException
IOException
private void outputTuplePct(String caption, long size)
private void outputSavings(String caption, long part, long whole)
public static void testCodecs(org.apache.hadoop.conf.Configuration conf, int kvLimit, String hfilePath, String compressionName, boolean doBenchmark, boolean doVerify) throws IOException
Parameters:
kvLimit - The limit of KeyValues which will be analyzed.
hfilePath - An HFile path on the file system.
compressionName - Compression algorithm used for comparison.
doBenchmark - Run performance benchmarks.
doVerify - Verify correctness.
Throws:
IOException - When hfilePath is incorrect.

private static void printUsage(org.apache.hbase.thirdparty.org.apache.commons.cli.Options options)
public static void main(String[] args) throws IOException
Parameters:
args - Should have length at least 1 and hold the file path to the HFile.
Throws:
IOException - If you specified the wrong file.

Copyright © 2007–2020 The Apache Software Foundation. All rights reserved.