2016-02-09 23:12:00 +00:00
|
|
|
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
|
2017-07-15 23:03:42 +00:00
|
|
|
// This source code is licensed under both the GPLv2 (found in the
|
|
|
|
// COPYING file in the root directory) and Apache 2.0 License
|
|
|
|
// (found in the LICENSE.Apache file in the root directory).
|
2014-11-08 01:23:58 +00:00
|
|
|
//
|
|
|
|
|
2020-06-25 02:30:15 +00:00
|
|
|
#include "rocksdb/sst_dump_tool.h"
|
2014-11-08 01:23:58 +00:00
|
|
|
|
2019-06-06 20:52:39 +00:00
|
|
|
#include <cinttypes>
|
2016-05-06 23:09:09 +00:00
|
|
|
#include <iostream>
|
2016-01-13 02:20:06 +00:00
|
|
|
|
2022-04-18 19:47:16 +00:00
|
|
|
#include "options/options_helper.h"
|
2015-09-01 01:35:12 +00:00
|
|
|
#include "port/port.h"
|
2022-04-18 19:47:16 +00:00
|
|
|
#include "rocksdb/convenience.h"
|
2020-06-25 02:30:15 +00:00
|
|
|
#include "rocksdb/utilities/ldb_cmd.h"
|
|
|
|
#include "table/sst_file_dumper.h"
|
2014-11-08 01:23:58 +00:00
|
|
|
|
2020-02-20 20:07:53 +00:00
|
|
|
namespace ROCKSDB_NAMESPACE {
|
2014-11-08 01:23:58 +00:00
|
|
|
|
2017-08-11 22:49:17 +00:00
|
|
|
static const std::vector<std::pair<CompressionType, const char*>>
|
|
|
|
kCompressions = {
|
|
|
|
{CompressionType::kNoCompression, "kNoCompression"},
|
|
|
|
{CompressionType::kSnappyCompression, "kSnappyCompression"},
|
|
|
|
{CompressionType::kZlibCompression, "kZlibCompression"},
|
|
|
|
{CompressionType::kBZip2Compression, "kBZip2Compression"},
|
|
|
|
{CompressionType::kLZ4Compression, "kLZ4Compression"},
|
|
|
|
{CompressionType::kLZ4HCCompression, "kLZ4HCCompression"},
|
|
|
|
{CompressionType::kXpressCompression, "kXpressCompression"},
|
|
|
|
{CompressionType::kZSTD, "kZSTD"}};
|
|
|
|
|
2014-11-08 01:23:58 +00:00
|
|
|
namespace {
|
|
|
|
|
2020-06-09 17:01:12 +00:00
|
|
|
void print_help(bool to_stderr) {
|
2022-04-18 19:47:16 +00:00
|
|
|
std::string supported_compressions;
|
|
|
|
for (CompressionType ct : GetSupportedCompressions()) {
|
|
|
|
if (!supported_compressions.empty()) {
|
|
|
|
supported_compressions += ", ";
|
|
|
|
}
|
|
|
|
std::string str;
|
|
|
|
Status s = GetStringFromCompressionType(&str, ct);
|
|
|
|
assert(s.ok());
|
|
|
|
supported_compressions += str;
|
|
|
|
}
|
2019-09-19 19:32:33 +00:00
|
|
|
fprintf(
|
2020-06-09 17:01:12 +00:00
|
|
|
to_stderr ? stderr : stdout,
|
2020-06-08 20:56:22 +00:00
|
|
|
R"(sst_dump --file=<data_dir_OR_sst_file> [--command=check|scan|raw|recompress|identify]
|
2016-04-08 19:05:02 +00:00
|
|
|
--file=<data_dir_OR_sst_file>
|
|
|
|
Path to SST file or directory containing SST files
|
|
|
|
|
2019-10-09 02:17:39 +00:00
|
|
|
--env_uri=<uri of underlying Env>
|
2021-03-10 04:47:26 +00:00
|
|
|
URI of underlying Env, mutually exclusive with fs_uri
|
|
|
|
|
|
|
|
--fs_uri=<uri of underlying FileSystem>
|
|
|
|
URI of underlying FileSystem, mutually exclusive with env_uri
|
2019-10-09 02:17:39 +00:00
|
|
|
|
2020-06-08 20:56:22 +00:00
|
|
|
--command=check|scan|raw|verify|identify
|
2019-09-13 23:29:16 +00:00
|
|
|
check: Iterate over entries in files but don't print anything except if an error is encountered (default command)
|
2016-04-08 19:05:02 +00:00
|
|
|
scan: Iterate over entries in files and print them to screen
|
|
|
|
raw: Dump all the table contents to <file_name>_dump.txt
|
2019-09-13 23:29:16 +00:00
|
|
|
verify: Iterate all the blocks in files verifying checksum to detect possible corruption but don't print anything except if a corruption is encountered
|
2017-08-11 22:49:17 +00:00
|
|
|
recompress: reports the SST file size if recompressed with different
|
|
|
|
compression types
|
2020-06-08 20:56:22 +00:00
|
|
|
identify: Reports a file is a valid SST file or lists all valid SST files under a directory
|
2016-04-08 19:05:02 +00:00
|
|
|
|
|
|
|
--output_hex
|
|
|
|
Can be combined with scan command to print the keys and values in Hex
|
|
|
|
|
2019-10-18 02:35:22 +00:00
|
|
|
--decode_blob_index
|
|
|
|
Decode blob indexes and print them in a human-readable format during scans.
|
|
|
|
|
2016-04-08 19:05:02 +00:00
|
|
|
--from=<user_key>
|
|
|
|
Key to start reading from when executing check|scan
|
|
|
|
|
|
|
|
--to=<user_key>
|
|
|
|
Key to stop reading at when executing check|scan
|
|
|
|
|
2017-03-13 17:24:52 +00:00
|
|
|
--prefix=<user_key>
|
|
|
|
Returns all keys with this prefix when executing check|scan
|
|
|
|
Cannot be used in conjunction with --from
|
|
|
|
|
2016-04-08 19:05:02 +00:00
|
|
|
--read_num=<num>
|
|
|
|
Maximum number of entries to read when executing check|scan
|
|
|
|
|
|
|
|
--verify_checksum
|
|
|
|
Verify file checksum when executing check|scan
|
|
|
|
|
|
|
|
--input_key_hex
|
|
|
|
Can be combined with --from and --to to indicate that these values are encoded in Hex
|
|
|
|
|
|
|
|
--show_properties
|
2017-08-11 22:49:17 +00:00
|
|
|
Print table properties after iterating over the file when executing
|
2020-06-08 20:56:22 +00:00
|
|
|
check|scan|raw|identify
|
2016-04-08 19:05:02 +00:00
|
|
|
|
|
|
|
--set_block_size=<block_size>
|
2017-08-11 22:49:17 +00:00
|
|
|
Can be combined with --command=recompress to set the block size that will
|
|
|
|
be used when trying different compression algorithms
|
|
|
|
|
|
|
|
--compression_types=<comma-separated list of CompressionType members, e.g.,
|
|
|
|
kSnappyCompression>
|
|
|
|
Can be combined with --command=recompress to run recompression for this
|
|
|
|
list of compression types
|
2022-04-18 19:47:16 +00:00
|
|
|
Supported compression types: %s
|
2016-11-10 18:06:06 +00:00
|
|
|
|
|
|
|
--parse_internal_key=<0xKEY>
|
|
|
|
Convenience option to parse an internal key on the command line. Dumps the
|
|
|
|
internal key in hex format {'key' @ SN: type}
|
2020-04-27 19:33:49 +00:00
|
|
|
|
|
|
|
--compression_level_from=<compression_level>
|
|
|
|
Compression level to start compressing when executing recompress. One compression type
|
|
|
|
and compression_level_to must also be specified
|
|
|
|
|
|
|
|
--compression_level_to=<compression_level>
|
|
|
|
Compression level to stop compressing when executing recompress. One compression type
|
|
|
|
and compression_level_from must also be specified
|
2020-09-03 22:48:29 +00:00
|
|
|
|
|
|
|
--compression_max_dict_bytes=<uint32_t>
|
|
|
|
Maximum size of dictionary used to prime the compression library
|
|
|
|
|
|
|
|
--compression_zstd_max_train_bytes=<uint32_t>
|
|
|
|
Maximum size of training data passed to zstd's dictionary trainer
|
Limit buffering for collecting samples for compression dictionary (#7970)
Summary:
For dictionary compression, we need to collect some representative samples of the data to be compressed, which we use to either generate or train (when `CompressionOptions::zstd_max_train_bytes > 0`) a dictionary. Previously, the strategy was to buffer all the data blocks during flush, and up to the target file size during compaction. That strategy allowed us to randomly pick samples from as wide a range as possible that'd be guaranteed to land in a single output file.
However, some users try to make huge files in memory-constrained environments, where this strategy can cause OOM. This PR introduces an option, `CompressionOptions::max_dict_buffer_bytes`, that limits how much data blocks are buffered before we switch to unbuffered mode (which means creating the per-SST dictionary, writing out the buffered data, and compressing/writing new blocks as soon as they are built). It is not strict as we currently buffer more than just data blocks -- also keys are buffered. But it does make a step towards giving users predictable memory usage.
Related changes include:
- Changed sampling for dictionary compression to select unique data blocks when there is limited availability of data blocks
- Made use of `BlockBuilder::SwapAndReset()` to save an allocation+memcpy when buffering data blocks for building a dictionary
- Changed `ParseBoolean()` to accept an input containing characters after the boolean. This is necessary since, with this PR, a value for `CompressionOptions::enabled` is no longer necessarily the final component in the `CompressionOptions` string.
Pull Request resolved: https://github.com/facebook/rocksdb/pull/7970
Test Plan:
- updated `CompressionOptions` unit tests to verify limit is respected (to the extent expected in the current implementation) in various scenarios of flush/compaction to bottommost/non-bottommost level
- looked at jemalloc heap profiles right before and after switching to unbuffered mode during flush/compaction. Verified memory usage in buffering is proportional to the limit set.
Reviewed By: pdillinger
Differential Revision: D26467994
Pulled By: ajkr
fbshipit-source-id: 3da4ef9fba59974e4ef40e40c01611002c861465
2021-02-19 22:06:59 +00:00
|
|
|
|
|
|
|
--compression_max_dict_buffer_bytes=<int64_t>
|
|
|
|
Limit on buffer size from which we collect samples for dictionary generation.
|
Support using ZDICT_finalizeDictionary to generate zstd dictionary (#9857)
Summary:
An untrained dictionary is currently simply the concatenation of several samples. The ZSTD API, ZDICT_finalizeDictionary(), can improve such a dictionary's effectiveness at low cost. This PR changes how dictionary is created by calling the ZSTD ZDICT_finalizeDictionary() API instead of creating raw content dictionary (when max_dict_buffer_bytes > 0), and pass in all buffered uncompressed data blocks as samples.
Pull Request resolved: https://github.com/facebook/rocksdb/pull/9857
Test Plan:
#### db_bench test for cpu/memory of compression+decompression and space saving on synthetic data:
Set up: change the parameter [here](https://github.com/facebook/rocksdb/blob/fb9a167a55e0970b1ef6f67c1600c8d9c4c6114f/tools/db_bench_tool.cc#L1766) to 16384 to make synthetic data more compressible.
```
# linked local ZSTD with version 1.5.2
# DEBUG_LEVEL=0 ROCKSDB_NO_FBCODE=1 ROCKSDB_DISABLE_ZSTD=1 EXTRA_CXXFLAGS="-DZSTD_STATIC_LINKING_ONLY -DZSTD -I/data/users/changyubi/install/include/" EXTRA_LDFLAGS="-L/data/users/changyubi/install/lib/ -l:libzstd.a" make -j32 db_bench
dict_bytes=16384
train_bytes=1048576
echo "========== No Dictionary =========="
TEST_TMPDIR=/dev/shm ./db_bench -benchmarks=filluniquerandom,compact -num=10000000 -compression_type=zstd -compression_max_dict_bytes=0 -block_size=4096 -max_background_jobs=24 -memtablerep=vector -allow_concurrent_memtable_write=false -disable_wal=true -max_write_buffer_number=8 >/dev/null 2>&1
TEST_TMPDIR=/dev/shm /usr/bin/time ./db_bench -use_existing_db=true -benchmarks=compact -compression_type=zstd -compression_max_dict_bytes=0 -block_size=4096 2>&1 | grep elapsed
du -hc /dev/shm/dbbench/*sst | grep total
echo "========== Raw Content Dictionary =========="
TEST_TMPDIR=/dev/shm ./db_bench_main -benchmarks=filluniquerandom,compact -num=10000000 -compression_type=zstd -compression_max_dict_bytes=$dict_bytes -block_size=4096 -max_background_jobs=24 -memtablerep=vector -allow_concurrent_memtable_write=false -disable_wal=true -max_write_buffer_number=8 >/dev/null 2>&1
TEST_TMPDIR=/dev/shm /usr/bin/time ./db_bench_main -use_existing_db=true -benchmarks=compact -compression_type=zstd -compression_max_dict_bytes=$dict_bytes -block_size=4096 2>&1 | grep elapsed
du -hc /dev/shm/dbbench/*sst | grep total
echo "========== FinalizeDictionary =========="
TEST_TMPDIR=/dev/shm ./db_bench -benchmarks=filluniquerandom,compact -num=10000000 -compression_type=zstd -compression_max_dict_bytes=$dict_bytes -compression_zstd_max_train_bytes=$train_bytes -compression_use_zstd_dict_trainer=false -block_size=4096 -max_background_jobs=24 -memtablerep=vector -allow_concurrent_memtable_write=false -disable_wal=true -max_write_buffer_number=8 >/dev/null 2>&1
TEST_TMPDIR=/dev/shm /usr/bin/time ./db_bench -use_existing_db=true -benchmarks=compact -compression_type=zstd -compression_max_dict_bytes=$dict_bytes -compression_zstd_max_train_bytes=$train_bytes -compression_use_zstd_dict_trainer=false -block_size=4096 2>&1 | grep elapsed
du -hc /dev/shm/dbbench/*sst | grep total
echo "========== TrainDictionary =========="
TEST_TMPDIR=/dev/shm ./db_bench -benchmarks=filluniquerandom,compact -num=10000000 -compression_type=zstd -compression_max_dict_bytes=$dict_bytes -compression_zstd_max_train_bytes=$train_bytes -block_size=4096 -max_background_jobs=24 -memtablerep=vector -allow_concurrent_memtable_write=false -disable_wal=true -max_write_buffer_number=8 >/dev/null 2>&1
TEST_TMPDIR=/dev/shm /usr/bin/time ./db_bench -use_existing_db=true -benchmarks=compact -compression_type=zstd -compression_max_dict_bytes=$dict_bytes -compression_zstd_max_train_bytes=$train_bytes -block_size=4096 2>&1 | grep elapsed
du -hc /dev/shm/dbbench/*sst | grep total
# Result: TrainDictionary is much better on space saving, but FinalizeDictionary seems to use less memory.
# before compression data size: 1.2GB
dict_bytes=16384
max_dict_buffer_bytes = 1048576
space cpu/memory
No Dictionary 468M 14.93user 1.00system 0:15.92elapsed 100%CPU (0avgtext+0avgdata 23904maxresident)k
Raw Dictionary 251M 15.81user 0.80system 0:16.56elapsed 100%CPU (0avgtext+0avgdata 156808maxresident)k
FinalizeDictionary 236M 11.93user 0.64system 0:12.56elapsed 100%CPU (0avgtext+0avgdata 89548maxresident)k
TrainDictionary 84M 7.29user 0.45system 0:07.75elapsed 100%CPU (0avgtext+0avgdata 97288maxresident)k
```
#### Benchmark on 10 sample SST files for spacing saving and CPU time on compression:
FinalizeDictionary is comparable to TrainDictionary in terms of space saving, and takes less time in compression.
```
dict_bytes=16384
train_bytes=1048576
for sst_file in `ls ../temp/myrock-sst/`
do
echo "********** $sst_file **********"
echo "========== No Dictionary =========="
./sst_dump --file="../temp/myrock-sst/$sst_file" --command=recompress --compression_level_from=6 --compression_level_to=6 --compression_types=kZSTD
echo "========== Raw Content Dictionary =========="
./sst_dump --file="../temp/myrock-sst/$sst_file" --command=recompress --compression_level_from=6 --compression_level_to=6 --compression_types=kZSTD --compression_max_dict_bytes=$dict_bytes
echo "========== FinalizeDictionary =========="
./sst_dump --file="../temp/myrock-sst/$sst_file" --command=recompress --compression_level_from=6 --compression_level_to=6 --compression_types=kZSTD --compression_max_dict_bytes=$dict_bytes --compression_zstd_max_train_bytes=$train_bytes --compression_use_zstd_finalize_dict
echo "========== TrainDictionary =========="
./sst_dump --file="../temp/myrock-sst/$sst_file" --command=recompress --compression_level_from=6 --compression_level_to=6 --compression_types=kZSTD --compression_max_dict_bytes=$dict_bytes --compression_zstd_max_train_bytes=$train_bytes
done
010240.sst (Size/Time) 011029.sst 013184.sst 021552.sst 185054.sst 185137.sst 191666.sst 7560381.sst 7604174.sst 7635312.sst
No Dictionary 28165569 / 2614419 32899411 / 2976832 32977848 / 3055542 31966329 / 2004590 33614351 / 1755877 33429029 / 1717042 33611933 / 1776936 33634045 / 2771417 33789721 / 2205414 33592194 / 388254
Raw Content Dictionary 28019950 / 2697961 33748665 / 3572422 33896373 / 3534701 26418431 / 2259658 28560825 / 1839168 28455030 / 1846039 28494319 / 1861349 32391599 / 3095649 33772142 / 2407843 33592230 / 474523
FinalizeDictionary 27896012 / 2650029 33763886 / 3719427 33904283 / 3552793 26008225 / 2198033 28111872 / 1869530 28014374 / 1789771 28047706 / 1848300 32296254 / 3204027 33698698 / 2381468 33592344 / 517433
TrainDictionary 28046089 / 2740037 33706480 / 3679019 33885741 / 3629351 25087123 / 2204558 27194353 / 1970207 27234229 / 1896811 27166710 / 1903119 32011041 / 3322315 32730692 / 2406146 33608631 / 570593
```
#### Decompression/Read test:
With FinalizeDictionary/TrainDictionary, some data structure used for decompression are in stored in dictionary, so they are expected to be faster in terms of decompression/reads.
```
dict_bytes=16384
train_bytes=1048576
echo "No Dictionary"
TEST_TMPDIR=/dev/shm/ ./db_bench -benchmarks=filluniquerandom,compact -compression_type=zstd -compression_max_dict_bytes=0 > /dev/null 2>&1
TEST_TMPDIR=/dev/shm/ ./db_bench -use_existing_db=true -benchmarks=readrandom -cache_size=0 -compression_type=zstd -compression_max_dict_bytes=0 2>&1 | grep MB/s
echo "Raw Dictionary"
TEST_TMPDIR=/dev/shm/ ./db_bench -benchmarks=filluniquerandom,compact -compression_type=zstd -compression_max_dict_bytes=$dict_bytes > /dev/null 2>&1
TEST_TMPDIR=/dev/shm/ ./db_bench -use_existing_db=true -benchmarks=readrandom -cache_size=0 -compression_type=zstd -compression_max_dict_bytes=$dict_bytes 2>&1 | grep MB/s
echo "FinalizeDict"
TEST_TMPDIR=/dev/shm/ ./db_bench -benchmarks=filluniquerandom,compact -compression_type=zstd -compression_max_dict_bytes=$dict_bytes -compression_zstd_max_train_bytes=$train_bytes -compression_use_zstd_dict_trainer=false > /dev/null 2>&1
TEST_TMPDIR=/dev/shm/ ./db_bench -use_existing_db=true -benchmarks=readrandom -cache_size=0 -compression_type=zstd -compression_max_dict_bytes=$dict_bytes -compression_zstd_max_train_bytes=$train_bytes -compression_use_zstd_dict_trainer=false 2>&1 | grep MB/s
echo "Train Dictionary"
TEST_TMPDIR=/dev/shm/ ./db_bench -benchmarks=filluniquerandom,compact -compression_type=zstd -compression_max_dict_bytes=$dict_bytes -compression_zstd_max_train_bytes=$train_bytes > /dev/null 2>&1
TEST_TMPDIR=/dev/shm/ ./db_bench -use_existing_db=true -benchmarks=readrandom -cache_size=0 -compression_type=zstd -compression_max_dict_bytes=$dict_bytes -compression_zstd_max_train_bytes=$train_bytes 2>&1 | grep MB/s
No Dictionary
readrandom : 12.183 micros/op 82082 ops/sec 12.183 seconds 1000000 operations; 9.1 MB/s (1000000 of 1000000 found)
Raw Dictionary
readrandom : 12.314 micros/op 81205 ops/sec 12.314 seconds 1000000 operations; 9.0 MB/s (1000000 of 1000000 found)
FinalizeDict
readrandom : 9.787 micros/op 102180 ops/sec 9.787 seconds 1000000 operations; 11.3 MB/s (1000000 of 1000000 found)
Train Dictionary
readrandom : 9.698 micros/op 103108 ops/sec 9.699 seconds 1000000 operations; 11.4 MB/s (1000000 of 1000000 found)
```
Reviewed By: ajkr
Differential Revision: D35720026
Pulled By: cbi42
fbshipit-source-id: 24d230fdff0fd28a1bb650658798f00dfcfb2a1f
2022-05-20 19:09:09 +00:00
|
|
|
|
|
|
|
--compression_use_zstd_finalize_dict
|
|
|
|
Use zstd's finalizeDictionary() API instead of zstd's dictionary trainer to generate dictionary.
|
2022-04-18 19:47:16 +00:00
|
|
|
)",
|
|
|
|
supported_compressions.c_str());
|
2014-11-08 01:23:58 +00:00
|
|
|
}
|
|
|
|
|
2020-05-13 01:21:32 +00:00
|
|
|
// Tries to interpret `arg` as the integer-valued option named `arg_name`.
//
// arg_name includes the whole prefix, e.g. "--my_arg=".
// arg_val receives the parsed value.
// Returns true if `arg` starts with arg_name, false otherwise (in which case
// *arg_val is left untouched).
// If the prefix matches but the remainder is not a complete integer (empty,
// non-numeric, or followed by extra characters such as "4096k"), prints
// err_msg to stderr and exits with status 1.
bool ParseIntArg(const char* arg, const std::string& arg_name,
                 const std::string& err_msg, int64_t* arg_val) {
  if (strncmp(arg, arg_name.c_str(), arg_name.size()) != 0) {
    return false;
  }
  std::istringstream iss(arg + arg_name.size());
  iss >> *arg_val;
  // A successful extraction alone is not enough: anything left in the stream
  // means the value had trailing garbage, which is treated as a parse error
  // rather than silently truncated.
  if (iss.fail() || iss.peek() != std::istringstream::traits_type::eof()) {
    fprintf(stderr, "%s\n", err_msg.c_str());
    exit(1);
  }
  return true;
}
|
2014-11-08 01:23:58 +00:00
|
|
|
} // namespace
|
|
|
|
|
2020-06-09 17:01:12 +00:00
|
|
|
int SSTDumpTool::Run(int argc, char const* const* argv, Options options) {
|
2021-06-15 10:42:52 +00:00
|
|
|
std::string env_uri, fs_uri;
|
2014-11-08 01:23:58 +00:00
|
|
|
const char* dir_or_file = nullptr;
|
2017-10-19 17:48:47 +00:00
|
|
|
uint64_t read_num = std::numeric_limits<uint64_t>::max();
|
2014-11-08 01:23:58 +00:00
|
|
|
std::string command;
|
|
|
|
|
|
|
|
char junk;
|
|
|
|
uint64_t n;
|
|
|
|
bool verify_checksum = false;
|
|
|
|
bool output_hex = false;
|
2019-10-18 02:35:22 +00:00
|
|
|
bool decode_blob_index = false;
|
2014-11-08 01:23:58 +00:00
|
|
|
bool input_key_hex = false;
|
|
|
|
bool has_from = false;
|
|
|
|
bool has_to = false;
|
2017-03-13 17:24:52 +00:00
|
|
|
bool use_from_as_prefix = false;
|
2014-11-08 01:23:58 +00:00
|
|
|
bool show_properties = false;
|
2017-01-04 02:24:15 +00:00
|
|
|
bool show_summary = false;
|
2015-07-24 00:05:33 +00:00
|
|
|
bool set_block_size = false;
|
2020-04-27 19:33:49 +00:00
|
|
|
bool has_compression_level_from = false;
|
|
|
|
bool has_compression_level_to = false;
|
|
|
|
bool has_specified_compression_types = false;
|
2014-11-08 01:23:58 +00:00
|
|
|
std::string from_key;
|
|
|
|
std::string to_key;
|
2015-07-24 00:05:33 +00:00
|
|
|
std::string block_size_str;
|
2020-04-27 19:33:49 +00:00
|
|
|
std::string compression_level_from_str;
|
|
|
|
std::string compression_level_to_str;
|
2017-10-19 17:48:47 +00:00
|
|
|
size_t block_size = 0;
|
2020-05-13 01:21:32 +00:00
|
|
|
size_t readahead_size = 2 * 1024 * 1024;
|
2017-08-11 22:49:17 +00:00
|
|
|
std::vector<std::pair<CompressionType, const char*>> compression_types;
|
2017-01-04 02:24:15 +00:00
|
|
|
uint64_t total_num_files = 0;
|
|
|
|
uint64_t total_num_data_blocks = 0;
|
|
|
|
uint64_t total_data_block_size = 0;
|
|
|
|
uint64_t total_index_block_size = 0;
|
|
|
|
uint64_t total_filter_block_size = 0;
|
2020-04-27 19:33:49 +00:00
|
|
|
int32_t compress_level_from = CompressionOptions::kDefaultCompressionLevel;
|
|
|
|
int32_t compress_level_to = CompressionOptions::kDefaultCompressionLevel;
|
2020-09-03 22:48:29 +00:00
|
|
|
uint32_t compression_max_dict_bytes =
|
|
|
|
ROCKSDB_NAMESPACE::CompressionOptions().max_dict_bytes;
|
|
|
|
uint32_t compression_zstd_max_train_bytes =
|
|
|
|
ROCKSDB_NAMESPACE::CompressionOptions().zstd_max_train_bytes;
|
Limit buffering for collecting samples for compression dictionary (#7970)
Summary:
For dictionary compression, we need to collect some representative samples of the data to be compressed, which we use to either generate or train (when `CompressionOptions::zstd_max_train_bytes > 0`) a dictionary. Previously, the strategy was to buffer all the data blocks during flush, and up to the target file size during compaction. That strategy allowed us to randomly pick samples from as wide a range as possible that'd be guaranteed to land in a single output file.
However, some users try to make huge files in memory-constrained environments, where this strategy can cause OOM. This PR introduces an option, `CompressionOptions::max_dict_buffer_bytes`, that limits how much data blocks are buffered before we switch to unbuffered mode (which means creating the per-SST dictionary, writing out the buffered data, and compressing/writing new blocks as soon as they are built). It is not strict as we currently buffer more than just data blocks -- also keys are buffered. But it does make a step towards giving users predictable memory usage.
Related changes include:
- Changed sampling for dictionary compression to select unique data blocks when there is limited availability of data blocks
- Made use of `BlockBuilder::SwapAndReset()` to save an allocation+memcpy when buffering data blocks for building a dictionary
- Changed `ParseBoolean()` to accept an input containing characters after the boolean. This is necessary since, with this PR, a value for `CompressionOptions::enabled` is no longer necessarily the final component in the `CompressionOptions` string.
Pull Request resolved: https://github.com/facebook/rocksdb/pull/7970
Test Plan:
- updated `CompressionOptions` unit tests to verify limit is respected (to the extent expected in the current implementation) in various scenarios of flush/compaction to bottommost/non-bottommost level
- looked at jemalloc heap profiles right before and after switching to unbuffered mode during flush/compaction. Verified memory usage in buffering is proportional to the limit set.
Reviewed By: pdillinger
Differential Revision: D26467994
Pulled By: ajkr
fbshipit-source-id: 3da4ef9fba59974e4ef40e40c01611002c861465
2021-02-19 22:06:59 +00:00
|
|
|
uint64_t compression_max_dict_buffer_bytes =
|
|
|
|
ROCKSDB_NAMESPACE::CompressionOptions().max_dict_buffer_bytes;
|
Support using ZDICT_finalizeDictionary to generate zstd dictionary (#9857)
Summary:
An untrained dictionary is currently simply the concatenation of several samples. The ZSTD API, ZDICT_finalizeDictionary(), can improve such a dictionary's effectiveness at low cost. This PR changes how dictionary is created by calling the ZSTD ZDICT_finalizeDictionary() API instead of creating raw content dictionary (when max_dict_buffer_bytes > 0), and pass in all buffered uncompressed data blocks as samples.
Pull Request resolved: https://github.com/facebook/rocksdb/pull/9857
Test Plan:
#### db_bench test for cpu/memory of compression+decompression and space saving on synthetic data:
Set up: change the parameter [here](https://github.com/facebook/rocksdb/blob/fb9a167a55e0970b1ef6f67c1600c8d9c4c6114f/tools/db_bench_tool.cc#L1766) to 16384 to make synthetic data more compressible.
```
# linked local ZSTD with version 1.5.2
# DEBUG_LEVEL=0 ROCKSDB_NO_FBCODE=1 ROCKSDB_DISABLE_ZSTD=1 EXTRA_CXXFLAGS="-DZSTD_STATIC_LINKING_ONLY -DZSTD -I/data/users/changyubi/install/include/" EXTRA_LDFLAGS="-L/data/users/changyubi/install/lib/ -l:libzstd.a" make -j32 db_bench
dict_bytes=16384
train_bytes=1048576
echo "========== No Dictionary =========="
TEST_TMPDIR=/dev/shm ./db_bench -benchmarks=filluniquerandom,compact -num=10000000 -compression_type=zstd -compression_max_dict_bytes=0 -block_size=4096 -max_background_jobs=24 -memtablerep=vector -allow_concurrent_memtable_write=false -disable_wal=true -max_write_buffer_number=8 >/dev/null 2>&1
TEST_TMPDIR=/dev/shm /usr/bin/time ./db_bench -use_existing_db=true -benchmarks=compact -compression_type=zstd -compression_max_dict_bytes=0 -block_size=4096 2>&1 | grep elapsed
du -hc /dev/shm/dbbench/*sst | grep total
echo "========== Raw Content Dictionary =========="
TEST_TMPDIR=/dev/shm ./db_bench_main -benchmarks=filluniquerandom,compact -num=10000000 -compression_type=zstd -compression_max_dict_bytes=$dict_bytes -block_size=4096 -max_background_jobs=24 -memtablerep=vector -allow_concurrent_memtable_write=false -disable_wal=true -max_write_buffer_number=8 >/dev/null 2>&1
TEST_TMPDIR=/dev/shm /usr/bin/time ./db_bench_main -use_existing_db=true -benchmarks=compact -compression_type=zstd -compression_max_dict_bytes=$dict_bytes -block_size=4096 2>&1 | grep elapsed
du -hc /dev/shm/dbbench/*sst | grep total
echo "========== FinalizeDictionary =========="
TEST_TMPDIR=/dev/shm ./db_bench -benchmarks=filluniquerandom,compact -num=10000000 -compression_type=zstd -compression_max_dict_bytes=$dict_bytes -compression_zstd_max_train_bytes=$train_bytes -compression_use_zstd_dict_trainer=false -block_size=4096 -max_background_jobs=24 -memtablerep=vector -allow_concurrent_memtable_write=false -disable_wal=true -max_write_buffer_number=8 >/dev/null 2>&1
TEST_TMPDIR=/dev/shm /usr/bin/time ./db_bench -use_existing_db=true -benchmarks=compact -compression_type=zstd -compression_max_dict_bytes=$dict_bytes -compression_zstd_max_train_bytes=$train_bytes -compression_use_zstd_dict_trainer=false -block_size=4096 2>&1 | grep elapsed
du -hc /dev/shm/dbbench/*sst | grep total
echo "========== TrainDictionary =========="
TEST_TMPDIR=/dev/shm ./db_bench -benchmarks=filluniquerandom,compact -num=10000000 -compression_type=zstd -compression_max_dict_bytes=$dict_bytes -compression_zstd_max_train_bytes=$train_bytes -block_size=4096 -max_background_jobs=24 -memtablerep=vector -allow_concurrent_memtable_write=false -disable_wal=true -max_write_buffer_number=8 >/dev/null 2>&1
TEST_TMPDIR=/dev/shm /usr/bin/time ./db_bench -use_existing_db=true -benchmarks=compact -compression_type=zstd -compression_max_dict_bytes=$dict_bytes -compression_zstd_max_train_bytes=$train_bytes -block_size=4096 2>&1 | grep elapsed
du -hc /dev/shm/dbbench/*sst | grep total
# Result: TrainDictionary is much better on space saving, but FinalizeDictionary seems to use less memory.
# before compression data size: 1.2GB
dict_bytes=16384
max_dict_buffer_bytes = 1048576
space cpu/memory
No Dictionary 468M 14.93user 1.00system 0:15.92elapsed 100%CPU (0avgtext+0avgdata 23904maxresident)k
Raw Dictionary 251M 15.81user 0.80system 0:16.56elapsed 100%CPU (0avgtext+0avgdata 156808maxresident)k
FinalizeDictionary 236M 11.93user 0.64system 0:12.56elapsed 100%CPU (0avgtext+0avgdata 89548maxresident)k
TrainDictionary 84M 7.29user 0.45system 0:07.75elapsed 100%CPU (0avgtext+0avgdata 97288maxresident)k
```
#### Benchmark on 10 sample SST files for spacing saving and CPU time on compression:
FinalizeDictionary is comparable to TrainDictionary in terms of space saving, and takes less time in compression.
```
dict_bytes=16384
train_bytes=1048576
for sst_file in `ls ../temp/myrock-sst/`
do
echo "********** $sst_file **********"
echo "========== No Dictionary =========="
./sst_dump --file="../temp/myrock-sst/$sst_file" --command=recompress --compression_level_from=6 --compression_level_to=6 --compression_types=kZSTD
echo "========== Raw Content Dictionary =========="
./sst_dump --file="../temp/myrock-sst/$sst_file" --command=recompress --compression_level_from=6 --compression_level_to=6 --compression_types=kZSTD --compression_max_dict_bytes=$dict_bytes
echo "========== FinalizeDictionary =========="
./sst_dump --file="../temp/myrock-sst/$sst_file" --command=recompress --compression_level_from=6 --compression_level_to=6 --compression_types=kZSTD --compression_max_dict_bytes=$dict_bytes --compression_zstd_max_train_bytes=$train_bytes --compression_use_zstd_finalize_dict
echo "========== TrainDictionary =========="
./sst_dump --file="../temp/myrock-sst/$sst_file" --command=recompress --compression_level_from=6 --compression_level_to=6 --compression_types=kZSTD --compression_max_dict_bytes=$dict_bytes --compression_zstd_max_train_bytes=$train_bytes
done
010240.sst (Size/Time) 011029.sst 013184.sst 021552.sst 185054.sst 185137.sst 191666.sst 7560381.sst 7604174.sst 7635312.sst
No Dictionary 28165569 / 2614419 32899411 / 2976832 32977848 / 3055542 31966329 / 2004590 33614351 / 1755877 33429029 / 1717042 33611933 / 1776936 33634045 / 2771417 33789721 / 2205414 33592194 / 388254
Raw Content Dictionary 28019950 / 2697961 33748665 / 3572422 33896373 / 3534701 26418431 / 2259658 28560825 / 1839168 28455030 / 1846039 28494319 / 1861349 32391599 / 3095649 33772142 / 2407843 33592230 / 474523
FinalizeDictionary 27896012 / 2650029 33763886 / 3719427 33904283 / 3552793 26008225 / 2198033 28111872 / 1869530 28014374 / 1789771 28047706 / 1848300 32296254 / 3204027 33698698 / 2381468 33592344 / 517433
TrainDictionary 28046089 / 2740037 33706480 / 3679019 33885741 / 3629351 25087123 / 2204558 27194353 / 1970207 27234229 / 1896811 27166710 / 1903119 32011041 / 3322315 32730692 / 2406146 33608631 / 570593
```
#### Decompression/Read test:
With FinalizeDictionary/TrainDictionary, some data structure used for decompression are in stored in dictionary, so they are expected to be faster in terms of decompression/reads.
```
dict_bytes=16384
train_bytes=1048576
echo "No Dictionary"
TEST_TMPDIR=/dev/shm/ ./db_bench -benchmarks=filluniquerandom,compact -compression_type=zstd -compression_max_dict_bytes=0 > /dev/null 2>&1
TEST_TMPDIR=/dev/shm/ ./db_bench -use_existing_db=true -benchmarks=readrandom -cache_size=0 -compression_type=zstd -compression_max_dict_bytes=0 2>&1 | grep MB/s
echo "Raw Dictionary"
TEST_TMPDIR=/dev/shm/ ./db_bench -benchmarks=filluniquerandom,compact -compression_type=zstd -compression_max_dict_bytes=$dict_bytes > /dev/null 2>&1
TEST_TMPDIR=/dev/shm/ ./db_bench -use_existing_db=true -benchmarks=readrandom -cache_size=0 -compression_type=zstd -compression_max_dict_bytes=$dict_bytes 2>&1 | grep MB/s
echo "FinalizeDict"
TEST_TMPDIR=/dev/shm/ ./db_bench -benchmarks=filluniquerandom,compact -compression_type=zstd -compression_max_dict_bytes=$dict_bytes -compression_zstd_max_train_bytes=$train_bytes -compression_use_zstd_dict_trainer=false > /dev/null 2>&1
TEST_TMPDIR=/dev/shm/ ./db_bench -use_existing_db=true -benchmarks=readrandom -cache_size=0 -compression_type=zstd -compression_max_dict_bytes=$dict_bytes -compression_zstd_max_train_bytes=$train_bytes -compression_use_zstd_dict_trainer=false 2>&1 | grep MB/s
echo "Train Dictionary"
TEST_TMPDIR=/dev/shm/ ./db_bench -benchmarks=filluniquerandom,compact -compression_type=zstd -compression_max_dict_bytes=$dict_bytes -compression_zstd_max_train_bytes=$train_bytes > /dev/null 2>&1
TEST_TMPDIR=/dev/shm/ ./db_bench -use_existing_db=true -benchmarks=readrandom -cache_size=0 -compression_type=zstd -compression_max_dict_bytes=$dict_bytes -compression_zstd_max_train_bytes=$train_bytes 2>&1 | grep MB/s
No Dictionary
readrandom : 12.183 micros/op 82082 ops/sec 12.183 seconds 1000000 operations; 9.1 MB/s (1000000 of 1000000 found)
Raw Dictionary
readrandom : 12.314 micros/op 81205 ops/sec 12.314 seconds 1000000 operations; 9.0 MB/s (1000000 of 1000000 found)
FinalizeDict
readrandom : 9.787 micros/op 102180 ops/sec 9.787 seconds 1000000 operations; 11.3 MB/s (1000000 of 1000000 found)
Train Dictionary
readrandom : 9.698 micros/op 103108 ops/sec 9.699 seconds 1000000 operations; 11.4 MB/s (1000000 of 1000000 found)
```
Reviewed By: ajkr
Differential Revision: D35720026
Pulled By: cbi42
fbshipit-source-id: 24d230fdff0fd28a1bb650658798f00dfcfb2a1f
2022-05-20 19:09:09 +00:00
|
|
|
bool compression_use_zstd_finalize_dict =
|
|
|
|
!ROCKSDB_NAMESPACE::CompressionOptions().use_zstd_dict_trainer;
|
2020-05-13 01:21:32 +00:00
|
|
|
|
|
|
|
int64_t tmp_val;
|
|
|
|
|
2014-11-08 01:23:58 +00:00
|
|
|
for (int i = 1; i < argc; i++) {
|
2019-10-09 02:17:39 +00:00
|
|
|
if (strncmp(argv[i], "--env_uri=", 10) == 0) {
|
|
|
|
env_uri = argv[i] + 10;
|
2021-03-10 04:47:26 +00:00
|
|
|
} else if (strncmp(argv[i], "--fs_uri=", 9) == 0) {
|
|
|
|
fs_uri = argv[i] + 9;
|
2019-10-09 02:17:39 +00:00
|
|
|
} else if (strncmp(argv[i], "--file=", 7) == 0) {
|
2014-11-08 01:23:58 +00:00
|
|
|
dir_or_file = argv[i] + 7;
|
|
|
|
} else if (strcmp(argv[i], "--output_hex") == 0) {
|
|
|
|
output_hex = true;
|
2019-10-18 02:35:22 +00:00
|
|
|
} else if (strcmp(argv[i], "--decode_blob_index") == 0) {
|
|
|
|
decode_blob_index = true;
|
2014-11-08 01:23:58 +00:00
|
|
|
} else if (strcmp(argv[i], "--input_key_hex") == 0) {
|
|
|
|
input_key_hex = true;
|
2019-10-09 02:17:39 +00:00
|
|
|
} else if (sscanf(argv[i], "--read_num=%lu%c", (unsigned long*)&n, &junk) ==
|
|
|
|
1) {
|
2014-11-08 01:23:58 +00:00
|
|
|
read_num = n;
|
|
|
|
} else if (strcmp(argv[i], "--verify_checksum") == 0) {
|
|
|
|
verify_checksum = true;
|
|
|
|
} else if (strncmp(argv[i], "--command=", 10) == 0) {
|
|
|
|
command = argv[i] + 10;
|
|
|
|
} else if (strncmp(argv[i], "--from=", 7) == 0) {
|
|
|
|
from_key = argv[i] + 7;
|
|
|
|
has_from = true;
|
|
|
|
} else if (strncmp(argv[i], "--to=", 5) == 0) {
|
|
|
|
to_key = argv[i] + 5;
|
|
|
|
has_to = true;
|
2017-03-13 17:24:52 +00:00
|
|
|
} else if (strncmp(argv[i], "--prefix=", 9) == 0) {
|
|
|
|
from_key = argv[i] + 9;
|
|
|
|
use_from_as_prefix = true;
|
2014-11-08 01:23:58 +00:00
|
|
|
} else if (strcmp(argv[i], "--show_properties") == 0) {
|
|
|
|
show_properties = true;
|
2017-01-04 02:24:15 +00:00
|
|
|
} else if (strcmp(argv[i], "--show_summary") == 0) {
|
|
|
|
show_summary = true;
|
2020-05-13 01:21:32 +00:00
|
|
|
} else if (ParseIntArg(argv[i], "--set_block_size=",
|
|
|
|
"block size must be numeric", &tmp_val)) {
|
2015-07-24 00:05:33 +00:00
|
|
|
set_block_size = true;
|
2020-05-13 01:21:32 +00:00
|
|
|
block_size = static_cast<size_t>(tmp_val);
|
|
|
|
} else if (ParseIntArg(argv[i], "--readahead_size=",
|
|
|
|
"readahead_size must be numeric", &tmp_val)) {
|
|
|
|
readahead_size = static_cast<size_t>(tmp_val);
|
2017-08-11 22:49:17 +00:00
|
|
|
} else if (strncmp(argv[i], "--compression_types=", 20) == 0) {
|
|
|
|
std::string compression_types_csv = argv[i] + 20;
|
|
|
|
std::istringstream iss(compression_types_csv);
|
|
|
|
std::string compression_type;
|
2020-04-27 19:33:49 +00:00
|
|
|
has_specified_compression_types = true;
|
2017-08-11 22:49:17 +00:00
|
|
|
while (std::getline(iss, compression_type, ',')) {
|
|
|
|
auto iter = std::find_if(
|
|
|
|
kCompressions.begin(), kCompressions.end(),
|
|
|
|
[&compression_type](std::pair<CompressionType, const char*> curr) {
|
|
|
|
return curr.second == compression_type;
|
|
|
|
});
|
|
|
|
if (iter == kCompressions.end()) {
|
|
|
|
fprintf(stderr, "%s is not a valid CompressionType\n",
|
|
|
|
compression_type.c_str());
|
|
|
|
exit(1);
|
|
|
|
}
|
|
|
|
compression_types.emplace_back(*iter);
|
|
|
|
}
|
2016-11-10 18:06:06 +00:00
|
|
|
} else if (strncmp(argv[i], "--parse_internal_key=", 21) == 0) {
|
|
|
|
std::string in_key(argv[i] + 21);
|
|
|
|
try {
|
2020-02-20 20:07:53 +00:00
|
|
|
in_key = ROCKSDB_NAMESPACE::LDBCommand::HexToString(in_key);
|
2016-11-10 18:06:06 +00:00
|
|
|
} catch (...) {
|
2022-10-25 21:29:41 +00:00
|
|
|
std::cerr << "ERROR: Invalid key input '" << in_key
|
|
|
|
<< "' Use 0x{hex representation of internal rocksdb key}"
|
|
|
|
<< std::endl;
|
2016-11-10 18:06:06 +00:00
|
|
|
return -1;
|
|
|
|
}
|
2020-02-20 20:07:53 +00:00
|
|
|
Slice sl_key = ROCKSDB_NAMESPACE::Slice(in_key);
|
2016-11-10 18:06:06 +00:00
|
|
|
ParsedInternalKey ikey;
|
|
|
|
int retc = 0;
|
2020-10-28 17:11:13 +00:00
|
|
|
Status pik_status =
|
|
|
|
ParseInternalKey(sl_key, &ikey, true /* log_err_key */);
|
|
|
|
if (!pik_status.ok()) {
|
|
|
|
std::cerr << pik_status.getState() << "\n";
|
2016-11-10 18:06:06 +00:00
|
|
|
retc = -1;
|
|
|
|
}
|
2020-10-28 17:11:13 +00:00
|
|
|
fprintf(stdout, "key=%s\n", ikey.DebugString(true, true).c_str());
|
2016-11-10 18:06:06 +00:00
|
|
|
return retc;
|
2020-05-13 01:21:32 +00:00
|
|
|
} else if (ParseIntArg(argv[i], "--compression_level_from=",
|
|
|
|
"compression_level_from must be numeric",
|
|
|
|
&tmp_val)) {
|
2020-04-27 19:33:49 +00:00
|
|
|
has_compression_level_from = true;
|
2020-05-13 01:21:32 +00:00
|
|
|
compress_level_from = static_cast<int>(tmp_val);
|
|
|
|
} else if (ParseIntArg(argv[i], "--compression_level_to=",
|
|
|
|
"compression_level_to must be numeric", &tmp_val)) {
|
2020-04-27 19:33:49 +00:00
|
|
|
has_compression_level_to = true;
|
2020-05-13 01:21:32 +00:00
|
|
|
compress_level_to = static_cast<int>(tmp_val);
|
2020-09-03 22:48:29 +00:00
|
|
|
} else if (ParseIntArg(argv[i], "--compression_max_dict_bytes=",
|
|
|
|
"compression_max_dict_bytes must be numeric",
|
|
|
|
&tmp_val)) {
|
2022-05-05 20:08:21 +00:00
|
|
|
if (tmp_val < 0 || tmp_val > std::numeric_limits<uint32_t>::max()) {
|
2020-09-03 22:48:29 +00:00
|
|
|
fprintf(stderr, "compression_max_dict_bytes must be a uint32_t: '%s'\n",
|
|
|
|
argv[i]);
|
|
|
|
print_help(/*to_stderr*/ true);
|
|
|
|
return 1;
|
|
|
|
}
|
|
|
|
compression_max_dict_bytes = static_cast<uint32_t>(tmp_val);
|
|
|
|
} else if (ParseIntArg(argv[i], "--compression_zstd_max_train_bytes=",
|
|
|
|
"compression_zstd_max_train_bytes must be numeric",
|
|
|
|
&tmp_val)) {
|
2022-05-05 20:08:21 +00:00
|
|
|
if (tmp_val < 0 || tmp_val > std::numeric_limits<uint32_t>::max()) {
|
2020-09-03 22:48:29 +00:00
|
|
|
fprintf(stderr,
|
|
|
|
"compression_zstd_max_train_bytes must be a uint32_t: '%s'\n",
|
|
|
|
argv[i]);
|
|
|
|
print_help(/*to_stderr*/ true);
|
|
|
|
return 1;
|
|
|
|
}
|
|
|
|
compression_zstd_max_train_bytes = static_cast<uint32_t>(tmp_val);
|
Limit buffering for collecting samples for compression dictionary (#7970)
Summary:
For dictionary compression, we need to collect some representative samples of the data to be compressed, which we use to either generate or train (when `CompressionOptions::zstd_max_train_bytes > 0`) a dictionary. Previously, the strategy was to buffer all the data blocks during flush, and up to the target file size during compaction. That strategy allowed us to randomly pick samples from as wide a range as possible that'd be guaranteed to land in a single output file.
However, some users try to make huge files in memory-constrained environments, where this strategy can cause OOM. This PR introduces an option, `CompressionOptions::max_dict_buffer_bytes`, that limits how much data block content is buffered before we switch to unbuffered mode (which means creating the per-SST dictionary, writing out the buffered data, and compressing/writing new blocks as soon as they are built). The limit is not strict, as we currently buffer more than just data blocks -- keys are buffered as well. But it does make a step towards giving users predictable memory usage.
Related changes include:
- Changed sampling for dictionary compression to select unique data blocks when there is limited availability of data blocks
- Made use of `BlockBuilder::SwapAndReset()` to save an allocation+memcpy when buffering data blocks for building a dictionary
- Changed `ParseBoolean()` to accept an input containing characters after the boolean. This is necessary since, with this PR, a value for `CompressionOptions::enabled` is no longer necessarily the final component in the `CompressionOptions` string.
Pull Request resolved: https://github.com/facebook/rocksdb/pull/7970
Test Plan:
- updated `CompressionOptions` unit tests to verify limit is respected (to the extent expected in the current implementation) in various scenarios of flush/compaction to bottommost/non-bottommost level
- looked at jemalloc heap profiles right before and after switching to unbuffered mode during flush/compaction. Verified memory usage in buffering is proportional to the limit set.
Reviewed By: pdillinger
Differential Revision: D26467994
Pulled By: ajkr
fbshipit-source-id: 3da4ef9fba59974e4ef40e40c01611002c861465
2021-02-19 22:06:59 +00:00
|
|
|
} else if (ParseIntArg(argv[i], "--compression_max_dict_buffer_bytes=",
|
|
|
|
"compression_max_dict_buffer_bytes must be numeric",
|
|
|
|
&tmp_val)) {
|
|
|
|
if (tmp_val < 0) {
|
|
|
|
fprintf(stderr,
|
|
|
|
"compression_max_dict_buffer_bytes must be positive: '%s'\n",
|
|
|
|
argv[i]);
|
|
|
|
print_help(/*to_stderr*/ true);
|
|
|
|
return 1;
|
|
|
|
}
|
|
|
|
compression_max_dict_buffer_bytes = static_cast<uint64_t>(tmp_val);
|
Support using ZDICT_finalizeDictionary to generate zstd dictionary (#9857)
Summary:
An untrained dictionary is currently simply the concatenation of several samples. The ZSTD API, ZDICT_finalizeDictionary(), can improve such a dictionary's effectiveness at low cost. This PR changes how the dictionary is created: it calls the ZSTD ZDICT_finalizeDictionary() API instead of creating a raw content dictionary (when max_dict_buffer_bytes > 0), and passes in all buffered uncompressed data blocks as samples.
Pull Request resolved: https://github.com/facebook/rocksdb/pull/9857
Test Plan:
#### db_bench test for cpu/memory of compression+decompression and space saving on synthetic data:
Set up: change the parameter [here](https://github.com/facebook/rocksdb/blob/fb9a167a55e0970b1ef6f67c1600c8d9c4c6114f/tools/db_bench_tool.cc#L1766) to 16384 to make synthetic data more compressible.
```
# linked local ZSTD with version 1.5.2
# DEBUG_LEVEL=0 ROCKSDB_NO_FBCODE=1 ROCKSDB_DISABLE_ZSTD=1 EXTRA_CXXFLAGS="-DZSTD_STATIC_LINKING_ONLY -DZSTD -I/data/users/changyubi/install/include/" EXTRA_LDFLAGS="-L/data/users/changyubi/install/lib/ -l:libzstd.a" make -j32 db_bench
dict_bytes=16384
train_bytes=1048576
echo "========== No Dictionary =========="
TEST_TMPDIR=/dev/shm ./db_bench -benchmarks=filluniquerandom,compact -num=10000000 -compression_type=zstd -compression_max_dict_bytes=0 -block_size=4096 -max_background_jobs=24 -memtablerep=vector -allow_concurrent_memtable_write=false -disable_wal=true -max_write_buffer_number=8 >/dev/null 2>&1
TEST_TMPDIR=/dev/shm /usr/bin/time ./db_bench -use_existing_db=true -benchmarks=compact -compression_type=zstd -compression_max_dict_bytes=0 -block_size=4096 2>&1 | grep elapsed
du -hc /dev/shm/dbbench/*sst | grep total
echo "========== Raw Content Dictionary =========="
TEST_TMPDIR=/dev/shm ./db_bench_main -benchmarks=filluniquerandom,compact -num=10000000 -compression_type=zstd -compression_max_dict_bytes=$dict_bytes -block_size=4096 -max_background_jobs=24 -memtablerep=vector -allow_concurrent_memtable_write=false -disable_wal=true -max_write_buffer_number=8 >/dev/null 2>&1
TEST_TMPDIR=/dev/shm /usr/bin/time ./db_bench_main -use_existing_db=true -benchmarks=compact -compression_type=zstd -compression_max_dict_bytes=$dict_bytes -block_size=4096 2>&1 | grep elapsed
du -hc /dev/shm/dbbench/*sst | grep total
echo "========== FinalizeDictionary =========="
TEST_TMPDIR=/dev/shm ./db_bench -benchmarks=filluniquerandom,compact -num=10000000 -compression_type=zstd -compression_max_dict_bytes=$dict_bytes -compression_zstd_max_train_bytes=$train_bytes -compression_use_zstd_dict_trainer=false -block_size=4096 -max_background_jobs=24 -memtablerep=vector -allow_concurrent_memtable_write=false -disable_wal=true -max_write_buffer_number=8 >/dev/null 2>&1
TEST_TMPDIR=/dev/shm /usr/bin/time ./db_bench -use_existing_db=true -benchmarks=compact -compression_type=zstd -compression_max_dict_bytes=$dict_bytes -compression_zstd_max_train_bytes=$train_bytes -compression_use_zstd_dict_trainer=false -block_size=4096 2>&1 | grep elapsed
du -hc /dev/shm/dbbench/*sst | grep total
echo "========== TrainDictionary =========="
TEST_TMPDIR=/dev/shm ./db_bench -benchmarks=filluniquerandom,compact -num=10000000 -compression_type=zstd -compression_max_dict_bytes=$dict_bytes -compression_zstd_max_train_bytes=$train_bytes -block_size=4096 -max_background_jobs=24 -memtablerep=vector -allow_concurrent_memtable_write=false -disable_wal=true -max_write_buffer_number=8 >/dev/null 2>&1
TEST_TMPDIR=/dev/shm /usr/bin/time ./db_bench -use_existing_db=true -benchmarks=compact -compression_type=zstd -compression_max_dict_bytes=$dict_bytes -compression_zstd_max_train_bytes=$train_bytes -block_size=4096 2>&1 | grep elapsed
du -hc /dev/shm/dbbench/*sst | grep total
# Result: TrainDictionary is much better on space saving, but FinalizeDictionary seems to use less memory.
# before compression data size: 1.2GB
dict_bytes=16384
max_dict_buffer_bytes = 1048576
space cpu/memory
No Dictionary 468M 14.93user 1.00system 0:15.92elapsed 100%CPU (0avgtext+0avgdata 23904maxresident)k
Raw Dictionary 251M 15.81user 0.80system 0:16.56elapsed 100%CPU (0avgtext+0avgdata 156808maxresident)k
FinalizeDictionary 236M 11.93user 0.64system 0:12.56elapsed 100%CPU (0avgtext+0avgdata 89548maxresident)k
TrainDictionary 84M 7.29user 0.45system 0:07.75elapsed 100%CPU (0avgtext+0avgdata 97288maxresident)k
```
#### Benchmark on 10 sample SST files for space saving and CPU time on compression:
FinalizeDictionary is comparable to TrainDictionary in terms of space saving, and takes less time in compression.
```
dict_bytes=16384
train_bytes=1048576
for sst_file in `ls ../temp/myrock-sst/`
do
echo "********** $sst_file **********"
echo "========== No Dictionary =========="
./sst_dump --file="../temp/myrock-sst/$sst_file" --command=recompress --compression_level_from=6 --compression_level_to=6 --compression_types=kZSTD
echo "========== Raw Content Dictionary =========="
./sst_dump --file="../temp/myrock-sst/$sst_file" --command=recompress --compression_level_from=6 --compression_level_to=6 --compression_types=kZSTD --compression_max_dict_bytes=$dict_bytes
echo "========== FinalizeDictionary =========="
./sst_dump --file="../temp/myrock-sst/$sst_file" --command=recompress --compression_level_from=6 --compression_level_to=6 --compression_types=kZSTD --compression_max_dict_bytes=$dict_bytes --compression_zstd_max_train_bytes=$train_bytes --compression_use_zstd_finalize_dict
echo "========== TrainDictionary =========="
./sst_dump --file="../temp/myrock-sst/$sst_file" --command=recompress --compression_level_from=6 --compression_level_to=6 --compression_types=kZSTD --compression_max_dict_bytes=$dict_bytes --compression_zstd_max_train_bytes=$train_bytes
done
010240.sst (Size/Time) 011029.sst 013184.sst 021552.sst 185054.sst 185137.sst 191666.sst 7560381.sst 7604174.sst 7635312.sst
No Dictionary 28165569 / 2614419 32899411 / 2976832 32977848 / 3055542 31966329 / 2004590 33614351 / 1755877 33429029 / 1717042 33611933 / 1776936 33634045 / 2771417 33789721 / 2205414 33592194 / 388254
Raw Content Dictionary 28019950 / 2697961 33748665 / 3572422 33896373 / 3534701 26418431 / 2259658 28560825 / 1839168 28455030 / 1846039 28494319 / 1861349 32391599 / 3095649 33772142 / 2407843 33592230 / 474523
FinalizeDictionary 27896012 / 2650029 33763886 / 3719427 33904283 / 3552793 26008225 / 2198033 28111872 / 1869530 28014374 / 1789771 28047706 / 1848300 32296254 / 3204027 33698698 / 2381468 33592344 / 517433
TrainDictionary 28046089 / 2740037 33706480 / 3679019 33885741 / 3629351 25087123 / 2204558 27194353 / 1970207 27234229 / 1896811 27166710 / 1903119 32011041 / 3322315 32730692 / 2406146 33608631 / 570593
```
#### Decompression/Read test:
With FinalizeDictionary/TrainDictionary, some data structures used for decompression are stored in the dictionary, so they are expected to be faster in terms of decompression/reads.
```
dict_bytes=16384
train_bytes=1048576
echo "No Dictionary"
TEST_TMPDIR=/dev/shm/ ./db_bench -benchmarks=filluniquerandom,compact -compression_type=zstd -compression_max_dict_bytes=0 > /dev/null 2>&1
TEST_TMPDIR=/dev/shm/ ./db_bench -use_existing_db=true -benchmarks=readrandom -cache_size=0 -compression_type=zstd -compression_max_dict_bytes=0 2>&1 | grep MB/s
echo "Raw Dictionary"
TEST_TMPDIR=/dev/shm/ ./db_bench -benchmarks=filluniquerandom,compact -compression_type=zstd -compression_max_dict_bytes=$dict_bytes > /dev/null 2>&1
TEST_TMPDIR=/dev/shm/ ./db_bench -use_existing_db=true -benchmarks=readrandom -cache_size=0 -compression_type=zstd -compression_max_dict_bytes=$dict_bytes 2>&1 | grep MB/s
echo "FinalizeDict"
TEST_TMPDIR=/dev/shm/ ./db_bench -benchmarks=filluniquerandom,compact -compression_type=zstd -compression_max_dict_bytes=$dict_bytes -compression_zstd_max_train_bytes=$train_bytes -compression_use_zstd_dict_trainer=false > /dev/null 2>&1
TEST_TMPDIR=/dev/shm/ ./db_bench -use_existing_db=true -benchmarks=readrandom -cache_size=0 -compression_type=zstd -compression_max_dict_bytes=$dict_bytes -compression_zstd_max_train_bytes=$train_bytes -compression_use_zstd_dict_trainer=false 2>&1 | grep MB/s
echo "Train Dictionary"
TEST_TMPDIR=/dev/shm/ ./db_bench -benchmarks=filluniquerandom,compact -compression_type=zstd -compression_max_dict_bytes=$dict_bytes -compression_zstd_max_train_bytes=$train_bytes > /dev/null 2>&1
TEST_TMPDIR=/dev/shm/ ./db_bench -use_existing_db=true -benchmarks=readrandom -cache_size=0 -compression_type=zstd -compression_max_dict_bytes=$dict_bytes -compression_zstd_max_train_bytes=$train_bytes 2>&1 | grep MB/s
No Dictionary
readrandom : 12.183 micros/op 82082 ops/sec 12.183 seconds 1000000 operations; 9.1 MB/s (1000000 of 1000000 found)
Raw Dictionary
readrandom : 12.314 micros/op 81205 ops/sec 12.314 seconds 1000000 operations; 9.0 MB/s (1000000 of 1000000 found)
FinalizeDict
readrandom : 9.787 micros/op 102180 ops/sec 9.787 seconds 1000000 operations; 11.3 MB/s (1000000 of 1000000 found)
Train Dictionary
readrandom : 9.698 micros/op 103108 ops/sec 9.699 seconds 1000000 operations; 11.4 MB/s (1000000 of 1000000 found)
```
Reviewed By: ajkr
Differential Revision: D35720026
Pulled By: cbi42
fbshipit-source-id: 24d230fdff0fd28a1bb650658798f00dfcfb2a1f
2022-05-20 19:09:09 +00:00
|
|
|
} else if (strcmp(argv[i], "--compression_use_zstd_finalize_dict") == 0) {
|
|
|
|
compression_use_zstd_finalize_dict = true;
|
2020-06-09 17:01:12 +00:00
|
|
|
} else if (strcmp(argv[i], "--help") == 0) {
|
|
|
|
print_help(/*to_stderr*/ false);
|
|
|
|
return 0;
|
|
|
|
} else if (strcmp(argv[i], "--version") == 0) {
|
2021-01-29 01:40:24 +00:00
|
|
|
printf("%s\n", GetRocksBuildInfoAsString("sst_dump").c_str());
|
2020-06-09 17:01:12 +00:00
|
|
|
return 0;
|
2020-05-13 01:21:32 +00:00
|
|
|
} else {
|
2017-03-13 17:24:52 +00:00
|
|
|
fprintf(stderr, "Unrecognized argument '%s'\n\n", argv[i]);
|
2020-06-09 17:01:12 +00:00
|
|
|
print_help(/*to_stderr*/ true);
|
|
|
|
return 1;
|
2014-11-08 01:23:58 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2022-10-25 21:29:41 +00:00
|
|
|
if (has_compression_level_from && has_compression_level_to) {
|
|
|
|
if (!has_specified_compression_types || compression_types.size() != 1) {
|
2020-04-27 19:33:49 +00:00
|
|
|
fprintf(stderr, "Specify one compression type.\n\n");
|
|
|
|
exit(1);
|
|
|
|
}
|
2022-10-25 21:29:41 +00:00
|
|
|
} else if (has_compression_level_from || has_compression_level_to) {
|
|
|
|
fprintf(stderr,
|
|
|
|
"Specify both --compression_level_from and "
|
|
|
|
"--compression_level_to.\n\n");
|
2020-04-27 19:33:49 +00:00
|
|
|
exit(1);
|
|
|
|
}
|
|
|
|
|
2017-03-13 17:24:52 +00:00
|
|
|
if (use_from_as_prefix && has_from) {
|
|
|
|
fprintf(stderr, "Cannot specify --prefix and --from\n\n");
|
|
|
|
exit(1);
|
|
|
|
}
|
|
|
|
|
2014-11-08 01:23:58 +00:00
|
|
|
if (input_key_hex) {
|
2017-03-13 17:24:52 +00:00
|
|
|
if (has_from || use_from_as_prefix) {
|
2020-02-20 20:07:53 +00:00
|
|
|
from_key = ROCKSDB_NAMESPACE::LDBCommand::HexToString(from_key);
|
2014-11-08 01:23:58 +00:00
|
|
|
}
|
|
|
|
if (has_to) {
|
2020-02-20 20:07:53 +00:00
|
|
|
to_key = ROCKSDB_NAMESPACE::LDBCommand::HexToString(to_key);
|
2014-11-08 01:23:58 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
if (dir_or_file == nullptr) {
|
2017-03-13 17:24:52 +00:00
|
|
|
fprintf(stderr, "file or directory must be specified.\n\n");
|
2020-06-09 17:01:12 +00:00
|
|
|
print_help(/*to_stderr*/ true);
|
2014-11-08 01:23:58 +00:00
|
|
|
exit(1);
|
|
|
|
}
|
|
|
|
|
2020-02-20 20:07:53 +00:00
|
|
|
std::shared_ptr<ROCKSDB_NAMESPACE::Env> env_guard;
|
2019-10-09 02:17:39 +00:00
|
|
|
|
|
|
|
// If caller of SSTDumpTool::Run(...) does not specify a different env other
|
2021-03-10 04:47:26 +00:00
|
|
|
// than Env::Default(), then try to load custom env based on env_uri/fs_uri.
|
2019-10-09 02:17:39 +00:00
|
|
|
// Otherwise, the caller is responsible for creating custom env.
|
2021-06-15 10:42:52 +00:00
|
|
|
{
|
|
|
|
ConfigOptions config_options;
|
|
|
|
config_options.env = options.env;
|
|
|
|
Status s = Env::CreateFromUri(config_options, env_uri, fs_uri, &options.env,
|
|
|
|
&env_guard);
|
|
|
|
if (!s.ok()) {
|
|
|
|
fprintf(stderr, "CreateEnvFromUri: %s\n", s.ToString().c_str());
|
|
|
|
exit(1);
|
|
|
|
} else {
|
|
|
|
fprintf(stdout, "options.env is %p\n", options.env);
|
2019-10-09 02:17:39 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2014-11-08 01:23:58 +00:00
|
|
|
std::vector<std::string> filenames;
|
2020-02-20 20:07:53 +00:00
|
|
|
ROCKSDB_NAMESPACE::Env* env = options.env;
|
|
|
|
ROCKSDB_NAMESPACE::Status st = env->GetChildren(dir_or_file, &filenames);
|
2014-11-08 01:23:58 +00:00
|
|
|
bool dir = true;
|
2020-06-08 20:56:22 +00:00
|
|
|
if (!st.ok() || filenames.empty()) {
|
|
|
|
// dir_or_file does not exist or does not contain children
|
|
|
|
// Check its existence first
|
|
|
|
Status s = env->FileExists(dir_or_file);
|
|
|
|
// dir_or_file does not exist
|
|
|
|
if (!s.ok()) {
|
|
|
|
fprintf(stderr, "%s%s: No such file or directory\n", s.ToString().c_str(),
|
|
|
|
dir_or_file);
|
|
|
|
return 1;
|
|
|
|
}
|
|
|
|
// dir_or_file exists and is treated as a "file"
|
|
|
|
// since it has no children
|
|
|
|
// This is ok since later it will be checked
|
|
|
|
// that whether it is a valid sst or not
|
|
|
|
// (A directory "file" is not a valid sst)
|
2014-11-08 01:23:58 +00:00
|
|
|
filenames.clear();
|
2023-12-01 19:15:17 +00:00
|
|
|
filenames.emplace_back(dir_or_file);
|
2014-11-08 01:23:58 +00:00
|
|
|
dir = false;
|
|
|
|
}
|
|
|
|
|
|
|
|
uint64_t total_read = 0;
|
2020-06-08 20:56:22 +00:00
|
|
|
// List of RocksDB SST file without corruption
|
|
|
|
std::vector<std::string> valid_sst_files;
|
2014-11-08 01:23:58 +00:00
|
|
|
for (size_t i = 0; i < filenames.size(); i++) {
|
|
|
|
std::string filename = filenames.at(i);
|
|
|
|
if (filename.length() <= 4 ||
|
|
|
|
filename.rfind(".sst") != filename.length() - 4) {
|
|
|
|
// ignore
|
|
|
|
continue;
|
|
|
|
}
|
2020-06-08 20:56:22 +00:00
|
|
|
|
2014-11-08 01:23:58 +00:00
|
|
|
if (dir) {
|
|
|
|
filename = std::string(dir_or_file) + "/" + filename;
|
|
|
|
}
|
2014-12-23 21:24:07 +00:00
|
|
|
|
2023-07-05 21:12:06 +00:00
|
|
|
if (command == "verify") {
|
|
|
|
verify_checksum = true;
|
|
|
|
}
|
|
|
|
|
New backup meta schema, with file temperatures (#9660)
Summary:
The primary goal of this change is to add support for backing up and
restoring (applying on restore) file temperature metadata, without
committing to either the DB manifest or the FS reported "current"
temperatures being exclusive "source of truth".
To achieve this goal, we need to add temperature information to backup
metadata, which requires updated backup meta schema. Fortunately I
prepared for this in https://github.com/facebook/rocksdb/issues/8069, which began forward compatibility in version
6.19.0 for this kind of schema update. (Previously, backup meta schema
was not extensible! Making this schema update public will allow some
other "nice to have" features like taking backups with hard links, and
avoiding crc32c checksum computation when another checksum is already
available.) While schema version 2 is newly public, the default schema
version is still 1. Until we change the default, users will need to set
to 2 to enable features like temperature data backup+restore. New
metadata like temperature information will be ignored with a warning
in versions before this change and since 6.19.0. The metadata is
considered ignorable because a functioning DB can be restored without
it.
Some detail:
* Some renaming because "future schema" is now just public schema 2.
* Initialize some atomics in TestFs (linter reported)
* Add temperature hint support to SstFileDumper (used by BackupEngine)
Pull Request resolved: https://github.com/facebook/rocksdb/pull/9660
Test Plan:
The related unit test was substantially updated for the new functionality,
including some shared testing support for tracking temperatures in a FS.
Some other tests and testing hooks into production code were also updated for
making the backup meta schema change public.
Reviewed By: ajkr
Differential Revision: D34686968
Pulled By: pdillinger
fbshipit-source-id: 3ac1fa3e67ee97ca8a5103d79cc87d872c1d862a
2022-03-18 18:06:17 +00:00
|
|
|
ROCKSDB_NAMESPACE::SstFileDumper dumper(
|
|
|
|
options, filename, Temperature::kUnknown, readahead_size,
|
|
|
|
verify_checksum, output_hex, decode_blob_index);
|
2020-06-08 20:56:22 +00:00
|
|
|
// Not a valid SST
|
2018-11-27 20:59:27 +00:00
|
|
|
if (!dumper.getStatus().ok()) {
|
2014-12-23 21:24:07 +00:00
|
|
|
fprintf(stderr, "%s: %s\n", filename.c_str(),
|
2018-11-27 20:59:27 +00:00
|
|
|
dumper.getStatus().ToString().c_str());
|
2017-01-04 02:24:15 +00:00
|
|
|
continue;
|
2020-06-08 20:56:22 +00:00
|
|
|
} else {
|
|
|
|
valid_sst_files.push_back(filename);
|
|
|
|
// Print out from and to key information once
|
|
|
|
// where there is at least one valid SST
|
|
|
|
if (valid_sst_files.size() == 1) {
|
|
|
|
// from_key and to_key are only used for "check", "scan", or ""
|
|
|
|
if (command == "check" || command == "scan" || command == "") {
|
|
|
|
fprintf(stdout, "from [%s] to [%s]\n",
|
|
|
|
ROCKSDB_NAMESPACE::Slice(from_key).ToString(true).c_str(),
|
|
|
|
ROCKSDB_NAMESPACE::Slice(to_key).ToString(true).c_str());
|
|
|
|
}
|
|
|
|
}
|
2014-12-23 21:24:07 +00:00
|
|
|
}
|
|
|
|
|
2017-08-11 22:49:17 +00:00
|
|
|
if (command == "recompress") {
|
2020-09-05 02:25:20 +00:00
|
|
|
st = dumper.ShowAllCompressionSizes(
|
2017-08-11 22:49:17 +00:00
|
|
|
set_block_size ? block_size : 16384,
|
2020-04-27 19:33:49 +00:00
|
|
|
compression_types.empty() ? kCompressions : compression_types,
|
2020-09-03 22:48:29 +00:00
|
|
|
compress_level_from, compress_level_to, compression_max_dict_bytes,
|
Support using ZDICT_finalizeDictionary to generate zstd dictionary (#9857)
Summary:
An untrained dictionary is currently simply the concatenation of several samples. The ZSTD API, ZDICT_finalizeDictionary(), can improve such a dictionary's effectiveness at low cost. This PR changes how the dictionary is created by calling the ZSTD ZDICT_finalizeDictionary() API instead of creating a raw content dictionary (when max_dict_buffer_bytes > 0), and passes in all buffered uncompressed data blocks as samples.
Pull Request resolved: https://github.com/facebook/rocksdb/pull/9857
Test Plan:
#### db_bench test for cpu/memory of compression+decompression and space saving on synthetic data:
Set up: change the parameter [here](https://github.com/facebook/rocksdb/blob/fb9a167a55e0970b1ef6f67c1600c8d9c4c6114f/tools/db_bench_tool.cc#L1766) to 16384 to make synthetic data more compressible.
```
# linked local ZSTD with version 1.5.2
# DEBUG_LEVEL=0 ROCKSDB_NO_FBCODE=1 ROCKSDB_DISABLE_ZSTD=1 EXTRA_CXXFLAGS="-DZSTD_STATIC_LINKING_ONLY -DZSTD -I/data/users/changyubi/install/include/" EXTRA_LDFLAGS="-L/data/users/changyubi/install/lib/ -l:libzstd.a" make -j32 db_bench
dict_bytes=16384
train_bytes=1048576
echo "========== No Dictionary =========="
TEST_TMPDIR=/dev/shm ./db_bench -benchmarks=filluniquerandom,compact -num=10000000 -compression_type=zstd -compression_max_dict_bytes=0 -block_size=4096 -max_background_jobs=24 -memtablerep=vector -allow_concurrent_memtable_write=false -disable_wal=true -max_write_buffer_number=8 >/dev/null 2>&1
TEST_TMPDIR=/dev/shm /usr/bin/time ./db_bench -use_existing_db=true -benchmarks=compact -compression_type=zstd -compression_max_dict_bytes=0 -block_size=4096 2>&1 | grep elapsed
du -hc /dev/shm/dbbench/*sst | grep total
echo "========== Raw Content Dictionary =========="
TEST_TMPDIR=/dev/shm ./db_bench_main -benchmarks=filluniquerandom,compact -num=10000000 -compression_type=zstd -compression_max_dict_bytes=$dict_bytes -block_size=4096 -max_background_jobs=24 -memtablerep=vector -allow_concurrent_memtable_write=false -disable_wal=true -max_write_buffer_number=8 >/dev/null 2>&1
TEST_TMPDIR=/dev/shm /usr/bin/time ./db_bench_main -use_existing_db=true -benchmarks=compact -compression_type=zstd -compression_max_dict_bytes=$dict_bytes -block_size=4096 2>&1 | grep elapsed
du -hc /dev/shm/dbbench/*sst | grep total
echo "========== FinalizeDictionary =========="
TEST_TMPDIR=/dev/shm ./db_bench -benchmarks=filluniquerandom,compact -num=10000000 -compression_type=zstd -compression_max_dict_bytes=$dict_bytes -compression_zstd_max_train_bytes=$train_bytes -compression_use_zstd_dict_trainer=false -block_size=4096 -max_background_jobs=24 -memtablerep=vector -allow_concurrent_memtable_write=false -disable_wal=true -max_write_buffer_number=8 >/dev/null 2>&1
TEST_TMPDIR=/dev/shm /usr/bin/time ./db_bench -use_existing_db=true -benchmarks=compact -compression_type=zstd -compression_max_dict_bytes=$dict_bytes -compression_zstd_max_train_bytes=$train_bytes -compression_use_zstd_dict_trainer=false -block_size=4096 2>&1 | grep elapsed
du -hc /dev/shm/dbbench/*sst | grep total
echo "========== TrainDictionary =========="
TEST_TMPDIR=/dev/shm ./db_bench -benchmarks=filluniquerandom,compact -num=10000000 -compression_type=zstd -compression_max_dict_bytes=$dict_bytes -compression_zstd_max_train_bytes=$train_bytes -block_size=4096 -max_background_jobs=24 -memtablerep=vector -allow_concurrent_memtable_write=false -disable_wal=true -max_write_buffer_number=8 >/dev/null 2>&1
TEST_TMPDIR=/dev/shm /usr/bin/time ./db_bench -use_existing_db=true -benchmarks=compact -compression_type=zstd -compression_max_dict_bytes=$dict_bytes -compression_zstd_max_train_bytes=$train_bytes -block_size=4096 2>&1 | grep elapsed
du -hc /dev/shm/dbbench/*sst | grep total
# Result: TrainDictionary is much better on space saving, but FinalizeDictionary seems to use less memory.
# before compression data size: 1.2GB
dict_bytes=16384
max_dict_buffer_bytes = 1048576
space cpu/memory
No Dictionary 468M 14.93user 1.00system 0:15.92elapsed 100%CPU (0avgtext+0avgdata 23904maxresident)k
Raw Dictionary 251M 15.81user 0.80system 0:16.56elapsed 100%CPU (0avgtext+0avgdata 156808maxresident)k
FinalizeDictionary 236M 11.93user 0.64system 0:12.56elapsed 100%CPU (0avgtext+0avgdata 89548maxresident)k
TrainDictionary 84M 7.29user 0.45system 0:07.75elapsed 100%CPU (0avgtext+0avgdata 97288maxresident)k
```
#### Benchmark on 10 sample SST files for space saving and CPU time on compression:
FinalizeDictionary is comparable to TrainDictionary in terms of space saving, and takes less time in compression.
```
dict_bytes=16384
train_bytes=1048576
for sst_file in `ls ../temp/myrock-sst/`
do
echo "********** $sst_file **********"
echo "========== No Dictionary =========="
./sst_dump --file="../temp/myrock-sst/$sst_file" --command=recompress --compression_level_from=6 --compression_level_to=6 --compression_types=kZSTD
echo "========== Raw Content Dictionary =========="
./sst_dump --file="../temp/myrock-sst/$sst_file" --command=recompress --compression_level_from=6 --compression_level_to=6 --compression_types=kZSTD --compression_max_dict_bytes=$dict_bytes
echo "========== FinalizeDictionary =========="
./sst_dump --file="../temp/myrock-sst/$sst_file" --command=recompress --compression_level_from=6 --compression_level_to=6 --compression_types=kZSTD --compression_max_dict_bytes=$dict_bytes --compression_zstd_max_train_bytes=$train_bytes --compression_use_zstd_finalize_dict
echo "========== TrainDictionary =========="
./sst_dump --file="../temp/myrock-sst/$sst_file" --command=recompress --compression_level_from=6 --compression_level_to=6 --compression_types=kZSTD --compression_max_dict_bytes=$dict_bytes --compression_zstd_max_train_bytes=$train_bytes
done
010240.sst (Size/Time) 011029.sst 013184.sst 021552.sst 185054.sst 185137.sst 191666.sst 7560381.sst 7604174.sst 7635312.sst
No Dictionary 28165569 / 2614419 32899411 / 2976832 32977848 / 3055542 31966329 / 2004590 33614351 / 1755877 33429029 / 1717042 33611933 / 1776936 33634045 / 2771417 33789721 / 2205414 33592194 / 388254
Raw Content Dictionary 28019950 / 2697961 33748665 / 3572422 33896373 / 3534701 26418431 / 2259658 28560825 / 1839168 28455030 / 1846039 28494319 / 1861349 32391599 / 3095649 33772142 / 2407843 33592230 / 474523
FinalizeDictionary 27896012 / 2650029 33763886 / 3719427 33904283 / 3552793 26008225 / 2198033 28111872 / 1869530 28014374 / 1789771 28047706 / 1848300 32296254 / 3204027 33698698 / 2381468 33592344 / 517433
TrainDictionary 28046089 / 2740037 33706480 / 3679019 33885741 / 3629351 25087123 / 2204558 27194353 / 1970207 27234229 / 1896811 27166710 / 1903119 32011041 / 3322315 32730692 / 2406146 33608631 / 570593
```
#### Decompression/Read test:
With FinalizeDictionary/TrainDictionary, some data structures used for decompression are stored in the dictionary, so they are expected to be faster in terms of decompression/reads.
```
dict_bytes=16384
train_bytes=1048576
echo "No Dictionary"
TEST_TMPDIR=/dev/shm/ ./db_bench -benchmarks=filluniquerandom,compact -compression_type=zstd -compression_max_dict_bytes=0 > /dev/null 2>&1
TEST_TMPDIR=/dev/shm/ ./db_bench -use_existing_db=true -benchmarks=readrandom -cache_size=0 -compression_type=zstd -compression_max_dict_bytes=0 2>&1 | grep MB/s
echo "Raw Dictionary"
TEST_TMPDIR=/dev/shm/ ./db_bench -benchmarks=filluniquerandom,compact -compression_type=zstd -compression_max_dict_bytes=$dict_bytes > /dev/null 2>&1
TEST_TMPDIR=/dev/shm/ ./db_bench -use_existing_db=true -benchmarks=readrandom -cache_size=0 -compression_type=zstd -compression_max_dict_bytes=$dict_bytes 2>&1 | grep MB/s
echo "FinalizeDict"
TEST_TMPDIR=/dev/shm/ ./db_bench -benchmarks=filluniquerandom,compact -compression_type=zstd -compression_max_dict_bytes=$dict_bytes -compression_zstd_max_train_bytes=$train_bytes -compression_use_zstd_dict_trainer=false > /dev/null 2>&1
TEST_TMPDIR=/dev/shm/ ./db_bench -use_existing_db=true -benchmarks=readrandom -cache_size=0 -compression_type=zstd -compression_max_dict_bytes=$dict_bytes -compression_zstd_max_train_bytes=$train_bytes -compression_use_zstd_dict_trainer=false 2>&1 | grep MB/s
echo "Train Dictionary"
TEST_TMPDIR=/dev/shm/ ./db_bench -benchmarks=filluniquerandom,compact -compression_type=zstd -compression_max_dict_bytes=$dict_bytes -compression_zstd_max_train_bytes=$train_bytes > /dev/null 2>&1
TEST_TMPDIR=/dev/shm/ ./db_bench -use_existing_db=true -benchmarks=readrandom -cache_size=0 -compression_type=zstd -compression_max_dict_bytes=$dict_bytes -compression_zstd_max_train_bytes=$train_bytes 2>&1 | grep MB/s
No Dictionary
readrandom : 12.183 micros/op 82082 ops/sec 12.183 seconds 1000000 operations; 9.1 MB/s (1000000 of 1000000 found)
Raw Dictionary
readrandom : 12.314 micros/op 81205 ops/sec 12.314 seconds 1000000 operations; 9.0 MB/s (1000000 of 1000000 found)
FinalizeDict
readrandom : 9.787 micros/op 102180 ops/sec 9.787 seconds 1000000 operations; 11.3 MB/s (1000000 of 1000000 found)
Train Dictionary
readrandom : 9.698 micros/op 103108 ops/sec 9.699 seconds 1000000 operations; 11.4 MB/s (1000000 of 1000000 found)
```
Reviewed By: ajkr
Differential Revision: D35720026
Pulled By: cbi42
fbshipit-source-id: 24d230fdff0fd28a1bb650658798f00dfcfb2a1f
2022-05-20 19:09:09 +00:00
|
|
|
compression_zstd_max_train_bytes, compression_max_dict_buffer_bytes,
|
|
|
|
!compression_use_zstd_finalize_dict);
|
2020-09-05 02:25:20 +00:00
|
|
|
if (!st.ok()) {
|
|
|
|
fprintf(stderr, "Failed to recompress: %s\n", st.ToString().c_str());
|
|
|
|
exit(1);
|
|
|
|
}
|
2015-07-24 00:05:33 +00:00
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2014-12-23 21:24:07 +00:00
|
|
|
if (command == "raw") {
|
|
|
|
std::string out_filename = filename.substr(0, filename.length() - 4);
|
|
|
|
out_filename.append("_dump.txt");
|
|
|
|
|
2018-11-27 20:59:27 +00:00
|
|
|
st = dumper.DumpTable(out_filename);
|
2014-12-23 21:24:07 +00:00
|
|
|
if (!st.ok()) {
|
|
|
|
fprintf(stderr, "%s: %s\n", filename.c_str(), st.ToString().c_str());
|
|
|
|
exit(1);
|
|
|
|
} else {
|
2023-12-01 19:15:17 +00:00
|
|
|
fprintf(stdout, "raw dump written to file %s\n", out_filename.data());
|
2014-12-23 21:24:07 +00:00
|
|
|
}
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
|
2014-11-08 01:23:58 +00:00
|
|
|
// scan all files in the given file path.
|
|
|
|
if (command == "" || command == "scan" || command == "check") {
|
2018-11-27 20:59:27 +00:00
|
|
|
st = dumper.ReadSequential(
|
2017-03-13 17:24:52 +00:00
|
|
|
command == "scan", read_num > 0 ? (read_num - total_read) : read_num,
|
|
|
|
has_from || use_from_as_prefix, from_key, has_to, to_key,
|
|
|
|
use_from_as_prefix);
|
2014-11-08 01:23:58 +00:00
|
|
|
if (!st.ok()) {
|
2022-10-25 21:29:41 +00:00
|
|
|
fprintf(stderr, "%s: %s\n", filename.c_str(), st.ToString().c_str());
|
2014-11-08 01:23:58 +00:00
|
|
|
}
|
2018-11-27 20:59:27 +00:00
|
|
|
total_read += dumper.GetReadNumber();
|
2014-11-08 01:23:58 +00:00
|
|
|
if (read_num > 0 && total_read > read_num) {
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
2017-01-04 02:24:15 +00:00
|
|
|
|
2017-08-09 22:49:40 +00:00
|
|
|
if (command == "verify") {
|
2018-11-27 20:59:27 +00:00
|
|
|
st = dumper.VerifyChecksum();
|
2017-08-09 22:49:40 +00:00
|
|
|
if (!st.ok()) {
|
|
|
|
fprintf(stderr, "%s is corrupted: %s\n", filename.c_str(),
|
|
|
|
st.ToString().c_str());
|
|
|
|
} else {
|
|
|
|
fprintf(stdout, "The file is ok\n");
|
|
|
|
}
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
|
2017-01-04 02:24:15 +00:00
|
|
|
if (show_properties || show_summary) {
|
2020-02-20 20:07:53 +00:00
|
|
|
const ROCKSDB_NAMESPACE::TableProperties* table_properties;
|
2014-11-08 01:23:58 +00:00
|
|
|
|
2020-02-20 20:07:53 +00:00
|
|
|
std::shared_ptr<const ROCKSDB_NAMESPACE::TableProperties>
|
2014-11-08 01:23:58 +00:00
|
|
|
table_properties_from_reader;
|
2018-11-27 20:59:27 +00:00
|
|
|
st = dumper.ReadTableProperties(&table_properties_from_reader);
|
2014-11-08 01:23:58 +00:00
|
|
|
if (!st.ok()) {
|
|
|
|
fprintf(stderr, "%s: %s\n", filename.c_str(), st.ToString().c_str());
|
|
|
|
fprintf(stderr, "Try to use initial table properties\n");
|
2018-11-27 20:59:27 +00:00
|
|
|
table_properties = dumper.GetInitTableProperties();
|
2014-11-08 01:23:58 +00:00
|
|
|
} else {
|
|
|
|
table_properties = table_properties_from_reader.get();
|
|
|
|
}
|
|
|
|
if (table_properties != nullptr) {
|
2017-01-04 02:24:15 +00:00
|
|
|
if (show_properties) {
|
|
|
|
fprintf(stdout,
|
|
|
|
"Table Properties:\n"
|
|
|
|
"------------------------------\n"
|
|
|
|
" %s",
|
|
|
|
table_properties->ToString("\n ", ": ").c_str());
|
2016-05-19 21:24:48 +00:00
|
|
|
}
|
2017-01-04 02:24:15 +00:00
|
|
|
total_num_files += 1;
|
|
|
|
total_num_data_blocks += table_properties->num_data_blocks;
|
|
|
|
total_data_block_size += table_properties->data_size;
|
|
|
|
total_index_block_size += table_properties->index_size;
|
|
|
|
total_filter_block_size += table_properties->filter_size;
|
2019-10-18 21:43:17 +00:00
|
|
|
if (show_properties) {
|
|
|
|
fprintf(stdout,
|
|
|
|
"Raw user collected properties\n"
|
|
|
|
"------------------------------\n");
|
|
|
|
for (const auto& kv : table_properties->user_collected_properties) {
|
|
|
|
std::string prop_name = kv.first;
|
|
|
|
std::string prop_val = Slice(kv.second).ToString(true);
|
|
|
|
fprintf(stdout, " # %s: 0x%s\n", prop_name.c_str(),
|
|
|
|
prop_val.c_str());
|
|
|
|
}
|
2017-01-04 02:24:15 +00:00
|
|
|
}
|
2019-10-18 21:43:17 +00:00
|
|
|
} else {
|
|
|
|
fprintf(stderr, "Reader unexpectedly returned null properties\n");
|
2016-12-14 19:09:50 +00:00
|
|
|
}
|
2014-11-08 01:23:58 +00:00
|
|
|
}
|
|
|
|
}
|
2017-01-04 02:24:15 +00:00
|
|
|
if (show_summary) {
|
|
|
|
fprintf(stdout, "total number of files: %" PRIu64 "\n", total_num_files);
|
|
|
|
fprintf(stdout, "total number of data blocks: %" PRIu64 "\n",
|
|
|
|
total_num_data_blocks);
|
|
|
|
fprintf(stdout, "total data block size: %" PRIu64 "\n",
|
|
|
|
total_data_block_size);
|
|
|
|
fprintf(stdout, "total index block size: %" PRIu64 "\n",
|
|
|
|
total_index_block_size);
|
|
|
|
fprintf(stdout, "total filter block size: %" PRIu64 "\n",
|
|
|
|
total_filter_block_size);
|
|
|
|
}
|
2020-06-08 20:56:22 +00:00
|
|
|
|
|
|
|
if (valid_sst_files.empty()) {
|
|
|
|
// No valid SST files are found
|
|
|
|
// Exit with an error state
|
|
|
|
if (dir) {
|
|
|
|
fprintf(stdout, "------------------------------\n");
|
|
|
|
fprintf(stderr, "No valid SST files found in %s\n", dir_or_file);
|
|
|
|
} else {
|
|
|
|
fprintf(stderr, "%s is not a valid SST file\n", dir_or_file);
|
|
|
|
}
|
|
|
|
return 1;
|
|
|
|
} else {
|
|
|
|
if (command == "identify") {
|
|
|
|
if (dir) {
|
|
|
|
fprintf(stdout, "------------------------------\n");
|
|
|
|
fprintf(stdout, "List of valid SST files found in %s:\n", dir_or_file);
|
|
|
|
for (const auto& f : valid_sst_files) {
|
|
|
|
fprintf(stdout, "%s\n", f.c_str());
|
|
|
|
}
|
|
|
|
fprintf(stdout, "Number of valid SST files: %zu\n",
|
|
|
|
valid_sst_files.size());
|
|
|
|
} else {
|
|
|
|
fprintf(stdout, "%s is a valid SST file\n", dir_or_file);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
// At least one valid SST
|
|
|
|
// exit with a success state
|
|
|
|
return 0;
|
|
|
|
}
|
2014-11-08 01:23:58 +00:00
|
|
|
}
|
2020-02-20 20:07:53 +00:00
|
|
|
} // namespace ROCKSDB_NAMESPACE
|
2014-11-13 19:39:30 +00:00
|
|
|
|