mirror of
https://github.com/facebook/rocksdb.git
synced 2024-11-30 13:41:46 +00:00
WIP: use zstd ZDICT_finalizeDictionary() API
Summary: Use the zstd ZDICT_finalizeDictionary() API to optimize untrained dictionary compression. Test Plan: 1. Build an optimized db_bench binary: $ DEBUG_LEVEL=0 make -j24 db_bench 2. Define envvar $dict_bytes and run benchmark: $ TEST_TMPDIR=/dev/shm \ ./db_bench \ -benchmarks=filluniquerandom,compact \ -max_background_jobs=24 \ -num=10000000 \ -compression_type=zstd \ -block_size=4096 \ -memtablerep=vector \ -allow_concurrent_memtable_write=false \ -disable_wal=true \ -max_write_buffer_number=8 -compression_max_dict_bytes=$dict_bytes 3. Record the CPU/elapsed time of this command: $ TEST_TMPDIR=/dev/shm \ /usr/bin/time ./db_bench \ -use_existing_db=true \ -benchmarks=compact \ -compression_type=zstd \ -block_size=4096 -compression_max_dict_bytes=$dict_bytes Reviewers: Andrew Kryczka Subscribers: Tasks: T92484194 Tags: zstd, dictionary, untrained
This commit is contained in:
parent
a5e5130556
commit
36e3d246a0
|
@ -1433,7 +1433,8 @@ inline bool ZSTD_TrainDictionarySupported() {
|
|||
|
||||
inline std::string ZSTD_TrainDictionary(const std::string& samples,
|
||||
const std::vector<size_t>& sample_lens,
|
||||
size_t max_dict_bytes) {
|
||||
size_t max_dict_bytes,
|
||||
bool finalizeDict = false) {
|
||||
// Dictionary trainer is available since v0.6.1 for static linking, but not
|
||||
// available for dynamic linking until v1.1.3. For now we enable the feature
|
||||
// in v1.1.3+ only.
|
||||
|
@ -1442,16 +1443,32 @@ inline std::string ZSTD_TrainDictionary(const std::string& samples,
|
|||
if (samples.empty()) {
|
||||
return "";
|
||||
}
|
||||
unsigned nbSamples = static_cast<unsigned>(sample_lens.size());
|
||||
std::string dict_data(max_dict_bytes, '\0');
|
||||
size_t dict_len = ZDICT_trainFromBuffer(
|
||||
&dict_data[0], max_dict_bytes, &samples[0], &sample_lens[0],
|
||||
static_cast<unsigned>(sample_lens.size()));
|
||||
&dict_data[0], max_dict_bytes, &samples[0], &sample_lens[0], nbSamples);
|
||||
if (ZDICT_isError(dict_len)) {
|
||||
return "";
|
||||
}
|
||||
assert(dict_len <= max_dict_bytes);
|
||||
dict_data.resize(dict_len);
|
||||
return dict_data;
|
||||
if (!finalizeDict) {
|
||||
return dict_data;
|
||||
}
|
||||
|
||||
ZDICT_params_t params;
|
||||
memset(&params, 0, sizeof(params));
|
||||
std::string fin_dict_data(max_dict_bytes, '\0');
|
||||
size_t fin_dict_len = ZDICT_finalizeDictionary(
|
||||
&fin_dict_data[0], max_dict_bytes, &dict_data[0], dict_len, &samples[0],
|
||||
&sample_lens[0], nbSamples, params);
|
||||
if (ZDICT_isError(fin_dict_len)) {
|
||||
return "";
|
||||
}
|
||||
assert(fin_dict_len <= max_dict_bytes);
|
||||
fin_dict_data.resize(fin_dict_len);
|
||||
return fin_dict_data;
|
||||
|
||||
#else // up to v1.1.2
|
||||
assert(false);
|
||||
(void)samples;
|
||||
|
@ -1470,7 +1487,7 @@ inline std::string ZSTD_TrainDictionary(const std::string& samples,
|
|||
// skips potential partial sample at the end of "samples"
|
||||
size_t num_samples = samples.size() >> sample_len_shift;
|
||||
std::vector<size_t> sample_lens(num_samples, size_t(1) << sample_len_shift);
|
||||
return ZSTD_TrainDictionary(samples, sample_lens, max_dict_bytes);
|
||||
return ZSTD_TrainDictionary(samples, sample_lens, max_dict_bytes, false);
|
||||
#else // up to v1.1.2
|
||||
assert(false);
|
||||
(void)samples;
|
||||
|
|
Loading…
Reference in a new issue