rocksdb/java/rocksjni/compression_options.cc

211 lines
6.6 KiB
C++
Raw Normal View History

// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under both the GPLv2 (found in the
// COPYING file in the root directory) and Apache 2.0 License
// (found in the LICENSE.Apache file in the root directory).
//
// This file implements the "bridge" between Java and C++ for
// ROCKSDB_NAMESPACE::CompressionOptions.
#include <jni.h>
#include "include/org_rocksdb_CompressionOptions.h"
#include "rocksdb/advanced_options.h"
#include "rocksjni/cplusplus_to_java_convert.h"
/*
* Class: org_rocksdb_CompressionOptions
* Method: newCompressionOptions
* Signature: ()J
*/
jlong Java_org_rocksdb_CompressionOptions_newCompressionOptions(
JNIEnv*, jclass) {
const auto* opt = new ROCKSDB_NAMESPACE::CompressionOptions();
return GET_CPLUSPLUS_POINTER(opt);
}
/*
* Class: org_rocksdb_CompressionOptions
* Method: setWindowBits
* Signature: (JI)V
*/
void Java_org_rocksdb_CompressionOptions_setWindowBits(
JNIEnv*, jobject, jlong jhandle, jint jwindow_bits) {
auto* opt = reinterpret_cast<ROCKSDB_NAMESPACE::CompressionOptions*>(jhandle);
opt->window_bits = static_cast<int>(jwindow_bits);
}
/*
* Class: org_rocksdb_CompressionOptions
* Method: windowBits
* Signature: (J)I
*/
jint Java_org_rocksdb_CompressionOptions_windowBits(
JNIEnv*, jobject, jlong jhandle) {
auto* opt = reinterpret_cast<ROCKSDB_NAMESPACE::CompressionOptions*>(jhandle);
return static_cast<jint>(opt->window_bits);
}
/*
* Class: org_rocksdb_CompressionOptions
* Method: setLevel
* Signature: (JI)V
*/
void Java_org_rocksdb_CompressionOptions_setLevel(
JNIEnv*, jobject, jlong jhandle, jint jlevel) {
auto* opt = reinterpret_cast<ROCKSDB_NAMESPACE::CompressionOptions*>(jhandle);
opt->level = static_cast<int>(jlevel);
}
/*
* Class: org_rocksdb_CompressionOptions
* Method: level
* Signature: (J)I
*/
jint Java_org_rocksdb_CompressionOptions_level(
JNIEnv*, jobject, jlong jhandle) {
auto* opt = reinterpret_cast<ROCKSDB_NAMESPACE::CompressionOptions*>(jhandle);
return static_cast<jint>(opt->level);
}
/*
* Class: org_rocksdb_CompressionOptions
* Method: setStrategy
* Signature: (JI)V
*/
void Java_org_rocksdb_CompressionOptions_setStrategy(
JNIEnv*, jobject, jlong jhandle, jint jstrategy) {
auto* opt = reinterpret_cast<ROCKSDB_NAMESPACE::CompressionOptions*>(jhandle);
opt->strategy = static_cast<int>(jstrategy);
}
/*
* Class: org_rocksdb_CompressionOptions
* Method: strategy
* Signature: (J)I
*/
jint Java_org_rocksdb_CompressionOptions_strategy(
JNIEnv*, jobject, jlong jhandle) {
auto* opt = reinterpret_cast<ROCKSDB_NAMESPACE::CompressionOptions*>(jhandle);
return static_cast<jint>(opt->strategy);
}
/*
* Class: org_rocksdb_CompressionOptions
* Method: setMaxDictBytes
* Signature: (JI)V
*/
void Java_org_rocksdb_CompressionOptions_setMaxDictBytes(
JNIEnv*, jobject, jlong jhandle, jint jmax_dict_bytes) {
auto* opt = reinterpret_cast<ROCKSDB_NAMESPACE::CompressionOptions*>(jhandle);
opt->max_dict_bytes = static_cast<uint32_t>(jmax_dict_bytes);
}
/*
* Class: org_rocksdb_CompressionOptions
* Method: maxDictBytes
* Signature: (J)I
*/
jint Java_org_rocksdb_CompressionOptions_maxDictBytes(
JNIEnv*, jobject, jlong jhandle) {
auto* opt = reinterpret_cast<ROCKSDB_NAMESPACE::CompressionOptions*>(jhandle);
return static_cast<jint>(opt->max_dict_bytes);
}
/*
* Class: org_rocksdb_CompressionOptions
* Method: setZstdMaxTrainBytes
* Signature: (JI)V
*/
void Java_org_rocksdb_CompressionOptions_setZstdMaxTrainBytes(
JNIEnv*, jobject, jlong jhandle, jint jzstd_max_train_bytes) {
auto* opt = reinterpret_cast<ROCKSDB_NAMESPACE::CompressionOptions*>(jhandle);
opt->zstd_max_train_bytes = static_cast<uint32_t>(jzstd_max_train_bytes);
}
/*
* Class: org_rocksdb_CompressionOptions
* Method: zstdMaxTrainBytes
* Signature: (J)I
*/
jint Java_org_rocksdb_CompressionOptions_zstdMaxTrainBytes(
JNIEnv *, jobject, jlong jhandle) {
auto* opt = reinterpret_cast<ROCKSDB_NAMESPACE::CompressionOptions*>(jhandle);
return static_cast<jint>(opt->zstd_max_train_bytes);
}
Limit buffering for collecting samples for compression dictionary (#7970) Summary: For dictionary compression, we need to collect some representative samples of the data to be compressed, which we use to either generate or train (when `CompressionOptions::zstd_max_train_bytes > 0`) a dictionary. Previously, the strategy was to buffer all the data blocks during flush, and up to the target file size during compaction. That strategy allowed us to randomly pick samples from as wide a range as possible that'd be guaranteed to land in a single output file. However, some users try to make huge files in memory-constrained environments, where this strategy can cause OOM. This PR introduces an option, `CompressionOptions::max_dict_buffer_bytes`, that limits how much data blocks are buffered before we switch to unbuffered mode (which means creating the per-SST dictionary, writing out the buffered data, and compressing/writing new blocks as soon as they are built). It is not strict as we currently buffer more than just data blocks -- also keys are buffered. But it does make a step towards giving users predictable memory usage. Related changes include: - Changed sampling for dictionary compression to select unique data blocks when there is limited availability of data blocks - Made use of `BlockBuilder::SwapAndReset()` to save an allocation+memcpy when buffering data blocks for building a dictionary - Changed `ParseBoolean()` to accept an input containing characters after the boolean. This is necessary since, with this PR, a value for `CompressionOptions::enabled` is no longer necessarily the final component in the `CompressionOptions` string. Pull Request resolved: https://github.com/facebook/rocksdb/pull/7970 Test Plan: - updated `CompressionOptions` unit tests to verify limit is respected (to the extent expected in the current implementation) in various scenarios of flush/compaction to bottommost/non-bottommost level - looked at jemalloc heap profiles right before and after switching to unbuffered mode during flush/compaction. Verified memory usage in buffering is proportional to the limit set. Reviewed By: pdillinger Differential Revision: D26467994 Pulled By: ajkr fbshipit-source-id: 3da4ef9fba59974e4ef40e40c01611002c861465
2021-02-19 22:06:59 +00:00
/*
* Class: org_rocksdb_CompressionOptions
* Method: setMaxDictBufferBytes
* Signature: (JJ)V
*/
void Java_org_rocksdb_CompressionOptions_setMaxDictBufferBytes(
JNIEnv*, jobject, jlong jhandle, jlong jmax_dict_buffer_bytes) {
auto* opt = reinterpret_cast<ROCKSDB_NAMESPACE::CompressionOptions*>(jhandle);
opt->max_dict_buffer_bytes = static_cast<uint64_t>(jmax_dict_buffer_bytes);
}
/*
* Class: org_rocksdb_CompressionOptions
* Method: maxDictBufferBytes
* Signature: (J)J
*/
jlong Java_org_rocksdb_CompressionOptions_maxDictBufferBytes(JNIEnv*, jobject,
jlong jhandle) {
auto* opt = reinterpret_cast<ROCKSDB_NAMESPACE::CompressionOptions*>(jhandle);
return static_cast<jlong>(opt->max_dict_buffer_bytes);
}
Support using ZDICT_finalizeDictionary to generate zstd dictionary (#9857) Summary: An untrained dictionary is currently simply the concatenation of several samples. The ZSTD API, ZDICT_finalizeDictionary(), can improve such a dictionary's effectiveness at low cost. This PR changes how dictionary is created by calling the ZSTD ZDICT_finalizeDictionary() API instead of creating raw content dictionary (when max_dict_buffer_bytes > 0), and pass in all buffered uncompressed data blocks as samples. Pull Request resolved: https://github.com/facebook/rocksdb/pull/9857 Test Plan: #### db_bench test for cpu/memory of compression+decompression and space saving on synthetic data: Set up: change the parameter [here](https://github.com/facebook/rocksdb/blob/fb9a167a55e0970b1ef6f67c1600c8d9c4c6114f/tools/db_bench_tool.cc#L1766) to 16384 to make synthetic data more compressible. ``` # linked local ZSTD with version 1.5.2 # DEBUG_LEVEL=0 ROCKSDB_NO_FBCODE=1 ROCKSDB_DISABLE_ZSTD=1 EXTRA_CXXFLAGS="-DZSTD_STATIC_LINKING_ONLY -DZSTD -I/data/users/changyubi/install/include/" EXTRA_LDFLAGS="-L/data/users/changyubi/install/lib/ -l:libzstd.a" make -j32 db_bench dict_bytes=16384 train_bytes=1048576 echo "========== No Dictionary ==========" TEST_TMPDIR=/dev/shm ./db_bench -benchmarks=filluniquerandom,compact -num=10000000 -compression_type=zstd -compression_max_dict_bytes=0 -block_size=4096 -max_background_jobs=24 -memtablerep=vector -allow_concurrent_memtable_write=false -disable_wal=true -max_write_buffer_number=8 >/dev/null 2>&1 TEST_TMPDIR=/dev/shm /usr/bin/time ./db_bench -use_existing_db=true -benchmarks=compact -compression_type=zstd -compression_max_dict_bytes=0 -block_size=4096 2>&1 | grep elapsed du -hc /dev/shm/dbbench/*sst | grep total echo "========== Raw Content Dictionary ==========" TEST_TMPDIR=/dev/shm ./db_bench_main -benchmarks=filluniquerandom,compact -num=10000000 -compression_type=zstd -compression_max_dict_bytes=$dict_bytes -block_size=4096 -max_background_jobs=24 -memtablerep=vector -allow_concurrent_memtable_write=false -disable_wal=true -max_write_buffer_number=8 >/dev/null 2>&1 TEST_TMPDIR=/dev/shm /usr/bin/time ./db_bench_main -use_existing_db=true -benchmarks=compact -compression_type=zstd -compression_max_dict_bytes=$dict_bytes -block_size=4096 2>&1 | grep elapsed du -hc /dev/shm/dbbench/*sst | grep total echo "========== FinalizeDictionary ==========" TEST_TMPDIR=/dev/shm ./db_bench -benchmarks=filluniquerandom,compact -num=10000000 -compression_type=zstd -compression_max_dict_bytes=$dict_bytes -compression_zstd_max_train_bytes=$train_bytes -compression_use_zstd_dict_trainer=false -block_size=4096 -max_background_jobs=24 -memtablerep=vector -allow_concurrent_memtable_write=false -disable_wal=true -max_write_buffer_number=8 >/dev/null 2>&1 TEST_TMPDIR=/dev/shm /usr/bin/time ./db_bench -use_existing_db=true -benchmarks=compact -compression_type=zstd -compression_max_dict_bytes=$dict_bytes -compression_zstd_max_train_bytes=$train_bytes -compression_use_zstd_dict_trainer=false -block_size=4096 2>&1 | grep elapsed du -hc /dev/shm/dbbench/*sst | grep total echo "========== TrainDictionary ==========" TEST_TMPDIR=/dev/shm ./db_bench -benchmarks=filluniquerandom,compact -num=10000000 -compression_type=zstd -compression_max_dict_bytes=$dict_bytes -compression_zstd_max_train_bytes=$train_bytes -block_size=4096 -max_background_jobs=24 -memtablerep=vector -allow_concurrent_memtable_write=false -disable_wal=true -max_write_buffer_number=8 >/dev/null 2>&1 TEST_TMPDIR=/dev/shm /usr/bin/time ./db_bench -use_existing_db=true -benchmarks=compact -compression_type=zstd -compression_max_dict_bytes=$dict_bytes -compression_zstd_max_train_bytes=$train_bytes -block_size=4096 2>&1 | grep elapsed du -hc /dev/shm/dbbench/*sst | grep total # Result: TrainDictionary is much better on space saving, but FinalizeDictionary seems to use less memory. # before compression data size: 1.2GB dict_bytes=16384 max_dict_buffer_bytes = 1048576 space cpu/memory No Dictionary 468M 14.93user 1.00system 0:15.92elapsed 100%CPU (0avgtext+0avgdata 23904maxresident)k Raw Dictionary 251M 15.81user 0.80system 0:16.56elapsed 100%CPU (0avgtext+0avgdata 156808maxresident)k FinalizeDictionary 236M 11.93user 0.64system 0:12.56elapsed 100%CPU (0avgtext+0avgdata 89548maxresident)k TrainDictionary 84M 7.29user 0.45system 0:07.75elapsed 100%CPU (0avgtext+0avgdata 97288maxresident)k ``` #### Benchmark on 10 sample SST files for spacing saving and CPU time on compression: FinalizeDictionary is comparable to TrainDictionary in terms of space saving, and takes less time in compression. ``` dict_bytes=16384 train_bytes=1048576 for sst_file in `ls ../temp/myrock-sst/` do echo "********** $sst_file **********" echo "========== No Dictionary ==========" ./sst_dump --file="../temp/myrock-sst/$sst_file" --command=recompress --compression_level_from=6 --compression_level_to=6 --compression_types=kZSTD echo "========== Raw Content Dictionary ==========" ./sst_dump --file="../temp/myrock-sst/$sst_file" --command=recompress --compression_level_from=6 --compression_level_to=6 --compression_types=kZSTD --compression_max_dict_bytes=$dict_bytes echo "========== FinalizeDictionary ==========" ./sst_dump --file="../temp/myrock-sst/$sst_file" --command=recompress --compression_level_from=6 --compression_level_to=6 --compression_types=kZSTD --compression_max_dict_bytes=$dict_bytes --compression_zstd_max_train_bytes=$train_bytes --compression_use_zstd_finalize_dict echo "========== TrainDictionary ==========" ./sst_dump --file="../temp/myrock-sst/$sst_file" --command=recompress --compression_level_from=6 --compression_level_to=6 --compression_types=kZSTD --compression_max_dict_bytes=$dict_bytes --compression_zstd_max_train_bytes=$train_bytes done 010240.sst (Size/Time) 011029.sst 013184.sst 021552.sst 185054.sst 185137.sst 191666.sst 7560381.sst 7604174.sst 7635312.sst No Dictionary 28165569 / 2614419 32899411 / 2976832 32977848 / 3055542 31966329 / 2004590 33614351 / 1755877 33429029 / 1717042 33611933 / 1776936 33634045 / 2771417 33789721 / 2205414 33592194 / 388254 Raw Content Dictionary 28019950 / 2697961 33748665 / 3572422 33896373 / 3534701 26418431 / 2259658 28560825 / 1839168 28455030 / 1846039 28494319 / 1861349 32391599 / 3095649 33772142 / 2407843 33592230 / 474523 FinalizeDictionary 27896012 / 2650029 33763886 / 3719427 33904283 / 3552793 26008225 / 2198033 28111872 / 1869530 28014374 / 1789771 28047706 / 1848300 32296254 / 3204027 33698698 / 2381468 33592344 / 517433 TrainDictionary 28046089 / 2740037 33706480 / 3679019 33885741 / 3629351 25087123 / 2204558 27194353 / 1970207 27234229 / 1896811 27166710 / 1903119 32011041 / 3322315 32730692 / 2406146 33608631 / 570593 ``` #### Decompression/Read test: With FinalizeDictionary/TrainDictionary, some data structure used for decompression are in stored in dictionary, so they are expected to be faster in terms of decompression/reads. ``` dict_bytes=16384 train_bytes=1048576 echo "No Dictionary" TEST_TMPDIR=/dev/shm/ ./db_bench -benchmarks=filluniquerandom,compact -compression_type=zstd -compression_max_dict_bytes=0 > /dev/null 2>&1 TEST_TMPDIR=/dev/shm/ ./db_bench -use_existing_db=true -benchmarks=readrandom -cache_size=0 -compression_type=zstd -compression_max_dict_bytes=0 2>&1 | grep MB/s echo "Raw Dictionary" TEST_TMPDIR=/dev/shm/ ./db_bench -benchmarks=filluniquerandom,compact -compression_type=zstd -compression_max_dict_bytes=$dict_bytes > /dev/null 2>&1 TEST_TMPDIR=/dev/shm/ ./db_bench -use_existing_db=true -benchmarks=readrandom -cache_size=0 -compression_type=zstd -compression_max_dict_bytes=$dict_bytes 2>&1 | grep MB/s echo "FinalizeDict" TEST_TMPDIR=/dev/shm/ ./db_bench -benchmarks=filluniquerandom,compact -compression_type=zstd -compression_max_dict_bytes=$dict_bytes -compression_zstd_max_train_bytes=$train_bytes -compression_use_zstd_dict_trainer=false > /dev/null 2>&1 TEST_TMPDIR=/dev/shm/ ./db_bench -use_existing_db=true -benchmarks=readrandom -cache_size=0 -compression_type=zstd -compression_max_dict_bytes=$dict_bytes -compression_zstd_max_train_bytes=$train_bytes -compression_use_zstd_dict_trainer=false 2>&1 | grep MB/s echo "Train Dictionary" TEST_TMPDIR=/dev/shm/ ./db_bench -benchmarks=filluniquerandom,compact -compression_type=zstd -compression_max_dict_bytes=$dict_bytes -compression_zstd_max_train_bytes=$train_bytes > /dev/null 2>&1 TEST_TMPDIR=/dev/shm/ ./db_bench -use_existing_db=true -benchmarks=readrandom -cache_size=0 -compression_type=zstd -compression_max_dict_bytes=$dict_bytes -compression_zstd_max_train_bytes=$train_bytes 2>&1 | grep MB/s No Dictionary readrandom : 12.183 micros/op 82082 ops/sec 12.183 seconds 1000000 operations; 9.1 MB/s (1000000 of 1000000 found) Raw Dictionary readrandom : 12.314 micros/op 81205 ops/sec 12.314 seconds 1000000 operations; 9.0 MB/s (1000000 of 1000000 found) FinalizeDict readrandom : 9.787 micros/op 102180 ops/sec 9.787 seconds 1000000 operations; 11.3 MB/s (1000000 of 1000000 found) Train Dictionary readrandom : 9.698 micros/op 103108 ops/sec 9.699 seconds 1000000 operations; 11.4 MB/s (1000000 of 1000000 found) ``` Reviewed By: ajkr Differential Revision: D35720026 Pulled By: cbi42 fbshipit-source-id: 24d230fdff0fd28a1bb650658798f00dfcfb2a1f
2022-05-20 19:09:09 +00:00
/*
* Class: org_rocksdb_CompressionOptions
* Method: setZstdMaxTrainBytes
* Signature: (JZ)V
*/
void Java_org_rocksdb_CompressionOptions_setUseZstdDictTrainer(
JNIEnv*, jobject, jlong jhandle, jboolean juse_zstd_dict_trainer) {
auto* opt = reinterpret_cast<ROCKSDB_NAMESPACE::CompressionOptions*>(jhandle);
opt->use_zstd_dict_trainer = juse_zstd_dict_trainer == JNI_TRUE;
}
/*
* Class: org_rocksdb_CompressionOptions
* Method: zstdMaxTrainBytes
* Signature: (J)Z
*/
jboolean Java_org_rocksdb_CompressionOptions_useZstdDictTrainer(JNIEnv*,
jobject,
jlong jhandle) {
auto* opt = reinterpret_cast<ROCKSDB_NAMESPACE::CompressionOptions*>(jhandle);
return static_cast<bool>(opt->use_zstd_dict_trainer);
}
/*
* Class: org_rocksdb_CompressionOptions
* Method: setEnabled
* Signature: (JZ)V
*/
void Java_org_rocksdb_CompressionOptions_setEnabled(
JNIEnv*, jobject, jlong jhandle, jboolean jenabled) {
auto* opt = reinterpret_cast<ROCKSDB_NAMESPACE::CompressionOptions*>(jhandle);
opt->enabled = jenabled == JNI_TRUE;
}
/*
* Class: org_rocksdb_CompressionOptions
* Method: enabled
* Signature: (J)Z
*/
jboolean Java_org_rocksdb_CompressionOptions_enabled(
JNIEnv*, jobject, jlong jhandle) {
auto* opt = reinterpret_cast<ROCKSDB_NAMESPACE::CompressionOptions*>(jhandle);
return static_cast<bool>(opt->enabled);
}
/*
* Class: org_rocksdb_CompressionOptions
* Method: disposeInternal
* Signature: (J)V
*/
void Java_org_rocksdb_CompressionOptions_disposeInternal(
JNIEnv*, jobject, jlong jhandle) {
delete reinterpret_cast<ROCKSDB_NAMESPACE::CompressionOptions*>(jhandle);
}