mirror of https://github.com/facebook/rocksdb.git
Ignore max_compaction_bytes for compaction input that are within output key-range (#10835)
Summary: When picking compaction input files, we sometimes stop picking a file that is fully included in the output key-range due to hitting max_compaction_bytes. Including these input files can potentially reduce WA at the expense of larger compactions. Larger compaction should be fine as files from input level are usually 10X smaller than files from output level. This PR adds a mutable CF option `ignore_max_compaction_bytes_for_input` that is enabled by default. We can remove this option once we are sure it is safe. Pull Request resolved: https://github.com/facebook/rocksdb/pull/10835 Test Plan: - CI, a unit test on max_compaction_bytes fails before turning this flag off. - Benchmark does not show much difference in WA: `./db_bench --benchmarks=fillrandom,waitforcompaction,stats,levelstats -max_background_jobs=12 -num=2000000000 -target_file_size_base=33554432 --write_buffer_size=33554432` ``` main: ** Compaction Stats [default] ** Level Files Size Score Read(GB) Rn(GB) Rnp1(GB) Write(GB) Wnew(GB) Moved(GB) W-Amp Rd(MB/s) Wr(MB/s) Comp(sec) CompMergeCPU(sec) Comp(cnt) Avg(sec) KeyIn KeyDrop Rblob(GB) Wblob(GB) ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ L0 3/0 91.59 MB 0.8 70.9 0.0 70.9 200.8 129.9 0.0 1.5 25.2 71.2 2886.55 2463.45 9725 0.297 1093M 254K 0.0 0.0 L1 9/0 248.03 MB 1.0 392.0 129.8 262.2 391.7 129.5 0.0 3.0 69.0 68.9 5821.71 5536.90 804 7.241 6029M 5814K 0.0 0.0 L2 87/0 2.50 GB 1.0 537.0 128.5 408.5 533.8 125.2 0.7 4.2 69.5 69.1 7912.24 7323.70 4417 1.791 8299M 36M 0.0 0.0 L3 836/0 24.99 GB 1.0 616.9 118.3 498.7 594.5 95.8 5.2 5.0 66.9 64.5 9442.38 8490.28 4204 2.246 9749M 306M 0.0 0.0 L4 2355/0 62.95 GB 0.3 67.3 37.1 30.2 54.2 24.0 38.9 1.5 72.2 58.2 954.37 821.18 917 1.041 1076M 173M 0.0 0.0 Sum 3290/0 90.77 GB 0.0 1684.2 413.7 1270.5 1775.0 504.5 44.9 13.7 63.8 67.3 27017.25 24635.52 20067 1.346 26G 
522M 0.0 0.0 Cumulative compaction: 1774.96 GB write, 154.29 MB/s write, 1684.19 GB read, 146.40 MB/s read, 27017.3 seconds This PR: ** Compaction Stats [default] ** Level Files Size Score Read(GB) Rn(GB) Rnp1(GB) Write(GB) Wnew(GB) Moved(GB) W-Amp Rd(MB/s) Wr(MB/s) Comp(sec) CompMergeCPU(sec) Comp(cnt) Avg(sec) KeyIn KeyDrop Rblob(GB) Wblob(GB) ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ L0 3/0 45.71 MB 0.8 72.9 0.0 72.9 202.8 129.9 0.0 1.6 25.4 70.7 2938.16 2510.36 9741 0.302 1124M 265K 0.0 0.0 L1 8/0 234.54 MB 0.9 384.5 129.8 254.7 384.2 129.6 0.0 3.0 69.0 68.9 5708.08 5424.43 791 7.216 5913M 5753K 0.0 0.0 L2 84/0 2.47 GB 1.0 543.1 128.6 414.5 539.9 125.4 0.7 4.2 69.6 69.2 7989.31 7403.13 4418 1.808 8393M 36M 0.0 0.0 L3 839/0 24.96 GB 1.0 615.6 118.4 497.2 593.2 96.0 5.1 5.0 66.6 64.1 9471.23 8489.31 4193 2.259 9726M 306M 0.0 0.0 L4 2360/0 63.04 GB 0.3 67.6 37.3 30.3 54.4 24.1 38.9 1.5 71.5 57.6 967.30 827.99 907 1.066 1080M 173M 0.0 0.0 Sum 3294/0 90.75 GB 0.0 1683.8 414.2 1269.6 1774.5 504.9 44.8 13.7 63.7 67.1 27074.08 24655.22 20050 1.350 26G 522M 0.0 0.0 Cumulative compaction: 1774.52 GB write, 157.09 MB/s write, 1683.77 GB read, 149.06 MB/s read, 27074.1 seconds ``` Reviewed By: ajkr Differential Revision: D40518319 Pulled By: cbi42 fbshipit-source-id: f4ea614bc0ebefe007ffaf05bb9aec9a8ca25b60
This commit is contained in:
parent
8dd4bf6cff
commit
333abe9c55
|
@ -8,6 +8,7 @@
|
|||
* FIFO compaction now supports migrating from a multi-level DB via DB::Open(). During the migration phase, FIFO compaction picker will:
|
||||
* pick the sst file with the smallest starting key in the bottom-most non-empty level.
|
||||
* Note that during the migration phase, the file purge order will only be an approximation of "FIFO" as files in lower levels might sometimes contain newer keys than files in upper levels.
|
||||
* Added an option `ignore_max_compaction_bytes_for_input` to ignore max_compaction_bytes limit when adding files to be compacted from input level. This should help reduce write amplification. The option is enabled by default.
|
||||
|
||||
### Bug Fixes
|
||||
* Fix a bug in io_uring_prep_cancel in AbortIO API for posix, which expects sqe->addr to match the submitted read request; the wrong parameter was being passed.
|
||||
|
|
|
@ -526,7 +526,8 @@ bool CompactionPicker::SetupOtherInputs(
|
|||
try_overlapping_inputs = false;
|
||||
}
|
||||
if (try_overlapping_inputs && expanded_inputs.size() > inputs->size() &&
|
||||
output_level_inputs_size + expanded_inputs_size < limit &&
|
||||
(mutable_cf_options.ignore_max_compaction_bytes_for_input ||
|
||||
output_level_inputs_size + expanded_inputs_size < limit) &&
|
||||
!AreFilesInCompaction(expanded_inputs.files)) {
|
||||
InternalKey new_start, new_limit;
|
||||
GetRange(expanded_inputs, &new_start, &new_limit);
|
||||
|
@ -549,7 +550,8 @@ bool CompactionPicker::SetupOtherInputs(
|
|||
base_index, nullptr);
|
||||
expanded_inputs_size = TotalFileSize(expanded_inputs.files);
|
||||
if (expanded_inputs.size() > inputs->size() &&
|
||||
output_level_inputs_size + expanded_inputs_size < limit &&
|
||||
(mutable_cf_options.ignore_max_compaction_bytes_for_input ||
|
||||
output_level_inputs_size + expanded_inputs_size < limit) &&
|
||||
!AreFilesInCompaction(expanded_inputs.files)) {
|
||||
expand_inputs = true;
|
||||
}
|
||||
|
|
|
@ -1340,6 +1340,7 @@ TEST_F(CompactionPickerTest, CompactionPriMinOverlapping4) {
|
|||
ioptions_.compaction_pri = kMinOverlappingRatio;
|
||||
mutable_cf_options_.max_bytes_for_level_base = 10000000;
|
||||
mutable_cf_options_.max_bytes_for_level_multiplier = 10;
|
||||
mutable_cf_options_.ignore_max_compaction_bytes_for_input = false;
|
||||
|
||||
// files 7 and 8 overlap with the same file, but file 8 is smaller so
|
||||
// it will be picked.
|
||||
|
@ -2358,6 +2359,7 @@ TEST_F(CompactionPickerTest, IsBottommostLevelTest) {
|
|||
TEST_F(CompactionPickerTest, MaxCompactionBytesHit) {
|
||||
mutable_cf_options_.max_bytes_for_level_base = 1000000u;
|
||||
mutable_cf_options_.max_compaction_bytes = 800000u;
|
||||
mutable_cf_options_.ignore_max_compaction_bytes_for_input = false;
|
||||
ioptions_.level_compaction_dynamic_level_bytes = false;
|
||||
NewVersionStorage(6, kCompactionStyleLevel);
|
||||
// A compaction should be triggered and pick file 2 and 5.
|
||||
|
@ -2384,6 +2386,7 @@ TEST_F(CompactionPickerTest, MaxCompactionBytesHit) {
|
|||
TEST_F(CompactionPickerTest, MaxCompactionBytesNotHit) {
|
||||
mutable_cf_options_.max_bytes_for_level_base = 800000u;
|
||||
mutable_cf_options_.max_compaction_bytes = 1000000u;
|
||||
mutable_cf_options_.ignore_max_compaction_bytes_for_input = false;
|
||||
ioptions_.level_compaction_dynamic_level_bytes = false;
|
||||
NewVersionStorage(6, kCompactionStyleLevel);
|
||||
// A compaction should be triggered and pick file 2 and 5.
|
||||
|
|
|
@ -684,6 +684,17 @@ struct AdvancedColumnFamilyOptions {
|
|||
// Dynamically changeable through SetOptions() API
|
||||
uint64_t max_compaction_bytes = 0;
|
||||
|
||||
// When setting up compaction input files, we ignore the
|
||||
// `max_compaction_bytes` limit when pulling in input files that are entirely
|
||||
// within output key range.
|
||||
//
|
||||
// Default: true
|
||||
//
|
||||
// Dynamically changeable through SetOptions() API
|
||||
// We could remove this knob and always ignore the limit once it is proven
|
||||
// safe.
|
||||
bool ignore_max_compaction_bytes_for_input = true;
|
||||
|
||||
// All writes will be slowed down to at least delayed_write_rate if estimated
|
||||
// bytes needing compaction exceed this threshold.
|
||||
//
|
||||
|
|
|
@ -270,6 +270,11 @@ static std::unordered_map<std::string, OptionTypeInfo>
|
|||
{offsetof(struct MutableCFOptions, max_compaction_bytes),
|
||||
OptionType::kUInt64T, OptionVerificationType::kNormal,
|
||||
OptionTypeFlags::kMutable}},
|
||||
{"ignore_max_compaction_bytes_for_input",
|
||||
{offsetof(struct MutableCFOptions,
|
||||
ignore_max_compaction_bytes_for_input),
|
||||
OptionType::kBoolean, OptionVerificationType::kNormal,
|
||||
OptionTypeFlags::kMutable}},
|
||||
{"expanded_compaction_factor",
|
||||
{0, OptionType::kInt, OptionVerificationType::kDeprecated,
|
||||
OptionTypeFlags::kMutable}},
|
||||
|
@ -1034,6 +1039,8 @@ void MutableCFOptions::Dump(Logger* log) const {
|
|||
level0_stop_writes_trigger);
|
||||
ROCKS_LOG_INFO(log, " max_compaction_bytes: %" PRIu64,
|
||||
max_compaction_bytes);
|
||||
ROCKS_LOG_INFO(log, " ignore_max_compaction_bytes_for_input: %s",
|
||||
ignore_max_compaction_bytes_for_input ? "true" : "false");
|
||||
ROCKS_LOG_INFO(log, " target_file_size_base: %" PRIu64,
|
||||
target_file_size_base);
|
||||
ROCKS_LOG_INFO(log, " target_file_size_multiplier: %d",
|
||||
|
|
|
@ -130,6 +130,8 @@ struct MutableCFOptions {
|
|||
level0_slowdown_writes_trigger(options.level0_slowdown_writes_trigger),
|
||||
level0_stop_writes_trigger(options.level0_stop_writes_trigger),
|
||||
max_compaction_bytes(options.max_compaction_bytes),
|
||||
ignore_max_compaction_bytes_for_input(
|
||||
options.ignore_max_compaction_bytes_for_input),
|
||||
target_file_size_base(options.target_file_size_base),
|
||||
target_file_size_multiplier(options.target_file_size_multiplier),
|
||||
max_bytes_for_level_base(options.max_bytes_for_level_base),
|
||||
|
@ -192,6 +194,7 @@ struct MutableCFOptions {
|
|||
level0_slowdown_writes_trigger(0),
|
||||
level0_stop_writes_trigger(0),
|
||||
max_compaction_bytes(0),
|
||||
ignore_max_compaction_bytes_for_input(true),
|
||||
target_file_size_base(0),
|
||||
target_file_size_multiplier(0),
|
||||
max_bytes_for_level_base(0),
|
||||
|
@ -273,6 +276,7 @@ struct MutableCFOptions {
|
|||
int level0_slowdown_writes_trigger;
|
||||
int level0_stop_writes_trigger;
|
||||
uint64_t max_compaction_bytes;
|
||||
bool ignore_max_compaction_bytes_for_input;
|
||||
uint64_t target_file_size_base;
|
||||
int target_file_size_multiplier;
|
||||
uint64_t max_bytes_for_level_base;
|
||||
|
|
|
@ -71,6 +71,8 @@ AdvancedColumnFamilyOptions::AdvancedColumnFamilyOptions(const Options& options)
|
|||
max_bytes_for_level_multiplier_additional(
|
||||
options.max_bytes_for_level_multiplier_additional),
|
||||
max_compaction_bytes(options.max_compaction_bytes),
|
||||
ignore_max_compaction_bytes_for_input(
|
||||
options.ignore_max_compaction_bytes_for_input),
|
||||
soft_pending_compaction_bytes_limit(
|
||||
options.soft_pending_compaction_bytes_limit),
|
||||
hard_pending_compaction_bytes_limit(
|
||||
|
@ -281,6 +283,8 @@ void ColumnFamilyOptions::Dump(Logger* log) const {
|
|||
ROCKS_LOG_HEADER(
|
||||
log, " Options.max_compaction_bytes: %" PRIu64,
|
||||
max_compaction_bytes);
|
||||
ROCKS_LOG_HEADER(log, " Options.ignore_max_compaction_bytes_for_input: %s",
|
||||
ignore_max_compaction_bytes_for_input ? "true" : "false");
|
||||
ROCKS_LOG_HEADER(
|
||||
log,
|
||||
" Options.arena_block_size: %" ROCKSDB_PRIszt,
|
||||
|
|
|
@ -229,6 +229,8 @@ void UpdateColumnFamilyOptions(const MutableCFOptions& moptions,
|
|||
moptions.level0_slowdown_writes_trigger;
|
||||
cf_opts->level0_stop_writes_trigger = moptions.level0_stop_writes_trigger;
|
||||
cf_opts->max_compaction_bytes = moptions.max_compaction_bytes;
|
||||
cf_opts->ignore_max_compaction_bytes_for_input =
|
||||
moptions.ignore_max_compaction_bytes_for_input;
|
||||
cf_opts->target_file_size_base = moptions.target_file_size_base;
|
||||
cf_opts->target_file_size_multiplier = moptions.target_file_size_multiplier;
|
||||
cf_opts->max_bytes_for_level_base = moptions.max_bytes_for_level_base;
|
||||
|
|
|
@ -486,6 +486,7 @@ TEST_F(OptionsSettableTest, ColumnFamilyOptionsAllFieldsSettable) {
|
|||
"max_write_buffer_number=84;"
|
||||
"write_buffer_size=1653;"
|
||||
"max_compaction_bytes=64;"
|
||||
"ignore_max_compaction_bytes_for_input=true;"
|
||||
"max_bytes_for_level_multiplier=60;"
|
||||
"memtable_factory=SkipListFactory;"
|
||||
"compression=kNoCompression;"
|
||||
|
|
Loading…
Reference in New Issue