From 8be824e316436418e4a5f07a0fb0a50046ab51b4 Mon Sep 17 00:00:00 2001
From: Changyu Bi
Date: Thu, 1 Aug 2024 17:49:34 -0700
Subject: [PATCH] Use compensated file size for intra-L0 compaction (#12878)

Summary:
In leveled compaction, we pick intra-L0 compaction instead of L0->Lbase
whenever L0 size is small. When L0 files contain many deletions, it makes
more sense to compact them down instead of accumulating tombstones in L0.
This PR uses compensated_file_size when computing L0 size for determining
intra-L0 compaction. It also further scales down the limit on total L0
size to be more cautious about accumulating data in L0.

Pull Request resolved: https://github.com/facebook/rocksdb/pull/12878

Test Plan: updated unit test.

Reviewed By: hx235

Differential Revision: D59932421

Pulled By: cbi42

fbshipit-source-id: 9de973ac51eb7df81b38b8c68110072b1aa06321
---
 db/compaction/compaction_picker_level.cc        | 12 ++++++++----
 db/compaction/compaction_picker_test.cc         | 13 +++++++------
 unreleased_history/behavior_changes/intra-l0.md |  1 +
 3 files changed, 16 insertions(+), 10 deletions(-)
 create mode 100644 unreleased_history/behavior_changes/intra-l0.md

diff --git a/db/compaction/compaction_picker_level.cc b/db/compaction/compaction_picker_level.cc
index 92cf865016..ae289ac3fb 100644
--- a/db/compaction/compaction_picker_level.cc
+++ b/db/compaction/compaction_picker_level.cc
@@ -925,11 +925,15 @@ bool LevelCompactionBuilder::PickSizeBasedIntraL0Compaction() {
   }
   uint64_t l0_size = 0;
   for (const auto& file : l0_files) {
-    l0_size += file->fd.GetFileSize();
+    assert(file->compensated_file_size >= file->fd.GetFileSize());
+    // Compact down L0s with more deletions.
+    l0_size += file->compensated_file_size;
   }
-  const uint64_t min_lbase_size =
-      l0_size * static_cast<uint64_t>(std::max(
-                    10.0, mutable_cf_options_.max_bytes_for_level_multiplier));
+
+  // Avoid L0->Lbase compactions that are inefficient for write-amp.
+  const double kMultiplier =
+      std::max(10.0, mutable_cf_options_.max_bytes_for_level_multiplier) * 2;
+  const uint64_t min_lbase_size = MultiplyCheckOverflow(l0_size, kMultiplier);
   assert(min_lbase_size >= l0_size);
   const std::vector<FileMetaData*>& lbase_files =
       vstorage_->LevelFiles(/*level=*/base_level);
diff --git a/db/compaction/compaction_picker_test.cc b/db/compaction/compaction_picker_test.cc
index 70f59a8765..1341848688 100644
--- a/db/compaction/compaction_picker_test.cc
+++ b/db/compaction/compaction_picker_test.cc
@@ -4284,27 +4284,28 @@ TEST_F(CompactionPickerTest, IntraL0WhenL0IsSmall) {
     SCOPED_TRACE("lbase_size_multiplier=" +
                  std::to_string(lbase_size_multiplier));
     NewVersionStorage(6, kCompactionStyleLevel);
-    // When L0 size is <= Lbase size / max_bytes_for_level_multiplier,
+    // When L0 size is <= Lbase size / max_bytes_for_level_multiplier / 2,
     // intra-L0 compaction is picked. Otherwise, L0->L1
     // compaction is picked.
+    // compensated_file_size will be used to compute total l0 size.
     Add(/*level=*/0, /*file_number=*/1U, /*smallest=*/"100",
-        /*largest=*/"200", /*file_size=*/1000, /*path_id=*/0,
+        /*largest=*/"200", /*file_size=*/10, /*path_id=*/0,
         /*smallest_seq=*/10, /*largest_seq=*/11,
         /*compensated_file_size=*/1000);
     Add(/*level=*/0, /*file_number=*/2U, /*smallest=*/"100",
-        /*largest=*/"100", /*file_size=*/1000, /*path_id=*/0,
+        /*largest=*/"100", /*file_size=*/10, /*path_id=*/0,
         /*smallest_seq=*/20, /*largest_seq=*/21,
         /*compensated_file_size=*/1000);
     Add(/*level=*/0, /*file_number=*/3U, /*smallest=*/"100",
-        /*largest=*/"200", /*file_size=*/1000, /*path_id=*/0,
+        /*largest=*/"200", /*file_size=*/10, /*path_id=*/0,
         /*smallest_seq=*/30, /*largest_seq=*/31,
         /*compensated_file_size=*/1000);
     Add(/*level=*/0, /*file_number=*/4U, /*smallest=*/"100",
-        /*largest=*/"200", /*file_size=*/1000, /*path_id=*/0,
+        /*largest=*/"200", /*file_size=*/10, /*path_id=*/0,
         /*smallest_seq=*/40, /*largest_seq=*/41,
         /*compensated_file_size=*/1000);
     const uint64_t l0_size = 4000;
-    const uint64_t lbase_size = l0_size * lbase_size_multiplier;
+    const uint64_t lbase_size = l0_size * lbase_size_multiplier * 2;
     Add(/*level=*/1, /*file_number=*/5U, /*smallest=*/"100",
         /*largest=*/"200", /*file_size=*/lbase_size, /*path_id=*/0,
         /*smallest_seq=*/0, /*largest_seq=*/0,
diff --git a/unreleased_history/behavior_changes/intra-l0.md b/unreleased_history/behavior_changes/intra-l0.md
new file mode 100644
index 0000000000..81d55bbc07
--- /dev/null
+++ b/unreleased_history/behavior_changes/intra-l0.md
@@ -0,0 +1 @@
+* There may be less intra-L0 compaction triggered by total L0 size being too small. We now use compensated file size (tombstones are assigned some value size) when calculating L0 size and reduce the threshold for L0 size limit. This is to avoid accumulating too much data/tombstones in L0.
\ No newline at end of file
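
For context, the size gate this patch changes can be sketched as a standalone
function. The code below is a minimal illustration, not RocksDB internals:
FileInfo, MultiplySaturating, and PreferIntraL0 are hypothetical stand-ins for
FileMetaData, MultiplyCheckOverflow, and
LevelCompactionBuilder::PickSizeBasedIntraL0Compaction, and the sample sizes
mirror the updated unit test.

#include <algorithm>
#include <cstdint>
#include <iostream>
#include <limits>
#include <vector>

// Hypothetical stand-in for the relevant FileMetaData fields.
struct FileInfo {
  uint64_t file_size;              // raw SST size
  uint64_t compensated_file_size;  // raw size + estimated tombstone value size
};

// Multiply with saturation, in the spirit of MultiplyCheckOverflow.
uint64_t MultiplySaturating(uint64_t size, double multiplier) {
  double result = static_cast<double>(size) * multiplier;
  if (result >= static_cast<double>(std::numeric_limits<uint64_t>::max())) {
    return std::numeric_limits<uint64_t>::max();
  }
  return static_cast<uint64_t>(result);
}

// Returns true when intra-L0 is preferred over L0->Lbase: Lbase must be at
// least (2 * multiplier) times the compensated L0 size, per this patch.
bool PreferIntraL0(const std::vector<FileInfo>& l0_files, uint64_t lbase_size,
                   double max_bytes_for_level_multiplier) {
  uint64_t l0_size = 0;
  for (const auto& f : l0_files) {
    // After the patch: tombstone-heavy files count as larger, so they get
    // compacted down to Lbase sooner instead of accumulating in L0.
    l0_size += f.compensated_file_size;
  }
  const double kMultiplier =
      std::max(10.0, max_bytes_for_level_multiplier) * 2;  // the new "* 2"
  const uint64_t min_lbase_size = MultiplySaturating(l0_size, kMultiplier);
  return lbase_size >= min_lbase_size;
}

int main() {
  // Four small L0 files whose tombstones inflate compensated size to 1000
  // each, mirroring the updated unit test.
  std::vector<FileInfo> l0(
      4, FileInfo{/*file_size=*/10, /*compensated_file_size=*/1000});
  // Compensated l0_size = 4000, multiplier max(10, 10) * 2 = 20, so Lbase
  // must hold at least 80000 bytes before intra-L0 is preferred.
  std::cout << PreferIntraL0(l0, /*lbase_size=*/100000, 10.0) << "\n";  // 1
  std::cout << PreferIntraL0(l0, /*lbase_size=*/50000, 10.0) << "\n";   // 0
}

Counting L0 files at their compensated size makes deletion-heavy files look
big to the picker, so it favors pushing them to Lbase, where the tombstones
can actually drop the keys they cover.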