Use compensated file size for intra-L0 compaction (#12878)

Summary:
In leveled compaction, we pick intra-L0 compaction instead of L0->Lbase whenever L0 size is small. When L0 files contain many deletions, it makes more sense to compact them down instead of accumulating tombstones in L0. This PR uses compensated_file_size when computing L0 size for determining intra-L0 compaction. It also scales down the limit on total L0 size further, to be more cautious about accumulating data in L0.
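For readers unfamiliar with the term: compensated file size inflates a file's apparent size by charging each tombstone an estimated value size, so deletion-heavy files look larger to the compaction picker (the changelog entry in this commit describes it the same way). The following is a minimal sketch of that idea only, not RocksDB's actual computation; the struct, function name, and assumed_value_size parameter are invented for illustration.

// Illustrative sketch of the idea behind compensated_file_size.
// Not RocksDB code: the real computation uses RocksDB's own estimate
// of the value size charged per deletion entry.
#include <cstdint>

struct FileInfoSketch {
  uint64_t raw_size;       // bytes on disk
  uint64_t num_deletions;  // tombstone count in the file
};

uint64_t CompensatedSizeSketch(const FileInfoSketch& f,
                               uint64_t assumed_value_size) {
  // Charge every tombstone an assumed value size so files full of
  // deletions appear larger and get compacted down sooner.
  return f.raw_size + f.num_deletions * assumed_value_size;
}

A small L0 file that is mostly tombstones can therefore dominate the picker's view of total L0 size even when its raw size is tiny.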

Pull Request resolved: https://github.com/facebook/rocksdb/pull/12878

Test Plan: updated unit test.

Reviewed By: hx235

Differential Revision: D59932421

Pulled By: cbi42

fbshipit-source-id: 9de973ac51eb7df81b38b8c68110072b1aa06321
Changyu Bi 2024-08-01 17:49:34 -07:00 committed by Facebook GitHub Bot
parent 005256bcc8
commit 8be824e316
3 changed files with 16 additions and 10 deletions

@@ -925,11 +925,15 @@ bool LevelCompactionBuilder::PickSizeBasedIntraL0Compaction() {
   }
   uint64_t l0_size = 0;
   for (const auto& file : l0_files) {
-    l0_size += file->fd.GetFileSize();
+    assert(file->compensated_file_size >= file->fd.GetFileSize());
+    // Compact down L0s with more deletions.
+    l0_size += file->compensated_file_size;
   }
-  const uint64_t min_lbase_size =
-      l0_size * static_cast<uint64_t>(std::max(
-                    10.0, mutable_cf_options_.max_bytes_for_level_multiplier));
+  // Avoid L0->Lbase compactions that are inefficient for write-amp.
+  const double kMultiplier =
+      std::max(10.0, mutable_cf_options_.max_bytes_for_level_multiplier) * 2;
+  const uint64_t min_lbase_size = MultiplyCheckOverflow(l0_size, kMultiplier);
+  assert(min_lbase_size >= l0_size);
   const std::vector<FileMetaData*>& lbase_files =
       vstorage_->LevelFiles(/*level=*/base_level);
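Restated outside the diff: the picker prefers intra-L0 only when Lbase is so much larger than the (compensated) L0 size that pushing L0 down would be inefficient for write-amp, and this change doubles that bar. Below is a minimal sketch of the size threshold alone; the real PickSizeBasedIntraL0Compaction applies further conditions that are not shown in this hunk.

// Sketch of only the size threshold, not the full picker logic.
#include <algorithm>
#include <cstdint>

bool LbaseLargeEnoughForIntraL0(uint64_t compensated_l0_size,
                                uint64_t lbase_size,
                                double max_bytes_for_level_multiplier) {
  // Doubled relative to the old code, so the picker is more willing to
  // do L0->Lbase and less willing to keep stacking data in L0.
  const double multiplier =
      std::max(10.0, max_bytes_for_level_multiplier) * 2;
  const uint64_t min_lbase_size =
      static_cast<uint64_t>(compensated_l0_size * multiplier);
  return lbase_size >= min_lbase_size;
}

With the default multiplier of 10, Lbase must now be at least 20x the compensated L0 size before intra-L0 is preferred, where 10x was enough before the change.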


@@ -4284,27 +4284,28 @@ TEST_F(CompactionPickerTest, IntraL0WhenL0IsSmall) {
     SCOPED_TRACE("lbase_size_multiplier=" +
                  std::to_string(lbase_size_multiplier));
     NewVersionStorage(6, kCompactionStyleLevel);
-    // When L0 size is <= Lbase size / max_bytes_for_level_multiplier,
+    // When L0 size is <= Lbase size / max_bytes_for_level_multiplier / 2,
     // intra-L0 compaction is picked. Otherwise, L0->L1
     // compaction is picked.
+    // compensated_file_size will be used to compute total l0 size.
     Add(/*level=*/0, /*file_number=*/1U, /*smallest=*/"100",
-        /*largest=*/"200", /*file_size=*/1000, /*path_id=*/0,
+        /*largest=*/"200", /*file_size=*/10, /*path_id=*/0,
         /*smallest_seq=*/10, /*largest_seq=*/11,
         /*compensated_file_size=*/1000);
     Add(/*level=*/0, /*file_number=*/2U, /*smallest=*/"100",
-        /*largest=*/"100", /*file_size=*/1000, /*path_id=*/0,
+        /*largest=*/"100", /*file_size=*/10, /*path_id=*/0,
         /*smallest_seq=*/20, /*largest_seq=*/21,
         /*compensated_file_size=*/1000);
     Add(/*level=*/0, /*file_number=*/3U, /*smallest=*/"100",
-        /*largest=*/"200", /*file_size=*/1000, /*path_id=*/0,
+        /*largest=*/"200", /*file_size=*/10, /*path_id=*/0,
         /*smallest_seq=*/30, /*largest_seq=*/31,
         /*compensated_file_size=*/1000);
     Add(/*level=*/0, /*file_number=*/4U, /*smallest=*/"100",
-        /*largest=*/"200", /*file_size=*/1000, /*path_id=*/0,
+        /*largest=*/"200", /*file_size=*/10, /*path_id=*/0,
         /*smallest_seq=*/40, /*largest_seq=*/41,
         /*compensated_file_size=*/1000);
     const uint64_t l0_size = 4000;
-    const uint64_t lbase_size = l0_size * lbase_size_multiplier;
+    const uint64_t lbase_size = l0_size * lbase_size_multiplier * 2;
     Add(/*level=*/1, /*file_number=*/5U, /*smallest=*/"100",
         /*largest=*/"200", /*file_size=*/lbase_size, /*path_id=*/0,
         /*smallest_seq=*/0, /*largest_seq=*/0,
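The arithmetic behind the new test values (my reading of the hunk, with the default max_bytes_for_level_multiplier of 10 assumed only for the worked numbers): each of the four L0 files now has a raw size of 10 bytes but a compensated size of 1000, so a picker that sums compensated sizes sees 4000 where the old raw-size sum would see only 40; the Lbase file is then scaled by lbase_size_multiplier * 2 so that the test, which presumably loops over multiplier values given the SCOPED_TRACE, keeps straddling the doubled threshold.

// Worked numbers only; illustrative constants, not test code.
#include <cstdint>

constexpr uint64_t kRawL0Size = 4 * 10;            // what a raw-size sum sees
constexpr uint64_t kCompensatedL0Size = 4 * 1000;  // what the picker now sees
constexpr uint64_t kDoubledMultiplier = 2 * 10;    // max(10, 10) * 2
// Intra-L0 is preferred only when Lbase is at least this large.
constexpr uint64_t kMinLbaseForIntraL0 =
    kCompensatedL0Size * kDoubledMultiplier;
static_assert(kRawL0Size == 40, "old picker's view of L0");
static_assert(kMinLbaseForIntraL0 == 80000, "4000 * 20 == 80000");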


@@ -0,0 +1 @@
+* There may be less intra-L0 compaction triggered by total L0 size being too small. We now use compensated file size (tombstones are assigned some value size) when calculating L0 size and reduce the threshold for L0 size limit. This is to avoid accumulating too much data/tombstones in L0.