From ba16e8eee7b1a7ad933ff67e33082c46b01d68d7 Mon Sep 17 00:00:00 2001 From: Changyu Bi Date: Fri, 14 Apr 2023 11:50:20 -0700 Subject: [PATCH] Try to pick more files in `LevelCompactionBuilder::TryExtendNonL0TrivialMove()` (#11347) Summary: Before this PR, in `LevelCompactionBuilder::TryExtendNonL0TrivialMove(index)`, we start from a file at index and expand the compaction input towards right to find files to trivial move. This PR adds the logic to also expand towards left. Another major change made in this PR is to not expand L0 files through `TryExtendNonL0TrivialMove()`. This happens currently when compacting L0 files to an empty output level. The condition for expanding files in `TryExtendNonL0TrivialMove()` is to check atomic boundary, which does not take into account that L0 files can overlap in key range and are not sorted in key order. So it may include more L0 files than needed and disallow a trivial move. This change is included in this PR so that we don't make it worse by always expanding L0 in both direction. Pull Request resolved: https://github.com/facebook/rocksdb/pull/11347 Test Plan: * new unit test * Benchmark does not show obvious improvement or regression: ``` Write sequentially ./db_bench --benchmarks=fillseq --compression_type=lz4 --write_buffer_size=1000000 --num=100000000 --value_size=100 -level_compaction_dynamic_level_bytes --target_file_size_base=7340032 --max_bytes_for_level_base=16777216 Main: fillseq : 4.726 micros/op 211592 ops/sec 472.607 seconds 100000000 operations; 23.4 MB/s This PR: fillseq : 4.755 micros/op 210289 ops/sec 475.534 seconds 100000000 operations; 23.3 MB/s Write randomly ./db_bench --benchmarks=fillrandom --compression_type=lz4 --write_buffer_size=1000000 --num=100000000 --value_size=100 -level_compaction_dynamic_level_bytes --target_file_size_base=7340032 --max_bytes_for_level_base=16777216 Main: fillrandom : 16.351 micros/op 61159 ops/sec 1635.066 seconds 100000000 operations; 6.8 MB/s This PR: fillrandom : 15.798 micros/op 63298 ops/sec 1579.817 seconds 100000000 operations; 7.0 MB/s ``` Reviewed By: ajkr Differential Revision: D44645650 Pulled By: cbi42 fbshipit-source-id: 8631f3a6b3f01decbbf18c34f2b62833cb4f9733 --- db/compaction/compaction_picker_level.cc | 63 +++++++++++++++++++++--- db/compaction/compaction_picker_test.cc | 55 +++++++++++++++++++++ db/db_compaction_test.cc | 3 +- test_util/testutil.cc | 2 +- 4 files changed, 113 insertions(+), 10 deletions(-) diff --git a/db/compaction/compaction_picker_level.cc b/db/compaction/compaction_picker_level.cc index 2162d30a30..c436689bb6 100644 --- a/db/compaction/compaction_picker_level.cc +++ b/db/compaction/compaction_picker_level.cc @@ -83,7 +83,7 @@ class LevelCompactionBuilder { Compaction* GetCompaction(); - // For the specfied level, pick a file that we want to compact. + // From `start_level_`, pick files to compact to `output_level_`. // Returns false if there is no file to compact. // If it returns true, inputs->files.size() will be exactly one for // all compaction priorities except round-robin. For round-robin, @@ -107,8 +107,9 @@ class LevelCompactionBuilder { bool PickIntraL0Compaction(); // Return true if TrivialMove is extended. `start_index` is the index of - // the intiial file picked, which should already be in `start_level_inputs_`. - bool TryExtendNonL0TrivialMove(int start_index); + // the initial file picked, which should already be in `start_level_inputs_`. + bool TryExtendNonL0TrivialMove(int start_index, + bool only_expand_right = false); // Picks a file from level_files to compact. // level_files is a vector of (level, file metadata) in ascending order of @@ -355,7 +356,8 @@ void LevelCompactionBuilder::SetupOtherFilesWithRoundRobinExpansion() { vstorage_->GetOverlappingInputs(output_level_, &smallest, &largest, &output_level_inputs.files); if (output_level_inputs.empty()) { - if (TryExtendNonL0TrivialMove((int)start_index)) { + if (TryExtendNonL0TrivialMove((int)start_index, + true /* only_expand_right */)) { return; } } @@ -501,6 +503,16 @@ Compaction* LevelCompactionBuilder::PickCompaction() { } Compaction* LevelCompactionBuilder::GetCompaction() { + // TryPickL0TrivialMove() does not apply to the case when compacting L0 to an + // empty output level. So L0 files is picked in PickFileToCompact() by + // compaction score. We may still be able to do trivial move when this file + // does not overlap with other L0s. This happens when + // compaction_inputs_[0].size() == 1 since SetupOtherL0FilesIfNeeded() did not + // pull in more L0s. + assert(!compaction_inputs_.empty()); + bool l0_files_might_overlap = + start_level_ == 0 && !is_l0_trivial_move_ && + (compaction_inputs_.size() > 1 || compaction_inputs_[0].size() > 1); auto c = new Compaction( vstorage_, ioptions_, mutable_cf_options_, mutable_db_options_, std::move(compaction_inputs_), output_level_, @@ -515,8 +527,7 @@ Compaction* LevelCompactionBuilder::GetCompaction() { Temperature::kUnknown, /* max_subcompactions */ 0, std::move(grandparents_), is_manual_, /* trim_ts */ "", start_level_score_, false /* deletion_compaction */, - /* l0_files_might_overlap */ start_level_ == 0 && !is_l0_trivial_move_, - compaction_reason_); + l0_files_might_overlap, compaction_reason_); // If it's level 0 compaction, make sure we don't execute any other level 0 // compactions in parallel @@ -653,7 +664,8 @@ bool LevelCompactionBuilder::TryPickL0TrivialMove() { return false; } -bool LevelCompactionBuilder::TryExtendNonL0TrivialMove(int start_index) { +bool LevelCompactionBuilder::TryExtendNonL0TrivialMove(int start_index, + bool only_expand_right) { if (start_level_inputs_.size() == 1 && (ioptions_.db_paths.empty() || ioptions_.db_paths.size() == 1) && (mutable_cf_options_.compression_per_level.empty())) { @@ -670,6 +682,7 @@ bool LevelCompactionBuilder::TryExtendNonL0TrivialMove(int start_index) { size_t total_size = initial_file->fd.GetFileSize(); CompactionInputFiles output_level_inputs; output_level_inputs.level = output_level_; + // Expand towards right for (int i = start_index + 1; i < static_cast(level_files.size()) && start_level_inputs_.size() < kMaxMultiTrivialMove; @@ -702,6 +715,37 @@ bool LevelCompactionBuilder::TryExtendNonL0TrivialMove(int start_index) { } start_level_inputs_.files.push_back(next_file); } + // Expand towards left + if (!only_expand_right) { + for (int i = start_index - 1; + i >= 0 && start_level_inputs_.size() < kMaxMultiTrivialMove; i--) { + FileMetaData* next_file = level_files[i]; + if (next_file->being_compacted) { + break; + } + vstorage_->GetOverlappingInputs(output_level_, &(next_file->smallest), + &(initial_file->largest), + &output_level_inputs.files); + if (!output_level_inputs.empty()) { + break; + } + if (i > 0 && compaction_picker_->icmp() + ->user_comparator() + ->CompareWithoutTimestamp( + next_file->smallest.user_key(), + level_files[i - 1]->largest.user_key()) == 0) { + // Not a clean up after adding the next file. Skip. + break; + } + total_size += next_file->fd.GetFileSize(); + if (total_size > mutable_cf_options_.max_compaction_bytes) { + break; + } + // keep `files` sorted in increasing order by key range + start_level_inputs_.files.insert(start_level_inputs_.files.begin(), + next_file); + } + } return start_level_inputs_.size() > 1; } return false; @@ -785,7 +829,10 @@ bool LevelCompactionBuilder::PickFileToCompact() { vstorage_->GetOverlappingInputs(output_level_, &smallest, &largest, &output_level_inputs.files); if (output_level_inputs.empty()) { - if (TryExtendNonL0TrivialMove(index)) { + if (start_level_ > 0 && + TryExtendNonL0TrivialMove(index, + ioptions_.compaction_pri == + kRoundRobin /* only_expand_right */)) { break; } } else { diff --git a/db/compaction/compaction_picker_test.cc b/db/compaction/compaction_picker_test.cc index 77fde161ac..c9cbb09ed3 100644 --- a/db/compaction/compaction_picker_test.cc +++ b/db/compaction/compaction_picker_test.cc @@ -2520,6 +2520,61 @@ TEST_F(CompactionPickerTest, L0TrivialMoveWholeL0) { ASSERT_TRUE(compaction->IsTrivialMove()); } +TEST_F(CompactionPickerTest, NonL0TrivialMoveExtendBothDirection) { + mutable_cf_options_.max_bytes_for_level_base = 5000; + mutable_cf_options_.level0_file_num_compaction_trigger = 4; + mutable_cf_options_.max_compaction_bytes = 10000000u; + ioptions_.level_compaction_dynamic_level_bytes = false; + NewVersionStorage(6, kCompactionStyleLevel); + + Add(1, 1U, "300", "350", 3000U, 0, 710, 800, 3000U); + Add(1, 2U, "600", "651", 3001U, 0, 610, 700, 3001U); + Add(1, 3U, "700", "750", 3000U, 0, 500, 550, 3000U); + Add(2, 4U, "800", "850", 4000U, 0, 150, 200, 4000U); + + UpdateVersionStorageInfo(); + // File #2 should be picked first, and expand both directions to include + // files #1 and #3. + std::unique_ptr compaction(level_compaction_picker.PickCompaction( + cf_name_, mutable_cf_options_, mutable_db_options_, vstorage_.get(), + &log_buffer_)); + ASSERT_TRUE(compaction.get() != nullptr); + ASSERT_EQ(1, compaction->num_input_levels()); + ASSERT_EQ(3, compaction->num_input_files(0)); + ASSERT_EQ(1, compaction->input(0, 0)->fd.GetNumber()); + ASSERT_EQ(2, compaction->input(0, 1)->fd.GetNumber()); + ASSERT_EQ(3, compaction->input(0, 2)->fd.GetNumber()); + ASSERT_TRUE(compaction->IsTrivialMove()); +} + +TEST_F(CompactionPickerTest, L0TrivialMoveToEmptyLevel) { + mutable_cf_options_.max_bytes_for_level_base = 5000; + mutable_cf_options_.level0_file_num_compaction_trigger = 4; + mutable_cf_options_.max_compaction_bytes = 10000000u; + ioptions_.level_compaction_dynamic_level_bytes = false; + NewVersionStorage(6, kCompactionStyleLevel); + + // File 2 will be picked first, which by itself is trivial movable. + // There was a bug before where compaction also picks file 3 and 4, + // (and then file 1 since it overlaps with the key range), + // which makes the compaction not trivial movable. + Add(0, 1U, "450", "599", 3000U, 0, 710, 800, 3000U); + Add(0, 2U, "600", "651", 3001U, 0, 610, 700, 3001U); + Add(0, 3U, "300", "350", 3000U, 0, 500, 550, 3000U); + Add(0, 4U, "500", "550", 2999U, 0, 300, 350, 2999U); + + UpdateVersionStorageInfo(); + + std::unique_ptr compaction(level_compaction_picker.PickCompaction( + cf_name_, mutable_cf_options_, mutable_db_options_, vstorage_.get(), + &log_buffer_)); + ASSERT_TRUE(compaction.get() != nullptr); + ASSERT_EQ(1, compaction->num_input_levels()); + ASSERT_EQ(1, compaction->num_input_files(0)); + ASSERT_EQ(2, compaction->input(0, 0)->fd.GetNumber()); + ASSERT_TRUE(compaction->IsTrivialMove()); +} + TEST_F(CompactionPickerTest, IsTrivialMoveOffSstPartitioned) { mutable_cf_options_.max_bytes_for_level_base = 10000u; mutable_cf_options_.max_compaction_bytes = 10001u; diff --git a/db/db_compaction_test.cc b/db/db_compaction_test.cc index 119935c660..b214b201b4 100644 --- a/db/db_compaction_test.cc +++ b/db/db_compaction_test.cc @@ -5509,8 +5509,9 @@ TEST_F(DBCompactionTest, CompactionLimiter) { for (int n = 0; n < options.level0_file_num_compaction_trigger; n++) { for (unsigned int cf = 0; cf < cf_count; cf++) { + // All L0s should overlap with each other for (int i = 0; i < kNumKeysPerFile; i++) { - ASSERT_OK(Put(cf, Key(keyIndex++), "")); + ASSERT_OK(Put(cf, Key(i), "")); } // put extra key to trigger flush ASSERT_OK(Put(cf, "", "")); diff --git a/test_util/testutil.cc b/test_util/testutil.cc index 031104a7b5..b3dfc0830e 100644 --- a/test_util/testutil.cc +++ b/test_util/testutil.cc @@ -629,7 +629,7 @@ class SpecialSkipListFactory : public MemTableRepFactory { }); return true; } - // After number of inserts exceeds `num_entries_flush` in a mem table, trigger + // After number of inserts >= `num_entries_flush` in a mem table, trigger // flush. explicit SpecialSkipListFactory(int num_entries_flush) : num_entries_flush_(num_entries_flush) {}