Try to pick more files in LevelCompactionBuilder::TryExtendNonL0TrivialMove() (#11347)

Summary:
Before this PR, `LevelCompactionBuilder::TryExtendNonL0TrivialMove(index)` started from the file at `index` and expanded the compaction input only towards the right to find more files to trivially move. This PR adds logic to also expand towards the left.

Another major change made in this PR is to not expand L0 files through `TryExtendNonL0TrivialMove()`. This currently happens when compacting L0 files to an empty output level. The condition for expanding files in `TryExtendNonL0TrivialMove()` is an atomic-boundary check, which does not take into account that L0 files can overlap in key range and are not sorted in key order. So it may include more L0 files than needed and disallow a trivial move. This change is included in this PR so that we don't make it worse by always expanding L0 in both directions.

Pull Request resolved: https://github.com/facebook/rocksdb/pull/11347

Test Plan:
* new unit test
* Benchmark does not show obvious improvement or regression:
```
Write sequentially
./db_bench --benchmarks=fillseq --compression_type=lz4 --write_buffer_size=1000000 --num=100000000 --value_size=100 -level_compaction_dynamic_level_bytes --target_file_size_base=7340032 --max_bytes_for_level_base=16777216

Main:
fillseq      :       4.726 micros/op 211592 ops/sec 472.607 seconds 100000000 operations;   23.4 MB/s
This PR:
fillseq      :       4.755 micros/op 210289 ops/sec 475.534 seconds 100000000 operations;   23.3 MB/s

Write randomly
./db_bench --benchmarks=fillrandom --compression_type=lz4 --write_buffer_size=1000000 --num=100000000 --value_size=100 -level_compaction_dynamic_level_bytes --target_file_size_base=7340032 --max_bytes_for_level_base=16777216

Main:
fillrandom   :      16.351 micros/op 61159 ops/sec 1635.066 seconds 100000000 operations;    6.8 MB/s
This PR:
fillrandom   :      15.798 micros/op 63298 ops/sec 1579.817 seconds 100000000 operations;    7.0 MB/s
```

Reviewed By: ajkr

Differential Revision: D44645650

Pulled By: cbi42

fbshipit-source-id: 8631f3a6b3f01decbbf18c34f2b62833cb4f9733
This commit is contained in:
Changyu Bi 2023-04-14 11:50:20 -07:00 committed by Facebook GitHub Bot
parent 9500d90d1b
commit ba16e8eee7
4 changed files with 113 additions and 10 deletions

View file

@ -83,7 +83,7 @@ class LevelCompactionBuilder {
Compaction* GetCompaction(); Compaction* GetCompaction();
// For the specfied level, pick a file that we want to compact. // From `start_level_`, pick files to compact to `output_level_`.
// Returns false if there is no file to compact. // Returns false if there is no file to compact.
// If it returns true, inputs->files.size() will be exactly one for // If it returns true, inputs->files.size() will be exactly one for
// all compaction priorities except round-robin. For round-robin, // all compaction priorities except round-robin. For round-robin,
@ -107,8 +107,9 @@ class LevelCompactionBuilder {
bool PickIntraL0Compaction(); bool PickIntraL0Compaction();
// Return true if TrivialMove is extended. `start_index` is the index of // Return true if TrivialMove is extended. `start_index` is the index of
// the intiial file picked, which should already be in `start_level_inputs_`. // the initial file picked, which should already be in `start_level_inputs_`.
bool TryExtendNonL0TrivialMove(int start_index); bool TryExtendNonL0TrivialMove(int start_index,
bool only_expand_right = false);
// Picks a file from level_files to compact. // Picks a file from level_files to compact.
// level_files is a vector of (level, file metadata) in ascending order of // level_files is a vector of (level, file metadata) in ascending order of
@ -355,7 +356,8 @@ void LevelCompactionBuilder::SetupOtherFilesWithRoundRobinExpansion() {
vstorage_->GetOverlappingInputs(output_level_, &smallest, &largest, vstorage_->GetOverlappingInputs(output_level_, &smallest, &largest,
&output_level_inputs.files); &output_level_inputs.files);
if (output_level_inputs.empty()) { if (output_level_inputs.empty()) {
if (TryExtendNonL0TrivialMove((int)start_index)) { if (TryExtendNonL0TrivialMove((int)start_index,
true /* only_expand_right */)) {
return; return;
} }
} }
@ -501,6 +503,16 @@ Compaction* LevelCompactionBuilder::PickCompaction() {
} }
Compaction* LevelCompactionBuilder::GetCompaction() { Compaction* LevelCompactionBuilder::GetCompaction() {
// TryPickL0TrivialMove() does not apply to the case when compacting L0 to an
// empty output level. So L0 files are picked in PickFileToCompact() by
// compaction score. We may still be able to do trivial move when this file
// does not overlap with other L0s. This happens when
// compaction_inputs_[0].size() == 1 since SetupOtherL0FilesIfNeeded() did not
// pull in more L0s.
assert(!compaction_inputs_.empty());
bool l0_files_might_overlap =
start_level_ == 0 && !is_l0_trivial_move_ &&
(compaction_inputs_.size() > 1 || compaction_inputs_[0].size() > 1);
auto c = new Compaction( auto c = new Compaction(
vstorage_, ioptions_, mutable_cf_options_, mutable_db_options_, vstorage_, ioptions_, mutable_cf_options_, mutable_db_options_,
std::move(compaction_inputs_), output_level_, std::move(compaction_inputs_), output_level_,
@ -515,8 +527,7 @@ Compaction* LevelCompactionBuilder::GetCompaction() {
Temperature::kUnknown, Temperature::kUnknown,
/* max_subcompactions */ 0, std::move(grandparents_), is_manual_, /* max_subcompactions */ 0, std::move(grandparents_), is_manual_,
/* trim_ts */ "", start_level_score_, false /* deletion_compaction */, /* trim_ts */ "", start_level_score_, false /* deletion_compaction */,
/* l0_files_might_overlap */ start_level_ == 0 && !is_l0_trivial_move_, l0_files_might_overlap, compaction_reason_);
compaction_reason_);
// If it's level 0 compaction, make sure we don't execute any other level 0 // If it's level 0 compaction, make sure we don't execute any other level 0
// compactions in parallel // compactions in parallel
@ -653,7 +664,8 @@ bool LevelCompactionBuilder::TryPickL0TrivialMove() {
return false; return false;
} }
bool LevelCompactionBuilder::TryExtendNonL0TrivialMove(int start_index) { bool LevelCompactionBuilder::TryExtendNonL0TrivialMove(int start_index,
bool only_expand_right) {
if (start_level_inputs_.size() == 1 && if (start_level_inputs_.size() == 1 &&
(ioptions_.db_paths.empty() || ioptions_.db_paths.size() == 1) && (ioptions_.db_paths.empty() || ioptions_.db_paths.size() == 1) &&
(mutable_cf_options_.compression_per_level.empty())) { (mutable_cf_options_.compression_per_level.empty())) {
@ -670,6 +682,7 @@ bool LevelCompactionBuilder::TryExtendNonL0TrivialMove(int start_index) {
size_t total_size = initial_file->fd.GetFileSize(); size_t total_size = initial_file->fd.GetFileSize();
CompactionInputFiles output_level_inputs; CompactionInputFiles output_level_inputs;
output_level_inputs.level = output_level_; output_level_inputs.level = output_level_;
// Expand towards right
for (int i = start_index + 1; for (int i = start_index + 1;
i < static_cast<int>(level_files.size()) && i < static_cast<int>(level_files.size()) &&
start_level_inputs_.size() < kMaxMultiTrivialMove; start_level_inputs_.size() < kMaxMultiTrivialMove;
@ -702,6 +715,37 @@ bool LevelCompactionBuilder::TryExtendNonL0TrivialMove(int start_index) {
} }
start_level_inputs_.files.push_back(next_file); start_level_inputs_.files.push_back(next_file);
} }
// Expand towards left
if (!only_expand_right) {
for (int i = start_index - 1;
i >= 0 && start_level_inputs_.size() < kMaxMultiTrivialMove; i--) {
FileMetaData* next_file = level_files[i];
if (next_file->being_compacted) {
break;
}
vstorage_->GetOverlappingInputs(output_level_, &(next_file->smallest),
&(initial_file->largest),
&output_level_inputs.files);
if (!output_level_inputs.empty()) {
break;
}
if (i > 0 && compaction_picker_->icmp()
->user_comparator()
->CompareWithoutTimestamp(
next_file->smallest.user_key(),
level_files[i - 1]->largest.user_key()) == 0) {
// Not a clean up after adding the next file. Skip.
break;
}
total_size += next_file->fd.GetFileSize();
if (total_size > mutable_cf_options_.max_compaction_bytes) {
break;
}
// keep `files` sorted in increasing order by key range
start_level_inputs_.files.insert(start_level_inputs_.files.begin(),
next_file);
}
}
return start_level_inputs_.size() > 1; return start_level_inputs_.size() > 1;
} }
return false; return false;
@ -785,7 +829,10 @@ bool LevelCompactionBuilder::PickFileToCompact() {
vstorage_->GetOverlappingInputs(output_level_, &smallest, &largest, vstorage_->GetOverlappingInputs(output_level_, &smallest, &largest,
&output_level_inputs.files); &output_level_inputs.files);
if (output_level_inputs.empty()) { if (output_level_inputs.empty()) {
if (TryExtendNonL0TrivialMove(index)) { if (start_level_ > 0 &&
TryExtendNonL0TrivialMove(index,
ioptions_.compaction_pri ==
kRoundRobin /* only_expand_right */)) {
break; break;
} }
} else { } else {

View file

@ -2520,6 +2520,61 @@ TEST_F(CompactionPickerTest, L0TrivialMoveWholeL0) {
ASSERT_TRUE(compaction->IsTrivialMove()); ASSERT_TRUE(compaction->IsTrivialMove());
} }
// Checks that a trivial move starting from a non-L0 file is extended to the
// neighbouring files on BOTH sides of the initially picked file.
// NOTE(review): the comments below assume Add()'s first two arguments are
// (level, file number) followed by the smallest/largest keys — confirm
// against the fixture's Add() declaration.
TEST_F(CompactionPickerTest, NonL0TrivialMoveExtendBothDirection) {
mutable_cf_options_.max_bytes_for_level_base = 5000;
mutable_cf_options_.level0_file_num_compaction_trigger = 4;
// Large byte budget so the total size of the three files does not stop the
// expansion.
mutable_cf_options_.max_compaction_bytes = 10000000u;
ioptions_.level_compaction_dynamic_level_bytes = false;
NewVersionStorage(6, kCompactionStyleLevel);
// Three files with disjoint key ranges that do not overlap the single file
// added with level argument 2 ("800"-"850"). File #2 sits between #1 and #3
// in key order and is given a slightly larger size (3001U vs 3000U) —
// presumably so that it is the one picked first.
Add(1, 1U, "300", "350", 3000U, 0, 710, 800, 3000U);
Add(1, 2U, "600", "651", 3001U, 0, 610, 700, 3001U);
Add(1, 3U, "700", "750", 3000U, 0, 500, 550, 3000U);
Add(2, 4U, "800", "850", 4000U, 0, 150, 200, 4000U);
UpdateVersionStorageInfo();
// File #2 should be picked first, and expand both directions to include
// files #1 and #3.
std::unique_ptr<Compaction> compaction(level_compaction_picker.PickCompaction(
cf_name_, mutable_cf_options_, mutable_db_options_, vstorage_.get(),
&log_buffer_));
ASSERT_TRUE(compaction.get() != nullptr);
// Only the start level contributes inputs.
ASSERT_EQ(1, compaction->num_input_levels());
ASSERT_EQ(3, compaction->num_input_files(0));
// Inputs are expected in file-number order 1, 2, 3, which is also their key
// order here.
ASSERT_EQ(1, compaction->input(0, 0)->fd.GetNumber());
ASSERT_EQ(2, compaction->input(0, 1)->fd.GetNumber());
ASSERT_EQ(3, compaction->input(0, 2)->fd.GetNumber());
ASSERT_TRUE(compaction->IsTrivialMove());
}
// Regression test: a single picked L0 file must stay a one-file trivial move
// and must not pull in additional L0 files.
// NOTE(review): the comments below assume Add()'s first two arguments are
// (level, file number) followed by the smallest/largest keys — confirm
// against the fixture's Add() declaration.
TEST_F(CompactionPickerTest, L0TrivialMoveToEmptyLevel) {
mutable_cf_options_.max_bytes_for_level_base = 5000;
mutable_cf_options_.level0_file_num_compaction_trigger = 4;
mutable_cf_options_.max_compaction_bytes = 10000000u;
ioptions_.level_compaction_dynamic_level_bytes = false;
NewVersionStorage(6, kCompactionStyleLevel);
// File 2 will be picked first, which by itself is trivial movable.
// There was a bug before where compaction also picks file 3 and 4,
// (and then file 1 since it overlaps with the key range),
// which makes the compaction not trivial movable.
// All four files are added with level argument 0; nothing is added to any
// other level in this test.
Add(0, 1U, "450", "599", 3000U, 0, 710, 800, 3000U);
Add(0, 2U, "600", "651", 3001U, 0, 610, 700, 3001U);
Add(0, 3U, "300", "350", 3000U, 0, 500, 550, 3000U);
Add(0, 4U, "500", "550", 2999U, 0, 300, 350, 2999U);
UpdateVersionStorageInfo();
std::unique_ptr<Compaction> compaction(level_compaction_picker.PickCompaction(
cf_name_, mutable_cf_options_, mutable_db_options_, vstorage_.get(),
&log_buffer_));
ASSERT_TRUE(compaction.get() != nullptr);
// Exactly one input level holding exactly one file, and that file is #2.
ASSERT_EQ(1, compaction->num_input_levels());
ASSERT_EQ(1, compaction->num_input_files(0));
ASSERT_EQ(2, compaction->input(0, 0)->fd.GetNumber());
ASSERT_TRUE(compaction->IsTrivialMove());
}
TEST_F(CompactionPickerTest, IsTrivialMoveOffSstPartitioned) { TEST_F(CompactionPickerTest, IsTrivialMoveOffSstPartitioned) {
mutable_cf_options_.max_bytes_for_level_base = 10000u; mutable_cf_options_.max_bytes_for_level_base = 10000u;
mutable_cf_options_.max_compaction_bytes = 10001u; mutable_cf_options_.max_compaction_bytes = 10001u;

View file

@ -5509,8 +5509,9 @@ TEST_F(DBCompactionTest, CompactionLimiter) {
for (int n = 0; n < options.level0_file_num_compaction_trigger; n++) { for (int n = 0; n < options.level0_file_num_compaction_trigger; n++) {
for (unsigned int cf = 0; cf < cf_count; cf++) { for (unsigned int cf = 0; cf < cf_count; cf++) {
// All L0s should overlap with each other
for (int i = 0; i < kNumKeysPerFile; i++) { for (int i = 0; i < kNumKeysPerFile; i++) {
ASSERT_OK(Put(cf, Key(keyIndex++), "")); ASSERT_OK(Put(cf, Key(i), ""));
} }
// put extra key to trigger flush // put extra key to trigger flush
ASSERT_OK(Put(cf, "", "")); ASSERT_OK(Put(cf, "", ""));

View file

@ -629,7 +629,7 @@ class SpecialSkipListFactory : public MemTableRepFactory {
}); });
return true; return true;
} }
// After number of inserts exceeds `num_entries_flush` in a mem table, trigger // After number of inserts >= `num_entries_flush` in a mem table, trigger
// flush. // flush.
explicit SpecialSkipListFactory(int num_entries_flush) explicit SpecialSkipListFactory(int num_entries_flush)
: num_entries_flush_(num_entries_flush) {} : num_entries_flush_(num_entries_flush) {}