mirror of
https://github.com/facebook/rocksdb.git
synced 2024-11-28 05:43:50 +00:00
Try to pick more files in LevelCompactionBuilder::TryExtendNonL0TrivialMove()
(#11347)
Summary: Before this PR, in `LevelCompactionBuilder::TryExtendNonL0TrivialMove(index)`, we start from a file at index and expand the compaction input towards right to find files to trivial move. This PR adds the logic to also expand towards left. Another major change made in this PR is to not expand L0 files through `TryExtendNonL0TrivialMove()`. This happens currently when compacting L0 files to an empty output level. The condition for expanding files in `TryExtendNonL0TrivialMove()` is to check atomic boundary, which does not take into account that L0 files can overlap in key range and are not sorted in key order. So it may include more L0 files than needed and disallow a trivial move. This change is included in this PR so that we don't make it worse by always expanding L0 in both direction. Pull Request resolved: https://github.com/facebook/rocksdb/pull/11347 Test Plan: * new unit test * Benchmark does not show obvious improvement or regression: ``` Write sequentially ./db_bench --benchmarks=fillseq --compression_type=lz4 --write_buffer_size=1000000 --num=100000000 --value_size=100 -level_compaction_dynamic_level_bytes --target_file_size_base=7340032 --max_bytes_for_level_base=16777216 Main: fillseq : 4.726 micros/op 211592 ops/sec 472.607 seconds 100000000 operations; 23.4 MB/s This PR: fillseq : 4.755 micros/op 210289 ops/sec 475.534 seconds 100000000 operations; 23.3 MB/s Write randomly ./db_bench --benchmarks=fillrandom --compression_type=lz4 --write_buffer_size=1000000 --num=100000000 --value_size=100 -level_compaction_dynamic_level_bytes --target_file_size_base=7340032 --max_bytes_for_level_base=16777216 Main: fillrandom : 16.351 micros/op 61159 ops/sec 1635.066 seconds 100000000 operations; 6.8 MB/s This PR: fillrandom : 15.798 micros/op 63298 ops/sec 1579.817 seconds 100000000 operations; 7.0 MB/s ``` Reviewed By: ajkr Differential Revision: D44645650 Pulled By: cbi42 fbshipit-source-id: 8631f3a6b3f01decbbf18c34f2b62833cb4f9733
This commit is contained in:
parent
9500d90d1b
commit
ba16e8eee7
|
@ -83,7 +83,7 @@ class LevelCompactionBuilder {
|
||||||
|
|
||||||
Compaction* GetCompaction();
|
Compaction* GetCompaction();
|
||||||
|
|
||||||
// For the specfied level, pick a file that we want to compact.
|
// From `start_level_`, pick files to compact to `output_level_`.
|
||||||
// Returns false if there is no file to compact.
|
// Returns false if there is no file to compact.
|
||||||
// If it returns true, inputs->files.size() will be exactly one for
|
// If it returns true, inputs->files.size() will be exactly one for
|
||||||
// all compaction priorities except round-robin. For round-robin,
|
// all compaction priorities except round-robin. For round-robin,
|
||||||
|
@ -107,8 +107,9 @@ class LevelCompactionBuilder {
|
||||||
bool PickIntraL0Compaction();
|
bool PickIntraL0Compaction();
|
||||||
|
|
||||||
// Return true if TrivialMove is extended. `start_index` is the index of
|
// Return true if TrivialMove is extended. `start_index` is the index of
|
||||||
// the intiial file picked, which should already be in `start_level_inputs_`.
|
// the initial file picked, which should already be in `start_level_inputs_`.
|
||||||
bool TryExtendNonL0TrivialMove(int start_index);
|
bool TryExtendNonL0TrivialMove(int start_index,
|
||||||
|
bool only_expand_right = false);
|
||||||
|
|
||||||
// Picks a file from level_files to compact.
|
// Picks a file from level_files to compact.
|
||||||
// level_files is a vector of (level, file metadata) in ascending order of
|
// level_files is a vector of (level, file metadata) in ascending order of
|
||||||
|
@ -355,7 +356,8 @@ void LevelCompactionBuilder::SetupOtherFilesWithRoundRobinExpansion() {
|
||||||
vstorage_->GetOverlappingInputs(output_level_, &smallest, &largest,
|
vstorage_->GetOverlappingInputs(output_level_, &smallest, &largest,
|
||||||
&output_level_inputs.files);
|
&output_level_inputs.files);
|
||||||
if (output_level_inputs.empty()) {
|
if (output_level_inputs.empty()) {
|
||||||
if (TryExtendNonL0TrivialMove((int)start_index)) {
|
if (TryExtendNonL0TrivialMove((int)start_index,
|
||||||
|
true /* only_expand_right */)) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -501,6 +503,16 @@ Compaction* LevelCompactionBuilder::PickCompaction() {
|
||||||
}
|
}
|
||||||
|
|
||||||
Compaction* LevelCompactionBuilder::GetCompaction() {
|
Compaction* LevelCompactionBuilder::GetCompaction() {
|
||||||
|
// TryPickL0TrivialMove() does not apply to the case when compacting L0 to an
|
||||||
|
// empty output level. So L0 files is picked in PickFileToCompact() by
|
||||||
|
// compaction score. We may still be able to do trivial move when this file
|
||||||
|
// does not overlap with other L0s. This happens when
|
||||||
|
// compaction_inputs_[0].size() == 1 since SetupOtherL0FilesIfNeeded() did not
|
||||||
|
// pull in more L0s.
|
||||||
|
assert(!compaction_inputs_.empty());
|
||||||
|
bool l0_files_might_overlap =
|
||||||
|
start_level_ == 0 && !is_l0_trivial_move_ &&
|
||||||
|
(compaction_inputs_.size() > 1 || compaction_inputs_[0].size() > 1);
|
||||||
auto c = new Compaction(
|
auto c = new Compaction(
|
||||||
vstorage_, ioptions_, mutable_cf_options_, mutable_db_options_,
|
vstorage_, ioptions_, mutable_cf_options_, mutable_db_options_,
|
||||||
std::move(compaction_inputs_), output_level_,
|
std::move(compaction_inputs_), output_level_,
|
||||||
|
@ -515,8 +527,7 @@ Compaction* LevelCompactionBuilder::GetCompaction() {
|
||||||
Temperature::kUnknown,
|
Temperature::kUnknown,
|
||||||
/* max_subcompactions */ 0, std::move(grandparents_), is_manual_,
|
/* max_subcompactions */ 0, std::move(grandparents_), is_manual_,
|
||||||
/* trim_ts */ "", start_level_score_, false /* deletion_compaction */,
|
/* trim_ts */ "", start_level_score_, false /* deletion_compaction */,
|
||||||
/* l0_files_might_overlap */ start_level_ == 0 && !is_l0_trivial_move_,
|
l0_files_might_overlap, compaction_reason_);
|
||||||
compaction_reason_);
|
|
||||||
|
|
||||||
// If it's level 0 compaction, make sure we don't execute any other level 0
|
// If it's level 0 compaction, make sure we don't execute any other level 0
|
||||||
// compactions in parallel
|
// compactions in parallel
|
||||||
|
@ -653,7 +664,8 @@ bool LevelCompactionBuilder::TryPickL0TrivialMove() {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool LevelCompactionBuilder::TryExtendNonL0TrivialMove(int start_index) {
|
bool LevelCompactionBuilder::TryExtendNonL0TrivialMove(int start_index,
|
||||||
|
bool only_expand_right) {
|
||||||
if (start_level_inputs_.size() == 1 &&
|
if (start_level_inputs_.size() == 1 &&
|
||||||
(ioptions_.db_paths.empty() || ioptions_.db_paths.size() == 1) &&
|
(ioptions_.db_paths.empty() || ioptions_.db_paths.size() == 1) &&
|
||||||
(mutable_cf_options_.compression_per_level.empty())) {
|
(mutable_cf_options_.compression_per_level.empty())) {
|
||||||
|
@ -670,6 +682,7 @@ bool LevelCompactionBuilder::TryExtendNonL0TrivialMove(int start_index) {
|
||||||
size_t total_size = initial_file->fd.GetFileSize();
|
size_t total_size = initial_file->fd.GetFileSize();
|
||||||
CompactionInputFiles output_level_inputs;
|
CompactionInputFiles output_level_inputs;
|
||||||
output_level_inputs.level = output_level_;
|
output_level_inputs.level = output_level_;
|
||||||
|
// Expand towards right
|
||||||
for (int i = start_index + 1;
|
for (int i = start_index + 1;
|
||||||
i < static_cast<int>(level_files.size()) &&
|
i < static_cast<int>(level_files.size()) &&
|
||||||
start_level_inputs_.size() < kMaxMultiTrivialMove;
|
start_level_inputs_.size() < kMaxMultiTrivialMove;
|
||||||
|
@ -702,6 +715,37 @@ bool LevelCompactionBuilder::TryExtendNonL0TrivialMove(int start_index) {
|
||||||
}
|
}
|
||||||
start_level_inputs_.files.push_back(next_file);
|
start_level_inputs_.files.push_back(next_file);
|
||||||
}
|
}
|
||||||
|
// Expand towards left
|
||||||
|
if (!only_expand_right) {
|
||||||
|
for (int i = start_index - 1;
|
||||||
|
i >= 0 && start_level_inputs_.size() < kMaxMultiTrivialMove; i--) {
|
||||||
|
FileMetaData* next_file = level_files[i];
|
||||||
|
if (next_file->being_compacted) {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
vstorage_->GetOverlappingInputs(output_level_, &(next_file->smallest),
|
||||||
|
&(initial_file->largest),
|
||||||
|
&output_level_inputs.files);
|
||||||
|
if (!output_level_inputs.empty()) {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
if (i > 0 && compaction_picker_->icmp()
|
||||||
|
->user_comparator()
|
||||||
|
->CompareWithoutTimestamp(
|
||||||
|
next_file->smallest.user_key(),
|
||||||
|
level_files[i - 1]->largest.user_key()) == 0) {
|
||||||
|
// Not a clean up after adding the next file. Skip.
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
total_size += next_file->fd.GetFileSize();
|
||||||
|
if (total_size > mutable_cf_options_.max_compaction_bytes) {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
// keep `files` sorted in increasing order by key range
|
||||||
|
start_level_inputs_.files.insert(start_level_inputs_.files.begin(),
|
||||||
|
next_file);
|
||||||
|
}
|
||||||
|
}
|
||||||
return start_level_inputs_.size() > 1;
|
return start_level_inputs_.size() > 1;
|
||||||
}
|
}
|
||||||
return false;
|
return false;
|
||||||
|
@ -785,7 +829,10 @@ bool LevelCompactionBuilder::PickFileToCompact() {
|
||||||
vstorage_->GetOverlappingInputs(output_level_, &smallest, &largest,
|
vstorage_->GetOverlappingInputs(output_level_, &smallest, &largest,
|
||||||
&output_level_inputs.files);
|
&output_level_inputs.files);
|
||||||
if (output_level_inputs.empty()) {
|
if (output_level_inputs.empty()) {
|
||||||
if (TryExtendNonL0TrivialMove(index)) {
|
if (start_level_ > 0 &&
|
||||||
|
TryExtendNonL0TrivialMove(index,
|
||||||
|
ioptions_.compaction_pri ==
|
||||||
|
kRoundRobin /* only_expand_right */)) {
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
|
|
|
@ -2520,6 +2520,61 @@ TEST_F(CompactionPickerTest, L0TrivialMoveWholeL0) {
|
||||||
ASSERT_TRUE(compaction->IsTrivialMove());
|
ASSERT_TRUE(compaction->IsTrivialMove());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
TEST_F(CompactionPickerTest, NonL0TrivialMoveExtendBothDirection) {
|
||||||
|
mutable_cf_options_.max_bytes_for_level_base = 5000;
|
||||||
|
mutable_cf_options_.level0_file_num_compaction_trigger = 4;
|
||||||
|
mutable_cf_options_.max_compaction_bytes = 10000000u;
|
||||||
|
ioptions_.level_compaction_dynamic_level_bytes = false;
|
||||||
|
NewVersionStorage(6, kCompactionStyleLevel);
|
||||||
|
|
||||||
|
Add(1, 1U, "300", "350", 3000U, 0, 710, 800, 3000U);
|
||||||
|
Add(1, 2U, "600", "651", 3001U, 0, 610, 700, 3001U);
|
||||||
|
Add(1, 3U, "700", "750", 3000U, 0, 500, 550, 3000U);
|
||||||
|
Add(2, 4U, "800", "850", 4000U, 0, 150, 200, 4000U);
|
||||||
|
|
||||||
|
UpdateVersionStorageInfo();
|
||||||
|
// File #2 should be picked first, and expand both directions to include
|
||||||
|
// files #1 and #3.
|
||||||
|
std::unique_ptr<Compaction> compaction(level_compaction_picker.PickCompaction(
|
||||||
|
cf_name_, mutable_cf_options_, mutable_db_options_, vstorage_.get(),
|
||||||
|
&log_buffer_));
|
||||||
|
ASSERT_TRUE(compaction.get() != nullptr);
|
||||||
|
ASSERT_EQ(1, compaction->num_input_levels());
|
||||||
|
ASSERT_EQ(3, compaction->num_input_files(0));
|
||||||
|
ASSERT_EQ(1, compaction->input(0, 0)->fd.GetNumber());
|
||||||
|
ASSERT_EQ(2, compaction->input(0, 1)->fd.GetNumber());
|
||||||
|
ASSERT_EQ(3, compaction->input(0, 2)->fd.GetNumber());
|
||||||
|
ASSERT_TRUE(compaction->IsTrivialMove());
|
||||||
|
}
|
||||||
|
|
||||||
|
TEST_F(CompactionPickerTest, L0TrivialMoveToEmptyLevel) {
|
||||||
|
mutable_cf_options_.max_bytes_for_level_base = 5000;
|
||||||
|
mutable_cf_options_.level0_file_num_compaction_trigger = 4;
|
||||||
|
mutable_cf_options_.max_compaction_bytes = 10000000u;
|
||||||
|
ioptions_.level_compaction_dynamic_level_bytes = false;
|
||||||
|
NewVersionStorage(6, kCompactionStyleLevel);
|
||||||
|
|
||||||
|
// File 2 will be picked first, which by itself is trivial movable.
|
||||||
|
// There was a bug before where compaction also picks file 3 and 4,
|
||||||
|
// (and then file 1 since it overlaps with the key range),
|
||||||
|
// which makes the compaction not trivial movable.
|
||||||
|
Add(0, 1U, "450", "599", 3000U, 0, 710, 800, 3000U);
|
||||||
|
Add(0, 2U, "600", "651", 3001U, 0, 610, 700, 3001U);
|
||||||
|
Add(0, 3U, "300", "350", 3000U, 0, 500, 550, 3000U);
|
||||||
|
Add(0, 4U, "500", "550", 2999U, 0, 300, 350, 2999U);
|
||||||
|
|
||||||
|
UpdateVersionStorageInfo();
|
||||||
|
|
||||||
|
std::unique_ptr<Compaction> compaction(level_compaction_picker.PickCompaction(
|
||||||
|
cf_name_, mutable_cf_options_, mutable_db_options_, vstorage_.get(),
|
||||||
|
&log_buffer_));
|
||||||
|
ASSERT_TRUE(compaction.get() != nullptr);
|
||||||
|
ASSERT_EQ(1, compaction->num_input_levels());
|
||||||
|
ASSERT_EQ(1, compaction->num_input_files(0));
|
||||||
|
ASSERT_EQ(2, compaction->input(0, 0)->fd.GetNumber());
|
||||||
|
ASSERT_TRUE(compaction->IsTrivialMove());
|
||||||
|
}
|
||||||
|
|
||||||
TEST_F(CompactionPickerTest, IsTrivialMoveOffSstPartitioned) {
|
TEST_F(CompactionPickerTest, IsTrivialMoveOffSstPartitioned) {
|
||||||
mutable_cf_options_.max_bytes_for_level_base = 10000u;
|
mutable_cf_options_.max_bytes_for_level_base = 10000u;
|
||||||
mutable_cf_options_.max_compaction_bytes = 10001u;
|
mutable_cf_options_.max_compaction_bytes = 10001u;
|
||||||
|
|
|
@ -5509,8 +5509,9 @@ TEST_F(DBCompactionTest, CompactionLimiter) {
|
||||||
|
|
||||||
for (int n = 0; n < options.level0_file_num_compaction_trigger; n++) {
|
for (int n = 0; n < options.level0_file_num_compaction_trigger; n++) {
|
||||||
for (unsigned int cf = 0; cf < cf_count; cf++) {
|
for (unsigned int cf = 0; cf < cf_count; cf++) {
|
||||||
|
// All L0s should overlap with each other
|
||||||
for (int i = 0; i < kNumKeysPerFile; i++) {
|
for (int i = 0; i < kNumKeysPerFile; i++) {
|
||||||
ASSERT_OK(Put(cf, Key(keyIndex++), ""));
|
ASSERT_OK(Put(cf, Key(i), ""));
|
||||||
}
|
}
|
||||||
// put extra key to trigger flush
|
// put extra key to trigger flush
|
||||||
ASSERT_OK(Put(cf, "", ""));
|
ASSERT_OK(Put(cf, "", ""));
|
||||||
|
|
|
@ -629,7 +629,7 @@ class SpecialSkipListFactory : public MemTableRepFactory {
|
||||||
});
|
});
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
// After number of inserts exceeds `num_entries_flush` in a mem table, trigger
|
// After number of inserts >= `num_entries_flush` in a mem table, trigger
|
||||||
// flush.
|
// flush.
|
||||||
explicit SpecialSkipListFactory(int num_entries_flush)
|
explicit SpecialSkipListFactory(int num_entries_flush)
|
||||||
: num_entries_flush_(num_entries_flush) {}
|
: num_entries_flush_(num_entries_flush) {}
|
||||||
|
|
Loading…
Reference in a new issue