mirror of https://github.com/facebook/rocksdb.git
Fix checkpoint stuck (#7921)
Summary: ## 1. Bug description: When RocksDB creates a Checkpoint, it may get stuck in the `WaitUntilFlushWouldNotStallWrites` method. ## 2. Simple analysis of the reasons: ### 2.1 Configuration parameters: ```yaml Compaction Style : Universal max_write_buffer_number : 4 min_write_buffer_number_to_merge : 3 ``` A Checkpoint is usually very fast. When the Checkpoint is executed, `WaitUntilFlushWouldNotStallWrites` is called. If there are 2 Immutable MemTables, which is fewer than `min_write_buffer_number_to_merge`, they will not be flushed. However, `WaitUntilFlushWouldNotStallWrites` passes `NumNotFlushed() + 1` (here 3) as `num_unflushed_memtables`, which is >= `max_write_buffer_number - 1`, so execution still enters this code: ```c++ // method: GetWriteStallConditionAndCause if (mutable_cf_options.max_write_buffer_number > 3 && num_unflushed_memtables >= mutable_cf_options.max_write_buffer_number - 1) { return {WriteStallCondition::kDelayed, WriteStallCause::kMemtableLimit}; } ``` code link: fbed72f03c/db/column_family.cc (L847)
The Checkpoint assumed that a FlushJob was pending, but there was none, so it waits forever. ### 2.2 Solution: Tighten the condition: wait only when the number of Immutable MemTables >= `min_write_buffer_number_to_merge`. If there is a better solution, please correct me. ### 2.3 Code that can reproduce the problem: https://github.com/1996fanrui/fanrui-learning/blob/flink-1.12/module-java/src/main/java/com/dream/rocksdb/RocksDBCheckpointStuck.java ## 3. Interesting point: This bug is triggered only when the number of sorted runs >= `level0_file_num_compaction_trigger`, because there is a `break` in `WaitUntilFlushWouldNotStallWrites`: ```c++ if (cfd->imm()->NumNotFlushed() < cfd->ioptions()->min_write_buffer_number_to_merge && vstorage->l0_delay_trigger_count() < mutable_cf_options.level0_file_num_compaction_trigger) { break; } ``` code link: fbed72f03c/db/db_impl/db_impl_compaction_flush.cc (L1974)
With Universal compaction, `l0_delay_trigger_count() >= level0_file_num_compaction_trigger` may hold, in which case the `break` is not taken and this bug is triggered. Pull Request resolved: https://github.com/facebook/rocksdb/pull/7921 Reviewed By: jay-zhuang Differential Revision: D26900559 Pulled By: ajkr fbshipit-source-id: 133c1252dad7393753f04a47590b68c7d8e670df
This commit is contained in:
parent
d2e9eab1ea
commit
67d72fb5dc
|
@ -835,7 +835,8 @@ std::pair<WriteStallCondition, ColumnFamilyData::WriteStallCause>
|
|||
ColumnFamilyData::GetWriteStallConditionAndCause(
|
||||
int num_unflushed_memtables, int num_l0_files,
|
||||
uint64_t num_compaction_needed_bytes,
|
||||
const MutableCFOptions& mutable_cf_options) {
|
||||
const MutableCFOptions& mutable_cf_options,
|
||||
const ImmutableCFOptions& immutable_cf_options) {
|
||||
if (num_unflushed_memtables >= mutable_cf_options.max_write_buffer_number) {
|
||||
return {WriteStallCondition::kStopped, WriteStallCause::kMemtableLimit};
|
||||
} else if (!mutable_cf_options.disable_auto_compactions &&
|
||||
|
@ -849,7 +850,9 @@ ColumnFamilyData::GetWriteStallConditionAndCause(
|
|||
WriteStallCause::kPendingCompactionBytes};
|
||||
} else if (mutable_cf_options.max_write_buffer_number > 3 &&
|
||||
num_unflushed_memtables >=
|
||||
mutable_cf_options.max_write_buffer_number - 1) {
|
||||
mutable_cf_options.max_write_buffer_number - 1 &&
|
||||
num_unflushed_memtables - 1 >=
|
||||
immutable_cf_options.min_write_buffer_number_to_merge) {
|
||||
return {WriteStallCondition::kDelayed, WriteStallCause::kMemtableLimit};
|
||||
} else if (!mutable_cf_options.disable_auto_compactions &&
|
||||
mutable_cf_options.level0_slowdown_writes_trigger >= 0 &&
|
||||
|
@ -877,7 +880,8 @@ WriteStallCondition ColumnFamilyData::RecalculateWriteStallConditions(
|
|||
|
||||
auto write_stall_condition_and_cause = GetWriteStallConditionAndCause(
|
||||
imm()->NumNotFlushed(), vstorage->l0_delay_trigger_count(),
|
||||
vstorage->estimated_compaction_needed_bytes(), mutable_cf_options);
|
||||
vstorage->estimated_compaction_needed_bytes(), mutable_cf_options,
|
||||
*ioptions());
|
||||
write_stall_condition = write_stall_condition_and_cause.first;
|
||||
auto write_stall_cause = write_stall_condition_and_cause.second;
|
||||
|
||||
|
|
|
@ -475,9 +475,11 @@ class ColumnFamilyData {
|
|||
kPendingCompactionBytes,
|
||||
};
|
||||
static std::pair<WriteStallCondition, WriteStallCause>
|
||||
GetWriteStallConditionAndCause(int num_unflushed_memtables, int num_l0_files,
|
||||
uint64_t num_compaction_needed_bytes,
|
||||
const MutableCFOptions& mutable_cf_options);
|
||||
GetWriteStallConditionAndCause(
|
||||
int num_unflushed_memtables, int num_l0_files,
|
||||
uint64_t num_compaction_needed_bytes,
|
||||
const MutableCFOptions& mutable_cf_options,
|
||||
const ImmutableCFOptions& immutable_cf_options);
|
||||
|
||||
// Recalculate some small conditions, which are changed only during
|
||||
// compaction, adding new memtable and/or
|
||||
|
|
|
@ -2061,12 +2061,12 @@ Status DBImpl::WaitUntilFlushWouldNotStallWrites(ColumnFamilyData* cfd,
|
|||
// check whether one extra immutable memtable or an extra L0 file would
|
||||
// cause write stalling mode to be entered. It could still enter stall
|
||||
// mode due to pending compaction bytes, but that's less common
|
||||
write_stall_condition =
|
||||
ColumnFamilyData::GetWriteStallConditionAndCause(
|
||||
cfd->imm()->NumNotFlushed() + 1,
|
||||
vstorage->l0_delay_trigger_count() + 1,
|
||||
vstorage->estimated_compaction_needed_bytes(), mutable_cf_options)
|
||||
.first;
|
||||
write_stall_condition = ColumnFamilyData::GetWriteStallConditionAndCause(
|
||||
cfd->imm()->NumNotFlushed() + 1,
|
||||
vstorage->l0_delay_trigger_count() + 1,
|
||||
vstorage->estimated_compaction_needed_bytes(),
|
||||
mutable_cf_options, *cfd->ioptions())
|
||||
.first;
|
||||
} while (write_stall_condition != WriteStallCondition::kNormal);
|
||||
}
|
||||
return Status::OK();
|
||||
|
|
Loading…
Reference in New Issue