mirror of
https://github.com/facebook/rocksdb.git
synced 2024-11-25 22:44:05 +00:00
Fix two ErrorHandler race conditions (#11939)
Summary:
1. Prevent a double join on a `port::Thread`.
2. Ensure `recovery_in_prog_` and `bg_error_` are both set under the same lock hold. This is useful for writers who see a non-OK `bg_error_` and are deciding whether to stall based on whether the error will be auto-recovered.
Pull Request resolved: https://github.com/facebook/rocksdb/pull/11939
Reviewed By: cbi42
Differential Revision: D50155484
Pulled By: ajkr
fbshipit-source-id: fbc1f85c50e7eaee27ee0e376aee688d8a06c93b
This commit is contained in:
parent
5b11f5a3a2
commit
4bd5aa4f55
|
@ -638,16 +638,22 @@ const Status& ErrorHandler::StartRecoverFromRetryableBGIOError(
|
|||
ROCKS_LOG_INFO(
|
||||
db_options_.info_log,
|
||||
"ErrorHandler: Call StartRecoverFromRetryableBGIOError to resume\n");
|
||||
// Needs to be set in the same lock hold as setting BG error, otherwise
|
||||
// intervening writes could see a BG error without a recovery and bail out.
|
||||
recovery_in_prog_ = true;
|
||||
|
||||
if (recovery_thread_) {
|
||||
// Ensure only one thread can execute the join().
|
||||
std::unique_ptr<port::Thread> old_recovery_thread(
|
||||
std::move(recovery_thread_));
|
||||
// In this case, if recovery_in_prog_ is false, current thread should
|
||||
// wait the previous recover thread to finish and create a new thread
|
||||
// to recover from the bg error.
|
||||
db_mutex_->Unlock();
|
||||
recovery_thread_->join();
|
||||
old_recovery_thread->join();
|
||||
db_mutex_->Lock();
|
||||
}
|
||||
|
||||
recovery_in_prog_ = true;
|
||||
TEST_SYNC_POINT("StartRecoverFromRetryableBGIOError::in_progress");
|
||||
recovery_thread_.reset(
|
||||
new port::Thread(&ErrorHandler::RecoverFromRetryableBGIOError, this));
|
||||
|
@ -790,12 +796,15 @@ void ErrorHandler::EndAutoRecovery() {
|
|||
if (!end_recovery_) {
|
||||
end_recovery_ = true;
|
||||
}
|
||||
cv_.SignalAll();
|
||||
db_mutex_->Unlock();
|
||||
if (recovery_thread_) {
|
||||
recovery_thread_->join();
|
||||
// Ensure only one thread can execute the join().
|
||||
std::unique_ptr<port::Thread> old_recovery_thread(
|
||||
std::move(recovery_thread_));
|
||||
db_mutex_->Unlock();
|
||||
cv_.SignalAll();
|
||||
old_recovery_thread->join();
|
||||
db_mutex_->Lock();
|
||||
}
|
||||
db_mutex_->Lock();
|
||||
return;
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in a new issue