mirror of
https://github.com/facebook/rocksdb.git
synced 2024-11-26 07:30:54 +00:00
Fix a race condition between recovery and backup (#11955)
Summary: A race condition between recovery and backup can happen with error messages like this: ```Failure in BackupEngine::CreateNewBackup with: IO error: No such file or directory: While opening a file for sequentially reading: /dev/shm/rocksdb_test/rocksdb_crashtest_whitebox/002653.log: No such file or directory``` PR https://github.com/facebook/rocksdb/issues/6949 introduced disabling file deletion during error handling of manifest IO errors. Aformentioned race condition is caused by this chain of event: [Backup engine] disable file deletion [Recovery] disable file deletion <= this is optional for the race condition, it may or may not get called [Backup engine] get list of file to copy/link [Recovery] force enable file deletion .... some files refered by backup engine get deleted [Backup engine] copy/link file <= error no file found This PR fixes this with: 1) Recovery thread is currently forcing enabling file deletion as long as file deletion is disabled. Regardless of whether the previous error handling is for manifest IO error and that disabled it in the first place. This means it could incorrectly enabling file deletions intended by other threads like backup threads, file snapshotting threads. This PR does this check explicitly before making the call. 2) `disable_delete_obsolete_files_` is designed as a counter to allow different threads to enable and disable file deletion separately. The recovery thread currently does a force enable file deletion, because `ErrorHandler::SetBGError()` can be called multiple times by different threads when they receive a manifest IO error(details per PR https://github.com/facebook/rocksdb/issues/6949), resulting in `DBImpl::DisableFileDeletions` to be called multiple times too. Making a force enable file deletion call that resets the counter `disable_delete_obsolete_files_` to zero is a workaround for this. However, as it shows in the race condition, it can incorrectly suppress other threads like a backup thread's intention to keep the file deletion disabled. <strike>This PR adds a `std::atomic<int> disable_file_deletion_count_` to the error handler to track the needed counter decrease more precisely</strike>. This PR tracks and caps file deletion enabling/disabling in error handler. 3) for recovery, the section to find obsolete files and purge them was moved to be done after the attempt to enable file deletion. The actual finding and purging is more likely to happen if file deletion was previously disabled and get re-enabled now. An internal function `DBImpl::EnableFileDeletionsWithLock` was added to support change 2) and 3). Some useful logging was explicitly added to keep those log messages around. Pull Request resolved: https://github.com/facebook/rocksdb/pull/11955 Test Plan: existing unit tests Reviewed By: anand1976 Differential Revision: D50290592 Pulled By: jowlyzhang fbshipit-source-id: 73aa8331ca4d636955a5b0324b1e104a26e00c9b
This commit is contained in:
parent
9135a61ec6
commit
933ee295f4
|
@ -354,7 +354,6 @@ Status DBImpl::ResumeImpl(DBRecoverContext context) {
|
||||||
}
|
}
|
||||||
|
|
||||||
// Make sure the IO Status stored in version set is set to OK.
|
// Make sure the IO Status stored in version set is set to OK.
|
||||||
bool file_deletion_disabled = !IsFileDeletionsEnabled();
|
|
||||||
if (s.ok()) {
|
if (s.ok()) {
|
||||||
IOStatus io_s = versions_->io_status();
|
IOStatus io_s = versions_->io_status();
|
||||||
if (io_s.IsIOError()) {
|
if (io_s.IsIOError()) {
|
||||||
|
@ -363,7 +362,7 @@ Status DBImpl::ResumeImpl(DBRecoverContext context) {
|
||||||
// clean-up phase MANIFEST writing. We must have also disabled file
|
// clean-up phase MANIFEST writing. We must have also disabled file
|
||||||
// deletions.
|
// deletions.
|
||||||
assert(!versions_->descriptor_log_);
|
assert(!versions_->descriptor_log_);
|
||||||
assert(file_deletion_disabled);
|
assert(!IsFileDeletionsEnabled());
|
||||||
// Since we are trying to recover from MANIFEST write error, we need to
|
// Since we are trying to recover from MANIFEST write error, we need to
|
||||||
// switch to a new MANIFEST anyway. The old MANIFEST can be corrupted.
|
// switch to a new MANIFEST anyway. The old MANIFEST can be corrupted.
|
||||||
// Therefore, force writing a dummy version edit because we do not know
|
// Therefore, force writing a dummy version edit because we do not know
|
||||||
|
@ -406,34 +405,6 @@ Status DBImpl::ResumeImpl(DBRecoverContext context) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
JobContext job_context(0);
|
|
||||||
FindObsoleteFiles(&job_context, true);
|
|
||||||
mutex_.Unlock();
|
|
||||||
|
|
||||||
job_context.manifest_file_number = 1;
|
|
||||||
if (job_context.HaveSomethingToDelete()) {
|
|
||||||
PurgeObsoleteFiles(job_context);
|
|
||||||
}
|
|
||||||
job_context.Clean();
|
|
||||||
|
|
||||||
if (s.ok()) {
|
|
||||||
assert(versions_->io_status().ok());
|
|
||||||
// If we reach here, we should re-enable file deletions if it was disabled
|
|
||||||
// during previous error handling.
|
|
||||||
if (file_deletion_disabled) {
|
|
||||||
// Always return ok
|
|
||||||
s = EnableFileDeletions(/*force=*/true);
|
|
||||||
if (!s.ok()) {
|
|
||||||
ROCKS_LOG_INFO(
|
|
||||||
immutable_db_options_.info_log,
|
|
||||||
"DB resume requested but could not enable file deletions [%s]",
|
|
||||||
s.ToString().c_str());
|
|
||||||
assert(false);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
mutex_.Lock();
|
|
||||||
if (s.ok()) {
|
if (s.ok()) {
|
||||||
// This will notify and unblock threads waiting for error recovery to
|
// This will notify and unblock threads waiting for error recovery to
|
||||||
// finish. Those previouly waiting threads can now proceed, which may
|
// finish. Those previouly waiting threads can now proceed, which may
|
||||||
|
@ -446,6 +417,15 @@ Status DBImpl::ResumeImpl(DBRecoverContext context) {
|
||||||
error_handler_.GetRecoveryError().PermitUncheckedError();
|
error_handler_.GetRecoveryError().PermitUncheckedError();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
JobContext job_context(0);
|
||||||
|
FindObsoleteFiles(&job_context, true);
|
||||||
|
mutex_.Unlock();
|
||||||
|
job_context.manifest_file_number = 1;
|
||||||
|
if (job_context.HaveSomethingToDelete()) {
|
||||||
|
PurgeObsoleteFiles(job_context);
|
||||||
|
}
|
||||||
|
job_context.Clean();
|
||||||
|
|
||||||
if (s.ok()) {
|
if (s.ok()) {
|
||||||
ROCKS_LOG_INFO(immutable_db_options_.info_log, "Successfully resumed DB");
|
ROCKS_LOG_INFO(immutable_db_options_.info_log, "Successfully resumed DB");
|
||||||
} else {
|
} else {
|
||||||
|
@ -453,6 +433,7 @@ Status DBImpl::ResumeImpl(DBRecoverContext context) {
|
||||||
s.ToString().c_str());
|
s.ToString().c_str());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
mutex_.Lock();
|
||||||
// Check for shutdown again before scheduling further compactions,
|
// Check for shutdown again before scheduling further compactions,
|
||||||
// since we released and re-acquired the lock above
|
// since we released and re-acquired the lock above
|
||||||
if (shutdown_initiated_) {
|
if (shutdown_initiated_) {
|
||||||
|
|
|
@ -2373,6 +2373,10 @@ class DBImpl : public DB {
|
||||||
|
|
||||||
Status DisableFileDeletionsWithLock();
|
Status DisableFileDeletionsWithLock();
|
||||||
|
|
||||||
|
// Safely decrease `disable_delete_obsolete_files_` by one while holding lock
|
||||||
|
// and return its remaning value.
|
||||||
|
int EnableFileDeletionsWithLock();
|
||||||
|
|
||||||
Status IncreaseFullHistoryTsLowImpl(ColumnFamilyData* cfd,
|
Status IncreaseFullHistoryTsLowImpl(ColumnFamilyData* cfd,
|
||||||
std::string ts_low);
|
std::string ts_low);
|
||||||
|
|
||||||
|
|
|
@ -100,6 +100,14 @@ Status DBImpl::EnableFileDeletions(bool force) {
|
||||||
return Status::OK();
|
return Status::OK();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
int DBImpl::EnableFileDeletionsWithLock() {
|
||||||
|
mutex_.AssertHeld();
|
||||||
|
// In case others have called EnableFileDeletions(true /* force */) in between
|
||||||
|
disable_delete_obsolete_files_ =
|
||||||
|
std::max(0, disable_delete_obsolete_files_ - 1);
|
||||||
|
return disable_delete_obsolete_files_;
|
||||||
|
}
|
||||||
|
|
||||||
bool DBImpl::IsFileDeletionsEnabled() const {
|
bool DBImpl::IsFileDeletionsEnabled() const {
|
||||||
return 0 == disable_delete_obsolete_files_;
|
return 0 == disable_delete_obsolete_files_;
|
||||||
}
|
}
|
||||||
|
|
|
@ -396,11 +396,13 @@ const Status& ErrorHandler::SetBGError(const Status& bg_status,
|
||||||
ROCKS_LOG_WARN(db_options_.info_log, "Background IO error %s",
|
ROCKS_LOG_WARN(db_options_.info_log, "Background IO error %s",
|
||||||
bg_io_err.ToString().c_str());
|
bg_io_err.ToString().c_str());
|
||||||
|
|
||||||
if (BackgroundErrorReason::kManifestWrite == reason ||
|
if (!recovery_disabled_file_deletion_ &&
|
||||||
BackgroundErrorReason::kManifestWriteNoWAL == reason) {
|
(BackgroundErrorReason::kManifestWrite == reason ||
|
||||||
|
BackgroundErrorReason::kManifestWriteNoWAL == reason)) {
|
||||||
// Always returns ok
|
// Always returns ok
|
||||||
ROCKS_LOG_INFO(db_options_.info_log, "Disabling File Deletions");
|
ROCKS_LOG_INFO(db_options_.info_log, "Disabling File Deletions");
|
||||||
db_->DisableFileDeletionsWithLock().PermitUncheckedError();
|
db_->DisableFileDeletionsWithLock().PermitUncheckedError();
|
||||||
|
recovery_disabled_file_deletion_ = true;
|
||||||
}
|
}
|
||||||
|
|
||||||
Status new_bg_io_err = bg_io_err;
|
Status new_bg_io_err = bg_io_err;
|
||||||
|
@ -560,6 +562,18 @@ Status ErrorHandler::ClearBGError() {
|
||||||
recovery_error_.PermitUncheckedError();
|
recovery_error_.PermitUncheckedError();
|
||||||
recovery_in_prog_ = false;
|
recovery_in_prog_ = false;
|
||||||
soft_error_no_bg_work_ = false;
|
soft_error_no_bg_work_ = false;
|
||||||
|
if (recovery_disabled_file_deletion_) {
|
||||||
|
recovery_disabled_file_deletion_ = false;
|
||||||
|
int remain_counter = db_->EnableFileDeletionsWithLock();
|
||||||
|
if (remain_counter == 0) {
|
||||||
|
ROCKS_LOG_INFO(db_options_.info_log, "File Deletions Enabled");
|
||||||
|
} else {
|
||||||
|
ROCKS_LOG_WARN(
|
||||||
|
db_options_.info_log,
|
||||||
|
"File Deletions Enable, but not really enabled. Counter: %d",
|
||||||
|
remain_counter);
|
||||||
|
}
|
||||||
|
}
|
||||||
EventHelpers::NotifyOnErrorRecoveryEnd(db_options_.listeners, old_bg_error,
|
EventHelpers::NotifyOnErrorRecoveryEnd(db_options_.listeners, old_bg_error,
|
||||||
bg_error_, db_mutex_);
|
bg_error_, db_mutex_);
|
||||||
}
|
}
|
||||||
|
|
|
@ -42,7 +42,8 @@ class ErrorHandler {
|
||||||
recovery_in_prog_(false),
|
recovery_in_prog_(false),
|
||||||
soft_error_no_bg_work_(false),
|
soft_error_no_bg_work_(false),
|
||||||
is_db_stopped_(false),
|
is_db_stopped_(false),
|
||||||
bg_error_stats_(db_options.statistics) {
|
bg_error_stats_(db_options.statistics),
|
||||||
|
recovery_disabled_file_deletion_(false) {
|
||||||
// Clear the checked flag for uninitialized errors
|
// Clear the checked flag for uninitialized errors
|
||||||
bg_error_.PermitUncheckedError();
|
bg_error_.PermitUncheckedError();
|
||||||
recovery_error_.PermitUncheckedError();
|
recovery_error_.PermitUncheckedError();
|
||||||
|
@ -108,6 +109,10 @@ class ErrorHandler {
|
||||||
// The pointer of DB statistics.
|
// The pointer of DB statistics.
|
||||||
std::shared_ptr<Statistics> bg_error_stats_;
|
std::shared_ptr<Statistics> bg_error_stats_;
|
||||||
|
|
||||||
|
// Tracks whether the recovery has disabled file deletion. This boolean flag
|
||||||
|
// is updated while holding db mutex.
|
||||||
|
bool recovery_disabled_file_deletion_;
|
||||||
|
|
||||||
const Status& HandleKnownErrors(const Status& bg_err,
|
const Status& HandleKnownErrors(const Status& bg_err,
|
||||||
BackgroundErrorReason reason);
|
BackgroundErrorReason reason);
|
||||||
Status OverrideNoSpaceError(const Status& bg_error, bool* auto_recovery);
|
Status OverrideNoSpaceError(const Status& bg_error, bool* auto_recovery);
|
||||||
|
|
Loading…
Reference in a new issue