diff --git a/db/db_impl/db_impl.cc b/db/db_impl/db_impl.cc index 88880996e0..ab3a79e994 100644 --- a/db/db_impl/db_impl.cc +++ b/db/db_impl/db_impl.cc @@ -354,7 +354,6 @@ Status DBImpl::ResumeImpl(DBRecoverContext context) { } // Make sure the IO Status stored in version set is set to OK. - bool file_deletion_disabled = !IsFileDeletionsEnabled(); if (s.ok()) { IOStatus io_s = versions_->io_status(); if (io_s.IsIOError()) { @@ -363,7 +362,7 @@ Status DBImpl::ResumeImpl(DBRecoverContext context) { // clean-up phase MANIFEST writing. We must have also disabled file // deletions. assert(!versions_->descriptor_log_); - assert(file_deletion_disabled); + assert(!IsFileDeletionsEnabled()); // Since we are trying to recover from MANIFEST write error, we need to // switch to a new MANIFEST anyway. The old MANIFEST can be corrupted. // Therefore, force writing a dummy version edit because we do not know @@ -406,34 +405,6 @@ Status DBImpl::ResumeImpl(DBRecoverContext context) { } } - JobContext job_context(0); - FindObsoleteFiles(&job_context, true); - mutex_.Unlock(); - - job_context.manifest_file_number = 1; - if (job_context.HaveSomethingToDelete()) { - PurgeObsoleteFiles(job_context); - } - job_context.Clean(); - - if (s.ok()) { - assert(versions_->io_status().ok()); - // If we reach here, we should re-enable file deletions if it was disabled - // during previous error handling. - if (file_deletion_disabled) { - // Always return ok - s = EnableFileDeletions(/*force=*/true); - if (!s.ok()) { - ROCKS_LOG_INFO( - immutable_db_options_.info_log, - "DB resume requested but could not enable file deletions [%s]", - s.ToString().c_str()); - assert(false); - } - } - } - - mutex_.Lock(); if (s.ok()) { // This will notify and unblock threads waiting for error recovery to // finish. Those previouly waiting threads can now proceed, which may @@ -446,6 +417,15 @@ Status DBImpl::ResumeImpl(DBRecoverContext context) { error_handler_.GetRecoveryError().PermitUncheckedError(); } + JobContext job_context(0); + FindObsoleteFiles(&job_context, true); + mutex_.Unlock(); + job_context.manifest_file_number = 1; + if (job_context.HaveSomethingToDelete()) { + PurgeObsoleteFiles(job_context); + } + job_context.Clean(); + if (s.ok()) { ROCKS_LOG_INFO(immutable_db_options_.info_log, "Successfully resumed DB"); } else { @@ -453,6 +433,7 @@ Status DBImpl::ResumeImpl(DBRecoverContext context) { s.ToString().c_str()); } + mutex_.Lock(); // Check for shutdown again before scheduling further compactions, // since we released and re-acquired the lock above if (shutdown_initiated_) { diff --git a/db/db_impl/db_impl.h b/db/db_impl/db_impl.h index cf6d1d300e..10e8af97e2 100644 --- a/db/db_impl/db_impl.h +++ b/db/db_impl/db_impl.h @@ -2373,6 +2373,10 @@ class DBImpl : public DB { Status DisableFileDeletionsWithLock(); + // Safely decrease `disable_delete_obsolete_files_` by one while holding lock + // and return its remaning value. + int EnableFileDeletionsWithLock(); + Status IncreaseFullHistoryTsLowImpl(ColumnFamilyData* cfd, std::string ts_low); diff --git a/db/db_impl/db_impl_files.cc b/db/db_impl/db_impl_files.cc index 9e2fbb73d0..8f72e17f92 100644 --- a/db/db_impl/db_impl_files.cc +++ b/db/db_impl/db_impl_files.cc @@ -100,6 +100,14 @@ Status DBImpl::EnableFileDeletions(bool force) { return Status::OK(); } +int DBImpl::EnableFileDeletionsWithLock() { + mutex_.AssertHeld(); + // In case others have called EnableFileDeletions(true /* force */) in between + disable_delete_obsolete_files_ = + std::max(0, disable_delete_obsolete_files_ - 1); + return disable_delete_obsolete_files_; +} + bool DBImpl::IsFileDeletionsEnabled() const { return 0 == disable_delete_obsolete_files_; } diff --git a/db/error_handler.cc b/db/error_handler.cc index c7dd4750f7..95b9a0fe6a 100644 --- a/db/error_handler.cc +++ b/db/error_handler.cc @@ -396,11 +396,13 @@ const Status& ErrorHandler::SetBGError(const Status& bg_status, ROCKS_LOG_WARN(db_options_.info_log, "Background IO error %s", bg_io_err.ToString().c_str()); - if (BackgroundErrorReason::kManifestWrite == reason || - BackgroundErrorReason::kManifestWriteNoWAL == reason) { + if (!recovery_disabled_file_deletion_ && + (BackgroundErrorReason::kManifestWrite == reason || + BackgroundErrorReason::kManifestWriteNoWAL == reason)) { // Always returns ok ROCKS_LOG_INFO(db_options_.info_log, "Disabling File Deletions"); db_->DisableFileDeletionsWithLock().PermitUncheckedError(); + recovery_disabled_file_deletion_ = true; } Status new_bg_io_err = bg_io_err; @@ -560,6 +562,18 @@ Status ErrorHandler::ClearBGError() { recovery_error_.PermitUncheckedError(); recovery_in_prog_ = false; soft_error_no_bg_work_ = false; + if (recovery_disabled_file_deletion_) { + recovery_disabled_file_deletion_ = false; + int remain_counter = db_->EnableFileDeletionsWithLock(); + if (remain_counter == 0) { + ROCKS_LOG_INFO(db_options_.info_log, "File Deletions Enabled"); + } else { + ROCKS_LOG_WARN( + db_options_.info_log, + "File Deletions Enable, but not really enabled. Counter: %d", + remain_counter); + } + } EventHelpers::NotifyOnErrorRecoveryEnd(db_options_.listeners, old_bg_error, bg_error_, db_mutex_); } diff --git a/db/error_handler.h b/db/error_handler.h index f444a8f8d9..ace28962dd 100644 --- a/db/error_handler.h +++ b/db/error_handler.h @@ -42,7 +42,8 @@ class ErrorHandler { recovery_in_prog_(false), soft_error_no_bg_work_(false), is_db_stopped_(false), - bg_error_stats_(db_options.statistics) { + bg_error_stats_(db_options.statistics), + recovery_disabled_file_deletion_(false) { // Clear the checked flag for uninitialized errors bg_error_.PermitUncheckedError(); recovery_error_.PermitUncheckedError(); @@ -108,6 +109,10 @@ class ErrorHandler { // The pointer of DB statistics. std::shared_ptr bg_error_stats_; + // Tracks whether the recovery has disabled file deletion. This boolean flag + // is updated while holding db mutex. + bool recovery_disabled_file_deletion_; + const Status& HandleKnownErrors(const Status& bg_err, BackgroundErrorReason reason); Status OverrideNoSpaceError(const Status& bg_error, bool* auto_recovery);