From a31fe521732c6150003ea43f1e30f27f13be597c Mon Sep 17 00:00:00 2001 From: Changyu Bi <102700264+cbi42@users.noreply.github.com> Date: Wed, 26 Jun 2024 18:17:05 -0700 Subject: [PATCH] Remove the return value of `SetBGError()` (#12792) Summary: the return value for `ErrorHandler::SetBGError(error)` seems to be not well-defined, it can be `bg_error_` (no matter if the `bg_error_` is set to the input error), ok status or [`recovery_error_`](https://github.com/facebook/rocksdb/blob/3ee4d5a11a882056b341a9a1694a71371a39f664/db/error_handler.cc#L669) from `StartRecoverFromRetryableBGIOError()`. The `recovery_error_` returned may be an OK status. We have only a few places that use the return value of `SetBGError()` and they don't need to do so. Using the return value may even be wrong for example in https://github.com/facebook/rocksdb/blob/3ee4d5a11a882056b341a9a1694a71371a39f664/db/db_impl/db_impl_write.cc#L2365 where a non-ok `s` could be overwritten to OK. This PR changes SetBGError() to return void and clean up relevant code. Pull Request resolved: https://github.com/facebook/rocksdb/pull/12792 Test Plan: existing unit tests and go over all places where return value of `SetBGError()` is used. Reviewed By: hx235 Differential Revision: D58904898 Pulled By: cbi42 fbshipit-source-id: d58a20ba5a40e3f35367c6034a32c755088c3653 --- db/db_impl/db_impl.cc | 12 +++---- db/db_impl/db_impl_write.cc | 7 ++--- db/error_handler.cc | 62 ++++++++++++++++--------------------- db/error_handler.h | 7 ++--- db/error_handler_fs_test.cc | 8 +++-- 5 files changed, 45 insertions(+), 51 deletions(-) diff --git a/db/db_impl/db_impl.cc b/db/db_impl/db_impl.cc index f58f615f5f..537bd775b8 100644 --- a/db/db_impl/db_impl.cc +++ b/db/db_impl/db_impl.cc @@ -391,8 +391,8 @@ Status DBImpl::ResumeImpl(DBRecoverContext context) { if (!s.ok()) { io_s = versions_->io_status(); if (!io_s.ok()) { - s = error_handler_.SetBGError(io_s, - BackgroundErrorReason::kManifestWrite); + error_handler_.SetBGError(io_s, + BackgroundErrorReason::kManifestWrite); } } } @@ -916,8 +916,8 @@ Status DBImpl::RegisterRecordSeqnoTimeWorker(const ReadOptions& read_options, read_options, write_options, &edit, &mutex_, directories_.GetDbDir()); if (!s.ok() && versions_->io_status().IsIOError()) { - s = error_handler_.SetBGError(versions_->io_status(), - BackgroundErrorReason::kManifestWrite); + error_handler_.SetBGError(versions_->io_status(), + BackgroundErrorReason::kManifestWrite); } } @@ -1724,8 +1724,8 @@ Status DBImpl::ApplyWALToManifest(const ReadOptions& read_options, read_options, write_options, synced_wals, &mutex_, directories_.GetDbDir()); if (!status.ok() && versions_->io_status().IsIOError()) { - status = error_handler_.SetBGError(versions_->io_status(), - BackgroundErrorReason::kManifestWrite); + error_handler_.SetBGError(versions_->io_status(), + BackgroundErrorReason::kManifestWrite); } return status; } diff --git a/db/db_impl/db_impl_write.cc b/db/db_impl/db_impl_write.cc index 73c7cc7ee9..1595aa9139 100644 --- a/db/db_impl/db_impl_write.cc +++ b/db/db_impl/db_impl_write.cc @@ -1204,8 +1204,7 @@ void DBImpl::MemTableInsertStatusCheck(const Status& status) { mutex_.Lock(); assert(!error_handler_.IsBGWorkStopped()); // Maybe change the return status to void? - error_handler_.SetBGError(status, BackgroundErrorReason::kMemTable) - .PermitUncheckedError(); + error_handler_.SetBGError(status, BackgroundErrorReason::kMemTable); mutex_.Unlock(); } } @@ -2363,8 +2362,8 @@ Status DBImpl::SwitchMemtable(ColumnFamilyData* cfd, WriteContext* context) { read_options, write_options, &wal_deletion, &mutex_, directories_.GetDbDir()); if (!s.ok() && versions_->io_status().IsIOError()) { - s = error_handler_.SetBGError(versions_->io_status(), - BackgroundErrorReason::kManifestWrite); + error_handler_.SetBGError(versions_->io_status(), + BackgroundErrorReason::kManifestWrite); } if (!s.ok()) { return s; diff --git a/db/error_handler.cc b/db/error_handler.cc index 35148e72e5..33cff31a36 100644 --- a/db/error_handler.cc +++ b/db/error_handler.cc @@ -250,8 +250,6 @@ void ErrorHandler::CancelErrorRecovery() { EndAutoRecovery(); } -STATIC_AVOID_DESTRUCTION(const Status, kOkStatus){Status::OK()}; - // This is the main function for looking at an error during a background // operation and deciding the severity, and error recovery strategy. The high // level algorithm is as follows - @@ -270,11 +268,11 @@ STATIC_AVOID_DESTRUCTION(const Status, kOkStatus){Status::OK()}; // This can also get called as part of a recovery operation. In that case, we // also track the error separately in recovery_error_ so we can tell in the // end whether recovery succeeded or not -const Status& ErrorHandler::HandleKnownErrors(const Status& bg_err, - BackgroundErrorReason reason) { +void ErrorHandler::HandleKnownErrors(const Status& bg_err, + BackgroundErrorReason reason) { db_mutex_->AssertHeld(); if (bg_err.ok()) { - return kOkStatus; + return; } ROCKS_LOG_INFO(db_options_.info_log, @@ -339,7 +337,7 @@ const Status& ErrorHandler::HandleKnownErrors(const Status& bg_err, } else { // This error is less severe than previously encountered error. Don't // take any further action - return bg_error_; + return; } } @@ -356,7 +354,6 @@ const Status& ErrorHandler::HandleKnownErrors(const Status& bg_err, if (bg_error_.severity() >= Status::Severity::kHardError) { is_db_stopped_.store(true, std::memory_order_release); } - return bg_error_; } // This is the main function for looking at IO related error during the @@ -383,14 +380,14 @@ const Status& ErrorHandler::HandleKnownErrors(const Status& bg_err, // 3) for other cases, HandleKnownErrors(const Status& bg_err, // BackgroundErrorReason reason) will be called to handle other error cases // such as delegating to SstFileManager to handle no space error. -const Status& ErrorHandler::SetBGError(const Status& bg_status, - BackgroundErrorReason reason) { +void ErrorHandler::SetBGError(const Status& bg_status, + BackgroundErrorReason reason) { db_mutex_->AssertHeld(); Status tmp_status = bg_status; IOStatus bg_io_err = status_to_io_status(std::move(tmp_status)); if (bg_io_err.ok()) { - return kOkStatus; + return; } ROCKS_LOG_WARN(db_options_.info_log, "Background IO error %s", bg_io_err.ToString().c_str()); @@ -413,11 +410,11 @@ const Status& ErrorHandler::SetBGError(const Status& bg_status, EventHelpers::NotifyOnBackgroundError(db_options_.listeners, reason, &bg_err, db_mutex_, &auto_recovery); recover_context_ = context; - return bg_error_; - } else if (bg_io_err.subcode() != IOStatus::SubCode::kNoSpace && - (bg_io_err.GetScope() == - IOStatus::IOErrorScope::kIOErrorScopeFile || - bg_io_err.GetRetryable())) { + return; + } + if (bg_io_err.subcode() != IOStatus::SubCode::kNoSpace && + (bg_io_err.GetScope() == IOStatus::IOErrorScope::kIOErrorScopeFile || + bg_io_err.GetRetryable())) { // Second, check if the error is a retryable IO error (file scope IO error // is also treated as retryable IO error in RocksDB write path). if it is // retryable error and its severity is higher than bg_error_, overwrite the @@ -447,7 +444,7 @@ const Status& ErrorHandler::SetBGError(const Status& bg_status, "ErrorHandler: Compaction will schedule by itself to resume\n"); // Not used in this code path. new_bg_io_err.PermitUncheckedError(); - return bg_error_; + return; } Status::Severity severity; @@ -469,10 +466,11 @@ const Status& ErrorHandler::SetBGError(const Status& bg_status, Status bg_err(new_bg_io_err, severity); CheckAndSetRecoveryAndBGError(bg_err); recover_context_ = context; - return StartRecoverFromRetryableBGIOError(bg_io_err); - } else { - return HandleKnownErrors(new_bg_io_err, reason); + StartRecoverFromRetryableBGIOError(bg_io_err); + return; } + + HandleKnownErrors(new_bg_io_err, reason); } void ErrorHandler::AddFilesToQuarantine( @@ -620,23 +618,23 @@ Status ErrorHandler::RecoverFromBGError(bool is_manual) { return s; } -const Status& ErrorHandler::StartRecoverFromRetryableBGIOError( +void ErrorHandler::StartRecoverFromRetryableBGIOError( const IOStatus& io_error) { db_mutex_->AssertHeld(); - if (bg_error_.ok()) { - return bg_error_; - } else if (io_error.ok()) { - return kOkStatus; - } else if (db_options_.max_bgerror_resume_count <= 0 || recovery_in_prog_) { - // Auto resume BG error is not enabled, directly return bg_error_. - return bg_error_; - } else if (end_recovery_) { + if (bg_error_.ok() || io_error.ok()) { + return; + } + if (db_options_.max_bgerror_resume_count <= 0 || recovery_in_prog_) { + // Auto resume BG error is not enabled + return; + } + if (end_recovery_) { // Can temporarily release db mutex EventHelpers::NotifyOnErrorRecoveryEnd(db_options_.listeners, bg_error_, Status::ShutdownInProgress(), db_mutex_); db_mutex_->AssertHeld(); - return bg_error_; + return; } RecordStats({ERROR_HANDLER_AUTORESUME_COUNT}, {} /* int_histograms */); ROCKS_LOG_INFO( @@ -664,12 +662,6 @@ const Status& ErrorHandler::StartRecoverFromRetryableBGIOError( recovery_thread_.reset( new port::Thread(&ErrorHandler::RecoverFromRetryableBGIOError, this)); - - if (recovery_error_.ok()) { - return recovery_error_; - } else { - return bg_error_; - } } // Automatic recover from Retryable BG IO error. Must be called after db diff --git a/db/error_handler.h b/db/error_handler.h index 0188471ac1..b257f69cf0 100644 --- a/db/error_handler.h +++ b/db/error_handler.h @@ -56,7 +56,7 @@ class ErrorHandler { Status::Severity GetErrorSeverity(BackgroundErrorReason reason, Status::Code code, Status::SubCode subcode); - const Status& SetBGError(const Status& bg_err, BackgroundErrorReason reason); + void SetBGError(const Status& bg_err, BackgroundErrorReason reason); Status GetBGError() const { return bg_error_; } @@ -135,11 +135,10 @@ class ErrorHandler { // unsorted. autovector files_to_quarantine_; - const Status& HandleKnownErrors(const Status& bg_err, - BackgroundErrorReason reason); + void HandleKnownErrors(const Status& bg_err, BackgroundErrorReason reason); Status OverrideNoSpaceError(const Status& bg_error, bool* auto_recovery); void RecoverFromNoSpace(); - const Status& StartRecoverFromRetryableBGIOError(const IOStatus& io_error); + void StartRecoverFromRetryableBGIOError(const IOStatus& io_error); void RecoverFromRetryableBGIOError(); // First, if it is in recovery and the recovery_error is ok. Set the // recovery_error_ to bg_err. Second, if the severity is higher than the diff --git a/db/error_handler_fs_test.cc b/db/error_handler_fs_test.cc index a4eaad1531..57c3c0dcdd 100644 --- a/db/error_handler_fs_test.cc +++ b/db/error_handler_fs_test.cc @@ -851,13 +851,17 @@ TEST_F(DBErrorHandlingFSTest, DoubleManifestWriteError) { }); SyncPoint::GetInstance()->EnableProcessing(); s = Flush(); - ASSERT_EQ(s.severity(), ROCKSDB_NAMESPACE::Status::Severity::kHardError); + ASSERT_TRUE(s.IsNoSpace()); + ASSERT_EQ(dbfull()->TEST_GetBGError().severity(), + ROCKSDB_NAMESPACE::Status::Severity::kHardError); ASSERT_FALSE(dbfull()->TEST_GetFilesToQuarantine().empty()); fault_fs_->SetFilesystemActive(true); // This Resume() will attempt to create a new manifest file and fail again s = dbfull()->Resume(); - ASSERT_EQ(s.severity(), ROCKSDB_NAMESPACE::Status::Severity::kHardError); + ASSERT_TRUE(s.IsNoSpace()); + ASSERT_EQ(dbfull()->TEST_GetBGError().severity(), + ROCKSDB_NAMESPACE::Status::Severity::kHardError); ASSERT_FALSE(dbfull()->TEST_GetFilesToQuarantine().empty()); fault_fs_->SetFilesystemActive(true); SyncPoint::GetInstance()->ClearAllCallBacks();