Remove the return value of `SetBGError()` (#12792)

Summary: The return value of `ErrorHandler::SetBGError(error)` is not well-defined: depending on the code path it can be `bg_error_` (whether or not `bg_error_` was actually set to the input error), an OK status, or [`recovery_error_`](3ee4d5a11a/db/error_handler.cc (L669)) from `StartRecoverFromRetryableBGIOError()`, and that `recovery_error_` may itself be OK. Only a few call sites use the return value of `SetBGError()`, and none of them needs to. Using the return value can even be wrong, for example in 3ee4d5a11a/db/db_impl/db_impl_write.cc (L2365), where a non-ok `s` could be overwritten with an OK status. This PR changes `SetBGError()` to return void and cleans up the relevant code.

Pull Request resolved: https://github.com/facebook/rocksdb/pull/12792

Test Plan: existing unit tests; reviewed all places where the return value of `SetBGError()` was used.

Reviewed By: hx235

Differential Revision: D58904898

Pulled By: cbi42

fbshipit-source-id: d58a20ba5a40e3f35367c6034a32c755088c3653
Commit: a31fe52173 (parent: 0d93c8a6ca)
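To make the failure mode described in the summary concrete, here is a minimal, self-contained sketch (illustration only: `MiniStatus` and `ErrorHandlerStub` are stand-ins, not RocksDB classes) of how assigning the old return value of `SetBGError()` could silently replace a caller's non-ok status with OK, and how the void-returning version avoids it:

```cpp
#include <cassert>
#include <string>
#include <utility>

// Minimal stand-in for rocksdb::Status, for illustration only.
struct MiniStatus {
  bool ok_;
  std::string msg_;
  static MiniStatus OK() { return {true, ""}; }
  static MiniStatus IOError(std::string m) { return {false, std::move(m)}; }
  bool ok() const { return ok_; }
};

// Stand-in for ErrorHandler. The "old" method mimics the ill-defined pre-PR
// contract: it may return an OK status (e.g. a recovery status) even though
// a real background error was just recorded.
struct ErrorHandlerStub {
  MiniStatus bg_error_ = MiniStatus::OK();

  MiniStatus SetBGErrorOld(const MiniStatus& s) {
    bg_error_ = s;
    return MiniStatus::OK();  // not well-defined; can be OK regardless of s
  }
  void SetBGErrorNew(const MiniStatus& s) { bg_error_ = s; }
  MiniStatus GetBGError() const { return bg_error_; }
};

int main() {
  ErrorHandlerStub handler;

  // Old caller pattern (as in db_impl_write.cc before this PR): the caller's
  // non-ok `s` is overwritten by whatever SetBGError() happened to return.
  MiniStatus s = MiniStatus::IOError("manifest write failed");
  s = handler.SetBGErrorOld(s);
  assert(s.ok());  // the original IO error was silently dropped

  // New caller pattern: call for the side effect only; `s` keeps the error,
  // and the sticky background error remains available via GetBGError().
  MiniStatus s2 = MiniStatus::IOError("manifest write failed");
  handler.SetBGErrorNew(s2);
  assert(!s2.ok());
  assert(!handler.GetBGError().ok());
  return 0;
}
```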
@@ -391,8 +391,8 @@ Status DBImpl::ResumeImpl(DBRecoverContext context) {
     if (!s.ok()) {
       io_s = versions_->io_status();
       if (!io_s.ok()) {
-        s = error_handler_.SetBGError(io_s,
-                                      BackgroundErrorReason::kManifestWrite);
+        error_handler_.SetBGError(io_s,
+                                  BackgroundErrorReason::kManifestWrite);
       }
     }
   }
@@ -916,8 +916,8 @@ Status DBImpl::RegisterRecordSeqnoTimeWorker(const ReadOptions& read_options,
         read_options, write_options, &edit, &mutex_,
         directories_.GetDbDir());
     if (!s.ok() && versions_->io_status().IsIOError()) {
-      s = error_handler_.SetBGError(versions_->io_status(),
-                                    BackgroundErrorReason::kManifestWrite);
+      error_handler_.SetBGError(versions_->io_status(),
+                                BackgroundErrorReason::kManifestWrite);
     }
   }
@@ -1724,8 +1724,8 @@ Status DBImpl::ApplyWALToManifest(const ReadOptions& read_options,
       read_options, write_options, synced_wals, &mutex_,
       directories_.GetDbDir());
   if (!status.ok() && versions_->io_status().IsIOError()) {
-    status = error_handler_.SetBGError(versions_->io_status(),
-                                       BackgroundErrorReason::kManifestWrite);
+    error_handler_.SetBGError(versions_->io_status(),
+                              BackgroundErrorReason::kManifestWrite);
   }
   return status;
 }

@@ -1204,8 +1204,7 @@ void DBImpl::MemTableInsertStatusCheck(const Status& status) {
     mutex_.Lock();
     assert(!error_handler_.IsBGWorkStopped());
-    // Maybe change the return status to void?
-    error_handler_.SetBGError(status, BackgroundErrorReason::kMemTable)
-        .PermitUncheckedError();
+    error_handler_.SetBGError(status, BackgroundErrorReason::kMemTable);
     mutex_.Unlock();
   }
 }
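The `.PermitUncheckedError()` call removed above existed only because the old `SetBGError()` returned a `Status` that this caller ignored; in RocksDB builds that enforce status checking, an ignored `Status` must be explicitly marked as checked, which is what the removed comment ("Maybe change the return status to void?") was pointing at. Below is a minimal stand-in sketch of that idea (this `CheckedStatus` is illustrative only and far simpler than RocksDB's real `Status`):

```cpp
#include <cstdio>
#include <cstdlib>

// Illustrative "must be checked" status type, loosely modeled on the idea
// behind RocksDB's assert-status-checked builds.
class CheckedStatus {
 public:
  explicit CheckedStatus(bool ok) : ok_(ok) {}
  CheckedStatus(CheckedStatus&& other) noexcept
      : ok_(other.ok_), checked_(other.checked_) {
    other.checked_ = true;  // a moved-from object no longer needs checking
  }
  ~CheckedStatus() {
    if (!checked_) {
      std::fprintf(stderr, "Status was never checked!\n");
      std::abort();
    }
  }
  bool ok() const {
    checked_ = true;
    return ok_;
  }
  void PermitUncheckedError() const { checked_ = true; }

 private:
  bool ok_;
  mutable bool checked_ = false;
};

CheckedStatus SetBGErrorReturningStatus() { return CheckedStatus(false); }
void SetBGErrorReturningVoid() {}

int main() {
  // Old shape: the caller ignores the returned status, so it has to be marked
  // as checked explicitly, otherwise the destructor aborts in this sketch.
  SetBGErrorReturningStatus().PermitUncheckedError();

  // New shape: nothing is returned, so there is nothing to check or permit.
  SetBGErrorReturningVoid();
  return 0;
}
```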
@@ -2363,8 +2362,8 @@ Status DBImpl::SwitchMemtable(ColumnFamilyData* cfd, WriteContext* context) {
         read_options, write_options, &wal_deletion, &mutex_,
         directories_.GetDbDir());
     if (!s.ok() && versions_->io_status().IsIOError()) {
-      s = error_handler_.SetBGError(versions_->io_status(),
-                                    BackgroundErrorReason::kManifestWrite);
+      error_handler_.SetBGError(versions_->io_status(),
+                                BackgroundErrorReason::kManifestWrite);
     }
     if (!s.ok()) {
       return s;

@@ -250,8 +250,6 @@ void ErrorHandler::CancelErrorRecovery() {
   EndAutoRecovery();
 }
 
-STATIC_AVOID_DESTRUCTION(const Status, kOkStatus){Status::OK()};
-
 // This is the main function for looking at an error during a background
 // operation and deciding the severity, and error recovery strategy. The high
 // level algorithm is as follows -
@@ -270,11 +268,11 @@ STATIC_AVOID_DESTRUCTION(const Status, kOkStatus){Status::OK()};
 // This can also get called as part of a recovery operation. In that case, we
 // also track the error separately in recovery_error_ so we can tell in the
 // end whether recovery succeeded or not
-const Status& ErrorHandler::HandleKnownErrors(const Status& bg_err,
-                                              BackgroundErrorReason reason) {
+void ErrorHandler::HandleKnownErrors(const Status& bg_err,
+                                     BackgroundErrorReason reason) {
   db_mutex_->AssertHeld();
   if (bg_err.ok()) {
-    return kOkStatus;
+    return;
   }
 
   ROCKS_LOG_INFO(db_options_.info_log,
@@ -339,7 +337,7 @@ const Status& ErrorHandler::HandleKnownErrors(const Status& bg_err,
     } else {
       // This error is less severe than previously encountered error. Don't
       // take any further action
-      return bg_error_;
+      return;
     }
   }
@@ -356,7 +354,6 @@ const Status& ErrorHandler::HandleKnownErrors(const Status& bg_err,
   if (bg_error_.severity() >= Status::Severity::kHardError) {
     is_db_stopped_.store(true, std::memory_order_release);
   }
-  return bg_error_;
 }
 
 // This is the main function for looking at IO related error during the
@@ -383,14 +380,14 @@ const Status& ErrorHandler::HandleKnownErrors(const Status& bg_err,
 // 3) for other cases, HandleKnownErrors(const Status& bg_err,
 // BackgroundErrorReason reason) will be called to handle other error cases
 // such as delegating to SstFileManager to handle no space error.
-const Status& ErrorHandler::SetBGError(const Status& bg_status,
-                                       BackgroundErrorReason reason) {
+void ErrorHandler::SetBGError(const Status& bg_status,
+                              BackgroundErrorReason reason) {
   db_mutex_->AssertHeld();
   Status tmp_status = bg_status;
   IOStatus bg_io_err = status_to_io_status(std::move(tmp_status));
 
   if (bg_io_err.ok()) {
-    return kOkStatus;
+    return;
   }
   ROCKS_LOG_WARN(db_options_.info_log, "Background IO error %s",
                  bg_io_err.ToString().c_str());
@@ -413,11 +410,11 @@ const Status& ErrorHandler::SetBGError(const Status& bg_status,
     EventHelpers::NotifyOnBackgroundError(db_options_.listeners, reason,
                                           &bg_err, db_mutex_, &auto_recovery);
     recover_context_ = context;
-    return bg_error_;
-  } else if (bg_io_err.subcode() != IOStatus::SubCode::kNoSpace &&
-             (bg_io_err.GetScope() ==
-                  IOStatus::IOErrorScope::kIOErrorScopeFile ||
-              bg_io_err.GetRetryable())) {
+    return;
+  }
+  if (bg_io_err.subcode() != IOStatus::SubCode::kNoSpace &&
+      (bg_io_err.GetScope() == IOStatus::IOErrorScope::kIOErrorScopeFile ||
+       bg_io_err.GetRetryable())) {
     // Second, check if the error is a retryable IO error (file scope IO error
     // is also treated as retryable IO error in RocksDB write path). if it is
     // retryable error and its severity is higher than bg_error_, overwrite the
@@ -447,7 +444,7 @@ const Status& ErrorHandler::SetBGError(const Status& bg_status,
         "ErrorHandler: Compaction will schedule by itself to resume\n");
     // Not used in this code path.
     new_bg_io_err.PermitUncheckedError();
-    return bg_error_;
+    return;
   }
 
   Status::Severity severity;
@@ -469,10 +466,11 @@ const Status& ErrorHandler::SetBGError(const Status& bg_status,
     Status bg_err(new_bg_io_err, severity);
     CheckAndSetRecoveryAndBGError(bg_err);
     recover_context_ = context;
-    return StartRecoverFromRetryableBGIOError(bg_io_err);
-  } else {
-    return HandleKnownErrors(new_bg_io_err, reason);
+    StartRecoverFromRetryableBGIOError(bg_io_err);
+    return;
   }
+
+  HandleKnownErrors(new_bg_io_err, reason);
 }
 
 void ErrorHandler::AddFilesToQuarantine(
@@ -620,23 +618,23 @@ Status ErrorHandler::RecoverFromBGError(bool is_manual) {
   return s;
 }
 
-const Status& ErrorHandler::StartRecoverFromRetryableBGIOError(
+void ErrorHandler::StartRecoverFromRetryableBGIOError(
     const IOStatus& io_error) {
   db_mutex_->AssertHeld();
-  if (bg_error_.ok()) {
-    return bg_error_;
-  } else if (io_error.ok()) {
-    return kOkStatus;
-  } else if (db_options_.max_bgerror_resume_count <= 0 || recovery_in_prog_) {
-    // Auto resume BG error is not enabled, directly return bg_error_.
-    return bg_error_;
-  } else if (end_recovery_) {
+  if (bg_error_.ok() || io_error.ok()) {
+    return;
+  }
+  if (db_options_.max_bgerror_resume_count <= 0 || recovery_in_prog_) {
+    // Auto resume BG error is not enabled
+    return;
+  }
+  if (end_recovery_) {
     // Can temporarily release db mutex
     EventHelpers::NotifyOnErrorRecoveryEnd(db_options_.listeners, bg_error_,
                                            Status::ShutdownInProgress(),
                                            db_mutex_);
     db_mutex_->AssertHeld();
-    return bg_error_;
+    return;
   }
   RecordStats({ERROR_HANDLER_AUTORESUME_COUNT}, {} /* int_histograms */);
   ROCKS_LOG_INFO(
@@ -664,12 +662,6 @@ const Status& ErrorHandler::StartRecoverFromRetryableBGIOError(
 
   recovery_thread_.reset(
       new port::Thread(&ErrorHandler::RecoverFromRetryableBGIOError, this));
-
-  if (recovery_error_.ok()) {
-    return recovery_error_;
-  } else {
-    return bg_error_;
-  }
 }
 
 // Automatic recover from Retryable BG IO error. Must be called after db

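A side effect of dropping the return value, visible in the `StartRecoverFromRetryableBGIOError()` hunk above, is that the old `else if` chain (each branch returning a different status) collapses into independent guard clauses that simply `return`. The sketch below is a self-contained illustration of that shape only; the `State` flags are stand-ins for the real member variables (`bg_error_`, `recovery_in_prog_`, `end_recovery_`, and so on):

```cpp
#include <iostream>

// Stand-in flags for the member state consulted by the real function.
struct State {
  bool bg_error_ok = false;
  bool io_error_ok = false;
  bool auto_resume_disabled = false;
  bool recovery_in_progress = false;
  bool end_recovery = false;
};

// Post-PR shape: each precondition is its own guard clause and simply
// returns; no status value has to be threaded back to the caller.
void StartRecoverySketch(const State& s) {
  if (s.bg_error_ok || s.io_error_ok) {
    return;  // nothing to recover from
  }
  if (s.auto_resume_disabled || s.recovery_in_progress) {
    return;  // auto resume not enabled, or a recovery is already running
  }
  if (s.end_recovery) {
    return;  // shutting down; the real code notifies listeners here
  }
  std::cout << "would schedule the recovery thread here\n";
}

int main() {
  State st;
  StartRecoverySketch(st);  // prints: would schedule the recovery thread here
  st.recovery_in_progress = true;
  StartRecoverySketch(st);  // early return, nothing printed
  return 0;
}
```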
@@ -56,7 +56,7 @@ class ErrorHandler {
   Status::Severity GetErrorSeverity(BackgroundErrorReason reason,
                                     Status::Code code, Status::SubCode subcode);
 
-  const Status& SetBGError(const Status& bg_err, BackgroundErrorReason reason);
+  void SetBGError(const Status& bg_err, BackgroundErrorReason reason);
 
   Status GetBGError() const { return bg_error_; }
@@ -135,11 +135,10 @@ class ErrorHandler {
   // unsorted.
   autovector<uint64_t> files_to_quarantine_;
 
-  const Status& HandleKnownErrors(const Status& bg_err,
-                                  BackgroundErrorReason reason);
+  void HandleKnownErrors(const Status& bg_err, BackgroundErrorReason reason);
   Status OverrideNoSpaceError(const Status& bg_error, bool* auto_recovery);
   void RecoverFromNoSpace();
-  const Status& StartRecoverFromRetryableBGIOError(const IOStatus& io_error);
+  void StartRecoverFromRetryableBGIOError(const IOStatus& io_error);
   void RecoverFromRetryableBGIOError();
   // First, if it is in recovery and the recovery_error is ok. Set the
   // recovery_error_ to bg_err. Second, if the severity is higher than the

@@ -851,13 +851,17 @@ TEST_F(DBErrorHandlingFSTest, DoubleManifestWriteError) {
       });
   SyncPoint::GetInstance()->EnableProcessing();
   s = Flush();
-  ASSERT_EQ(s.severity(), ROCKSDB_NAMESPACE::Status::Severity::kHardError);
+  ASSERT_TRUE(s.IsNoSpace());
+  ASSERT_EQ(dbfull()->TEST_GetBGError().severity(),
+            ROCKSDB_NAMESPACE::Status::Severity::kHardError);
   ASSERT_FALSE(dbfull()->TEST_GetFilesToQuarantine().empty());
   fault_fs_->SetFilesystemActive(true);
 
   // This Resume() will attempt to create a new manifest file and fail again
   s = dbfull()->Resume();
-  ASSERT_EQ(s.severity(), ROCKSDB_NAMESPACE::Status::Severity::kHardError);
+  ASSERT_TRUE(s.IsNoSpace());
+  ASSERT_EQ(dbfull()->TEST_GetBGError().severity(),
+            ROCKSDB_NAMESPACE::Status::Severity::kHardError);
   ASSERT_FALSE(dbfull()->TEST_GetFilesToQuarantine().empty());
   fault_fs_->SetFilesystemActive(true);
   SyncPoint::GetInstance()->ClearAllCallBacks();