mirror of https://github.com/facebook/rocksdb.git
Remove the return value of `SetBGError()` (#12792)
Summary: The return value of `ErrorHandler::SetBGError(error)` is not well-defined: it can be `bg_error_` (whether or not `bg_error_` was actually set to the input error), an OK status, or `recovery_error_` from `StartRecoverFromRetryableBGIOError()` (3ee4d5a11a/db/error_handler.cc, L669), and that `recovery_error_` may itself be an OK status. Only a few places use the return value of `SetBGError()`, and none of them need to. Using the return value can even be wrong, for example in 3ee4d5a11a/db/db_impl/db_impl_write.cc (L2365), where a non-OK `s` could be overwritten with OK. This PR changes `SetBGError()` to return void and cleans up the relevant code.

Pull Request resolved: https://github.com/facebook/rocksdb/pull/12792

Test Plan: existing unit tests, plus a review of every place where the return value of `SetBGError()` was used.

Reviewed By: hx235

Differential Revision: D58904898

Pulled By: cbi42

fbshipit-source-id: d58a20ba5a40e3f35367c6034a32c755088c3653
parent 0d93c8a6ca
commit a31fe52173
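To make the problem in the summary concrete, here is a minimal, hypothetical sketch (simplified stand-ins for `Status` and `ErrorHandler`, not the actual RocksDB types or API) of how assigning the return value of a `SetBGError()`-style call can silently turn a caller's non-OK status into OK, and how the void-returning form avoids it:

```cpp
#include <iostream>
#include <string>

// Hypothetical, much-simplified stand-ins; the real types live in
// rocksdb/status.h and db/error_handler.h.
struct Status {
  bool ok;
  std::string msg;
};

struct ErrorHandler {
  // Old-style shape: records the error but returns some status that is not
  // guaranteed to reflect the input (it may even be OK).
  Status SetBGErrorReturningStatus(const Status& bg_err) {
    bg_error_ = bg_err;
    return Status{true, ""};  // e.g. an OK recovery_error_
  }
  // New-style shape after this PR: just record the error.
  void SetBGError(const Status& bg_err) { bg_error_ = bg_err; }

  Status bg_error_{true, ""};
};

int main() {
  ErrorHandler handler;

  // Buggy pattern: the caller's non-OK `s` is silently overwritten with OK.
  Status s{false, "manifest write failed"};
  s = handler.SetBGErrorReturningStatus(s);
  std::cout << "old-style caller sees ok=" << s.ok << "\n";  // ok=1, error lost

  // Fixed pattern: the caller keeps its own `s` and only reports the error.
  Status s2{false, "manifest write failed"};
  handler.SetBGError(s2);
  std::cout << "new-style caller sees ok=" << s2.ok << "\n";  // ok=0, preserved
  return 0;
}
```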
```diff
@@ -391,8 +391,8 @@ Status DBImpl::ResumeImpl(DBRecoverContext context) {
     if (!s.ok()) {
       io_s = versions_->io_status();
       if (!io_s.ok()) {
-        s = error_handler_.SetBGError(io_s,
-                                      BackgroundErrorReason::kManifestWrite);
+        error_handler_.SetBGError(io_s,
+                                  BackgroundErrorReason::kManifestWrite);
       }
     }
   }
```
```diff
@@ -916,8 +916,8 @@ Status DBImpl::RegisterRecordSeqnoTimeWorker(const ReadOptions& read_options,
                                   read_options, write_options, &edit, &mutex_,
                                   directories_.GetDbDir());
     if (!s.ok() && versions_->io_status().IsIOError()) {
-      s = error_handler_.SetBGError(versions_->io_status(),
-                                    BackgroundErrorReason::kManifestWrite);
+      error_handler_.SetBGError(versions_->io_status(),
+                                BackgroundErrorReason::kManifestWrite);
     }
   }
 
```
```diff
@@ -1724,8 +1724,8 @@ Status DBImpl::ApplyWALToManifest(const ReadOptions& read_options,
                                  read_options, write_options, synced_wals, &mutex_,
                                  directories_.GetDbDir());
   if (!status.ok() && versions_->io_status().IsIOError()) {
-    status = error_handler_.SetBGError(versions_->io_status(),
-                                       BackgroundErrorReason::kManifestWrite);
+    error_handler_.SetBGError(versions_->io_status(),
+                              BackgroundErrorReason::kManifestWrite);
   }
   return status;
 }
```
```diff
@@ -1204,8 +1204,7 @@ void DBImpl::MemTableInsertStatusCheck(const Status& status) {
     mutex_.Lock();
     assert(!error_handler_.IsBGWorkStopped());
     // Maybe change the return status to void?
-    error_handler_.SetBGError(status, BackgroundErrorReason::kMemTable)
-        .PermitUncheckedError();
+    error_handler_.SetBGError(status, BackgroundErrorReason::kMemTable);
     mutex_.Unlock();
   }
 }
```
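For context on the `.PermitUncheckedError()` call removed above: in RocksDB builds with status checking enabled, a `Status` that is never examined triggers an assertion when it is destroyed, so a deliberately ignored return value has to be marked. Once `SetBGError()` returns void there is nothing left to mark. A rough sketch of that mechanism (a simplified illustration, not the actual RocksDB `Status` implementation):

```cpp
#include <cassert>

// Simplified stand-in for a status type that must be examined before it is
// destroyed (RocksDB enables similar tracking in ASSERT_STATUS_CHECKED builds).
class CheckedStatus {
 public:
  explicit CheckedStatus(bool ok) : ok_(ok) {}
  ~CheckedStatus() { assert(checked_); }  // abort if nobody looked at it
  bool ok() const {
    checked_ = true;
    return ok_;
  }
  void PermitUncheckedError() const { checked_ = true; }  // opt out explicitly

 private:
  bool ok_;
  mutable bool checked_ = false;
};

CheckedStatus DoWork() { return CheckedStatus(true); }

int main() {
  DoWork().PermitUncheckedError();  // deliberately ignored: must be marked
  // DoWork();                      // would assert in the destructor
  return 0;
}
```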
```diff
@@ -2363,8 +2362,8 @@ Status DBImpl::SwitchMemtable(ColumnFamilyData* cfd, WriteContext* context) {
                                     read_options, write_options, &wal_deletion, &mutex_,
                                     directories_.GetDbDir());
       if (!s.ok() && versions_->io_status().IsIOError()) {
-        s = error_handler_.SetBGError(versions_->io_status(),
-                                      BackgroundErrorReason::kManifestWrite);
+        error_handler_.SetBGError(versions_->io_status(),
+                                  BackgroundErrorReason::kManifestWrite);
       }
       if (!s.ok()) {
         return s;
```
```diff
@@ -250,8 +250,6 @@ void ErrorHandler::CancelErrorRecovery() {
   EndAutoRecovery();
 }
 
-STATIC_AVOID_DESTRUCTION(const Status, kOkStatus){Status::OK()};
-
 // This is the main function for looking at an error during a background
 // operation and deciding the severity, and error recovery strategy. The high
 // level algorithm is as follows -
```
```diff
@@ -270,11 +268,11 @@ STATIC_AVOID_DESTRUCTION(const Status, kOkStatus){Status::OK()};
 // This can also get called as part of a recovery operation. In that case, we
 // also track the error separately in recovery_error_ so we can tell in the
 // end whether recovery succeeded or not
-const Status& ErrorHandler::HandleKnownErrors(const Status& bg_err,
-                                              BackgroundErrorReason reason) {
+void ErrorHandler::HandleKnownErrors(const Status& bg_err,
+                                     BackgroundErrorReason reason) {
   db_mutex_->AssertHeld();
   if (bg_err.ok()) {
-    return kOkStatus;
+    return;
   }
 
   ROCKS_LOG_INFO(db_options_.info_log,
```
```diff
@@ -339,7 +337,7 @@ const Status& ErrorHandler::HandleKnownErrors(const Status& bg_err,
     } else {
       // This error is less severe than previously encountered error. Don't
       // take any further action
-      return bg_error_;
+      return;
     }
   }
 
```
```diff
@@ -356,7 +354,6 @@ const Status& ErrorHandler::HandleKnownErrors(const Status& bg_err,
   if (bg_error_.severity() >= Status::Severity::kHardError) {
     is_db_stopped_.store(true, std::memory_order_release);
   }
-  return bg_error_;
 }
 
 // This is the main function for looking at IO related error during the
```
```diff
@@ -383,14 +380,14 @@ const Status& ErrorHandler::HandleKnownErrors(const Status& bg_err,
 // 3) for other cases, HandleKnownErrors(const Status& bg_err,
 // BackgroundErrorReason reason) will be called to handle other error cases
 // such as delegating to SstFileManager to handle no space error.
-const Status& ErrorHandler::SetBGError(const Status& bg_status,
-                                       BackgroundErrorReason reason) {
+void ErrorHandler::SetBGError(const Status& bg_status,
+                              BackgroundErrorReason reason) {
   db_mutex_->AssertHeld();
   Status tmp_status = bg_status;
   IOStatus bg_io_err = status_to_io_status(std::move(tmp_status));
 
   if (bg_io_err.ok()) {
-    return kOkStatus;
+    return;
   }
   ROCKS_LOG_WARN(db_options_.info_log, "Background IO error %s",
                  bg_io_err.ToString().c_str());
```
```diff
@@ -413,11 +410,11 @@ const Status& ErrorHandler::SetBGError(const Status& bg_status,
     EventHelpers::NotifyOnBackgroundError(db_options_.listeners, reason,
                                           &bg_err, db_mutex_, &auto_recovery);
     recover_context_ = context;
-    return bg_error_;
-  } else if (bg_io_err.subcode() != IOStatus::SubCode::kNoSpace &&
-             (bg_io_err.GetScope() ==
-                  IOStatus::IOErrorScope::kIOErrorScopeFile ||
-              bg_io_err.GetRetryable())) {
+    return;
+  }
+  if (bg_io_err.subcode() != IOStatus::SubCode::kNoSpace &&
+      (bg_io_err.GetScope() == IOStatus::IOErrorScope::kIOErrorScopeFile ||
+       bg_io_err.GetRetryable())) {
     // Second, check if the error is a retryable IO error (file scope IO error
     // is also treated as retryable IO error in RocksDB write path). if it is
     // retryable error and its severity is higher than bg_error_, overwrite the
```
```diff
@@ -447,7 +444,7 @@ const Status& ErrorHandler::SetBGError(const Status& bg_status,
           "ErrorHandler: Compaction will schedule by itself to resume\n");
       // Not used in this code path.
       new_bg_io_err.PermitUncheckedError();
-      return bg_error_;
+      return;
     }
 
     Status::Severity severity;
```
```diff
@@ -469,10 +466,11 @@ const Status& ErrorHandler::SetBGError(const Status& bg_status,
     Status bg_err(new_bg_io_err, severity);
     CheckAndSetRecoveryAndBGError(bg_err);
     recover_context_ = context;
-    return StartRecoverFromRetryableBGIOError(bg_io_err);
-  } else {
-    return HandleKnownErrors(new_bg_io_err, reason);
-  }
+    StartRecoverFromRetryableBGIOError(bg_io_err);
+    return;
+  }
+
+  HandleKnownErrors(new_bg_io_err, reason);
 }
 
 void ErrorHandler::AddFilesToQuarantine(
```
```diff
@@ -620,23 +618,23 @@ Status ErrorHandler::RecoverFromBGError(bool is_manual) {
   return s;
 }
 
-const Status& ErrorHandler::StartRecoverFromRetryableBGIOError(
+void ErrorHandler::StartRecoverFromRetryableBGIOError(
     const IOStatus& io_error) {
   db_mutex_->AssertHeld();
-  if (bg_error_.ok()) {
-    return bg_error_;
-  } else if (io_error.ok()) {
-    return kOkStatus;
-  } else if (db_options_.max_bgerror_resume_count <= 0 || recovery_in_prog_) {
-    // Auto resume BG error is not enabled, directly return bg_error_.
-    return bg_error_;
-  } else if (end_recovery_) {
+  if (bg_error_.ok() || io_error.ok()) {
+    return;
+  }
+  if (db_options_.max_bgerror_resume_count <= 0 || recovery_in_prog_) {
+    // Auto resume BG error is not enabled
+    return;
+  }
+  if (end_recovery_) {
     // Can temporarily release db mutex
     EventHelpers::NotifyOnErrorRecoveryEnd(db_options_.listeners, bg_error_,
                                            Status::ShutdownInProgress(),
                                            db_mutex_);
     db_mutex_->AssertHeld();
-    return bg_error_;
+    return;
   }
   RecordStats({ERROR_HANDLER_AUTORESUME_COUNT}, {} /* int_histograms */);
   ROCKS_LOG_INFO(
```
```diff
@@ -664,12 +662,6 @@ const Status& ErrorHandler::StartRecoverFromRetryableBGIOError(
 
   recovery_thread_.reset(
       new port::Thread(&ErrorHandler::RecoverFromRetryableBGIOError, this));
-
-  if (recovery_error_.ok()) {
-    return recovery_error_;
-  } else {
-    return bg_error_;
-  }
 }
 
 // Automatic recover from Retryable BG IO error. Must be called after db
```
```diff
@@ -56,7 +56,7 @@ class ErrorHandler {
   Status::Severity GetErrorSeverity(BackgroundErrorReason reason,
                                     Status::Code code, Status::SubCode subcode);
 
-  const Status& SetBGError(const Status& bg_err, BackgroundErrorReason reason);
+  void SetBGError(const Status& bg_err, BackgroundErrorReason reason);
 
   Status GetBGError() const { return bg_error_; }
 
```
```diff
@@ -135,11 +135,10 @@ class ErrorHandler {
   // unsorted.
   autovector<uint64_t> files_to_quarantine_;
 
-  const Status& HandleKnownErrors(const Status& bg_err,
-                                  BackgroundErrorReason reason);
+  void HandleKnownErrors(const Status& bg_err, BackgroundErrorReason reason);
   Status OverrideNoSpaceError(const Status& bg_error, bool* auto_recovery);
   void RecoverFromNoSpace();
-  const Status& StartRecoverFromRetryableBGIOError(const IOStatus& io_error);
+  void StartRecoverFromRetryableBGIOError(const IOStatus& io_error);
   void RecoverFromRetryableBGIOError();
   // First, if it is in recovery and the recovery_error is ok. Set the
   // recovery_error_ to bg_err. Second, if the severity is higher than the
```
```diff
@@ -851,13 +851,17 @@ TEST_F(DBErrorHandlingFSTest, DoubleManifestWriteError) {
       });
   SyncPoint::GetInstance()->EnableProcessing();
   s = Flush();
-  ASSERT_EQ(s.severity(), ROCKSDB_NAMESPACE::Status::Severity::kHardError);
+  ASSERT_TRUE(s.IsNoSpace());
+  ASSERT_EQ(dbfull()->TEST_GetBGError().severity(),
+            ROCKSDB_NAMESPACE::Status::Severity::kHardError);
   ASSERT_FALSE(dbfull()->TEST_GetFilesToQuarantine().empty());
   fault_fs_->SetFilesystemActive(true);
 
   // This Resume() will attempt to create a new manifest file and fail again
   s = dbfull()->Resume();
-  ASSERT_EQ(s.severity(), ROCKSDB_NAMESPACE::Status::Severity::kHardError);
+  ASSERT_TRUE(s.IsNoSpace());
+  ASSERT_EQ(dbfull()->TEST_GetBGError().severity(),
+            ROCKSDB_NAMESPACE::Status::Severity::kHardError);
   ASSERT_FALSE(dbfull()->TEST_GetFilesToQuarantine().empty());
   fault_fs_->SetFilesystemActive(true);
   SyncPoint::GetInstance()->ClearAllCallBacks();
```