Remove the return value of `SetBGError()` (#12792)

Summary: The return value of `ErrorHandler::SetBGError(error)` is not well-defined: depending on the code path it can be `bg_error_` (whether or not `bg_error_` was actually set to the input error), an OK status, or [`recovery_error_`](3ee4d5a11a/db/error_handler.cc (L669)) from `StartRecoverFromRetryableBGIOError()`, and that `recovery_error_` may itself be OK. Only a few call sites use the return value of `SetBGError()`, and none of them needs to. Using the return value can even be wrong, for example in 3ee4d5a11a/db/db_impl/db_impl_write.cc (L2365), where a non-ok `s` could be overwritten with an OK status. This PR changes `SetBGError()` to return void and cleans up the relevant code.

Pull Request resolved: https://github.com/facebook/rocksdb/pull/12792

Test Plan: existing unit tests; reviewed all places where the return value of `SetBGError()` was used.

Reviewed By: hx235

Differential Revision: D58904898

Pulled By: cbi42

fbshipit-source-id: d58a20ba5a40e3f35367c6034a32c755088c3653
Commit: a31fe52173 (parent: 0d93c8a6ca)
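To make the failure mode described in the summary concrete, here is a minimal, self-contained sketch (illustration only: `MiniStatus` and `ErrorHandlerStub` are stand-ins, not RocksDB classes) of how assigning the old return value of `SetBGError()` could silently replace a caller's non-ok status with OK, and how the void-returning version avoids it:

```cpp
#include <cassert>
#include <string>
#include <utility>

// Minimal stand-in for rocksdb::Status, for illustration only.
struct MiniStatus {
  bool ok_;
  std::string msg_;
  static MiniStatus OK() { return {true, ""}; }
  static MiniStatus IOError(std::string m) { return {false, std::move(m)}; }
  bool ok() const { return ok_; }
};

// Stand-in for ErrorHandler. The "old" method mimics the ill-defined pre-PR
// contract: it may return an OK status (e.g. a recovery status) even though
// a real background error was just recorded.
struct ErrorHandlerStub {
  MiniStatus bg_error_ = MiniStatus::OK();

  MiniStatus SetBGErrorOld(const MiniStatus& s) {
    bg_error_ = s;
    return MiniStatus::OK();  // not well-defined; can be OK regardless of s
  }
  void SetBGErrorNew(const MiniStatus& s) { bg_error_ = s; }
  MiniStatus GetBGError() const { return bg_error_; }
};

int main() {
  ErrorHandlerStub handler;

  // Old caller pattern (as in db_impl_write.cc before this PR): the caller's
  // non-ok `s` is overwritten by whatever SetBGError() happened to return.
  MiniStatus s = MiniStatus::IOError("manifest write failed");
  s = handler.SetBGErrorOld(s);
  assert(s.ok());  // the original IO error was silently dropped

  // New caller pattern: call for the side effect only; `s` keeps the error,
  // and the sticky background error remains available via GetBGError().
  MiniStatus s2 = MiniStatus::IOError("manifest write failed");
  handler.SetBGErrorNew(s2);
  assert(!s2.ok());
  assert(!handler.GetBGError().ok());
  return 0;
}
```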
@@ -391,8 +391,8 @@ Status DBImpl::ResumeImpl(DBRecoverContext context) {
     if (!s.ok()) {
       io_s = versions_->io_status();
       if (!io_s.ok()) {
-        s = error_handler_.SetBGError(io_s,
-                                      BackgroundErrorReason::kManifestWrite);
+        error_handler_.SetBGError(io_s,
+                                  BackgroundErrorReason::kManifestWrite);
       }
     }
   }
@@ -916,8 +916,8 @@ Status DBImpl::RegisterRecordSeqnoTimeWorker(const ReadOptions& read_options,
         read_options, write_options, &edit, &mutex_,
         directories_.GetDbDir());
     if (!s.ok() && versions_->io_status().IsIOError()) {
-      s = error_handler_.SetBGError(versions_->io_status(),
-                                    BackgroundErrorReason::kManifestWrite);
+      error_handler_.SetBGError(versions_->io_status(),
+                                BackgroundErrorReason::kManifestWrite);
     }
   }
@@ -1724,8 +1724,8 @@ Status DBImpl::ApplyWALToManifest(const ReadOptions& read_options,
       read_options, write_options, synced_wals, &mutex_,
       directories_.GetDbDir());
   if (!status.ok() && versions_->io_status().IsIOError()) {
-    status = error_handler_.SetBGError(versions_->io_status(),
-                                       BackgroundErrorReason::kManifestWrite);
+    error_handler_.SetBGError(versions_->io_status(),
+                              BackgroundErrorReason::kManifestWrite);
   }
   return status;
 }

@@ -1204,8 +1204,7 @@ void DBImpl::MemTableInsertStatusCheck(const Status& status) {
     mutex_.Lock();
     assert(!error_handler_.IsBGWorkStopped());
-    // Maybe change the return status to void?
-    error_handler_.SetBGError(status, BackgroundErrorReason::kMemTable)
-        .PermitUncheckedError();
+    error_handler_.SetBGError(status, BackgroundErrorReason::kMemTable);
     mutex_.Unlock();
   }
 }
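The `.PermitUncheckedError()` call removed above existed only because the old `SetBGError()` returned a `Status` that this caller ignored; in RocksDB builds that enforce status checking, an ignored `Status` must be explicitly marked as checked, which is what the removed comment ("Maybe change the return status to void?") was pointing at. Below is a minimal stand-in sketch of that idea (this `CheckedStatus` is illustrative only and far simpler than RocksDB's real `Status`):

```cpp
#include <cstdio>
#include <cstdlib>

// Illustrative "must be checked" status type, loosely modeled on the idea
// behind RocksDB's assert-status-checked builds.
class CheckedStatus {
 public:
  explicit CheckedStatus(bool ok) : ok_(ok) {}
  CheckedStatus(CheckedStatus&& other) noexcept
      : ok_(other.ok_), checked_(other.checked_) {
    other.checked_ = true;  // a moved-from object no longer needs checking
  }
  ~CheckedStatus() {
    if (!checked_) {
      std::fprintf(stderr, "Status was never checked!\n");
      std::abort();
    }
  }
  bool ok() const {
    checked_ = true;
    return ok_;
  }
  void PermitUncheckedError() const { checked_ = true; }

 private:
  bool ok_;
  mutable bool checked_ = false;
};

CheckedStatus SetBGErrorReturningStatus() { return CheckedStatus(false); }
void SetBGErrorReturningVoid() {}

int main() {
  // Old shape: the caller ignores the returned status, so it has to be marked
  // as checked explicitly, otherwise the destructor aborts in this sketch.
  SetBGErrorReturningStatus().PermitUncheckedError();

  // New shape: nothing is returned, so there is nothing to check or permit.
  SetBGErrorReturningVoid();
  return 0;
}
```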
@@ -2363,8 +2362,8 @@ Status DBImpl::SwitchMemtable(ColumnFamilyData* cfd, WriteContext* context) {
         read_options, write_options, &wal_deletion, &mutex_,
         directories_.GetDbDir());
     if (!s.ok() && versions_->io_status().IsIOError()) {
-      s = error_handler_.SetBGError(versions_->io_status(),
-                                    BackgroundErrorReason::kManifestWrite);
+      error_handler_.SetBGError(versions_->io_status(),
+                                BackgroundErrorReason::kManifestWrite);
     }
     if (!s.ok()) {
       return s;

@@ -250,8 +250,6 @@ void ErrorHandler::CancelErrorRecovery() {
   EndAutoRecovery();
 }
 
-STATIC_AVOID_DESTRUCTION(const Status, kOkStatus){Status::OK()};
-
 // This is the main function for looking at an error during a background
 // operation and deciding the severity, and error recovery strategy. The high
 // level algorithm is as follows -
@@ -270,11 +268,11 @@ STATIC_AVOID_DESTRUCTION(const Status, kOkStatus){Status::OK()};
 // This can also get called as part of a recovery operation. In that case, we
 // also track the error separately in recovery_error_ so we can tell in the
 // end whether recovery succeeded or not
-const Status& ErrorHandler::HandleKnownErrors(const Status& bg_err,
-                                              BackgroundErrorReason reason) {
+void ErrorHandler::HandleKnownErrors(const Status& bg_err,
+                                     BackgroundErrorReason reason) {
   db_mutex_->AssertHeld();
   if (bg_err.ok()) {
-    return kOkStatus;
+    return;
   }
 
   ROCKS_LOG_INFO(db_options_.info_log,
@@ -339,7 +337,7 @@ const Status& ErrorHandler::HandleKnownErrors(const Status& bg_err,
     } else {
       // This error is less severe than previously encountered error. Don't
       // take any further action
-      return bg_error_;
+      return;
     }
   }
@@ -356,7 +354,6 @@ const Status& ErrorHandler::HandleKnownErrors(const Status& bg_err,
   if (bg_error_.severity() >= Status::Severity::kHardError) {
     is_db_stopped_.store(true, std::memory_order_release);
   }
-  return bg_error_;
 }
 
 // This is the main function for looking at IO related error during the
@@ -383,14 +380,14 @@ const Status& ErrorHandler::HandleKnownErrors(const Status& bg_err,
 // 3) for other cases, HandleKnownErrors(const Status& bg_err,
 // BackgroundErrorReason reason) will be called to handle other error cases
 // such as delegating to SstFileManager to handle no space error.
-const Status& ErrorHandler::SetBGError(const Status& bg_status,
-                                       BackgroundErrorReason reason) {
+void ErrorHandler::SetBGError(const Status& bg_status,
+                              BackgroundErrorReason reason) {
   db_mutex_->AssertHeld();
   Status tmp_status = bg_status;
   IOStatus bg_io_err = status_to_io_status(std::move(tmp_status));
 
   if (bg_io_err.ok()) {
-    return kOkStatus;
+    return;
   }
   ROCKS_LOG_WARN(db_options_.info_log, "Background IO error %s",
                  bg_io_err.ToString().c_str());
@@ -413,11 +410,11 @@ const Status& ErrorHandler::SetBGError(const Status& bg_status,
     EventHelpers::NotifyOnBackgroundError(db_options_.listeners, reason,
                                           &bg_err, db_mutex_, &auto_recovery);
     recover_context_ = context;
-    return bg_error_;
-  } else if (bg_io_err.subcode() != IOStatus::SubCode::kNoSpace &&
-             (bg_io_err.GetScope() ==
-                  IOStatus::IOErrorScope::kIOErrorScopeFile ||
-              bg_io_err.GetRetryable())) {
+    return;
+  }
+  if (bg_io_err.subcode() != IOStatus::SubCode::kNoSpace &&
+      (bg_io_err.GetScope() == IOStatus::IOErrorScope::kIOErrorScopeFile ||
+       bg_io_err.GetRetryable())) {
     // Second, check if the error is a retryable IO error (file scope IO error
     // is also treated as retryable IO error in RocksDB write path). if it is
     // retryable error and its severity is higher than bg_error_, overwrite the
@@ -447,7 +444,7 @@ const Status& ErrorHandler::SetBGError(const Status& bg_status,
         "ErrorHandler: Compaction will schedule by itself to resume\n");
     // Not used in this code path.
     new_bg_io_err.PermitUncheckedError();
-    return bg_error_;
+    return;
   }
 
   Status::Severity severity;
@@ -469,10 +466,11 @@ const Status& ErrorHandler::SetBGError(const Status& bg_status,
     Status bg_err(new_bg_io_err, severity);
     CheckAndSetRecoveryAndBGError(bg_err);
     recover_context_ = context;
-    return StartRecoverFromRetryableBGIOError(bg_io_err);
-  } else {
-    return HandleKnownErrors(new_bg_io_err, reason);
+    StartRecoverFromRetryableBGIOError(bg_io_err);
+    return;
   }
+
+  HandleKnownErrors(new_bg_io_err, reason);
 }
 
 void ErrorHandler::AddFilesToQuarantine(
@@ -620,23 +618,23 @@ Status ErrorHandler::RecoverFromBGError(bool is_manual) {
   return s;
 }
 
-const Status& ErrorHandler::StartRecoverFromRetryableBGIOError(
+void ErrorHandler::StartRecoverFromRetryableBGIOError(
     const IOStatus& io_error) {
   db_mutex_->AssertHeld();
-  if (bg_error_.ok()) {
-    return bg_error_;
-  } else if (io_error.ok()) {
-    return kOkStatus;
-  } else if (db_options_.max_bgerror_resume_count <= 0 || recovery_in_prog_) {
-    // Auto resume BG error is not enabled, directly return bg_error_.
-    return bg_error_;
-  } else if (end_recovery_) {
+  if (bg_error_.ok() || io_error.ok()) {
+    return;
+  }
+  if (db_options_.max_bgerror_resume_count <= 0 || recovery_in_prog_) {
+    // Auto resume BG error is not enabled
+    return;
+  }
+  if (end_recovery_) {
     // Can temporarily release db mutex
     EventHelpers::NotifyOnErrorRecoveryEnd(db_options_.listeners, bg_error_,
                                            Status::ShutdownInProgress(),
                                            db_mutex_);
     db_mutex_->AssertHeld();
-    return bg_error_;
+    return;
   }
   RecordStats({ERROR_HANDLER_AUTORESUME_COUNT}, {} /* int_histograms */);
   ROCKS_LOG_INFO(
@@ -664,12 +662,6 @@ const Status& ErrorHandler::StartRecoverFromRetryableBGIOError(
 
   recovery_thread_.reset(
       new port::Thread(&ErrorHandler::RecoverFromRetryableBGIOError, this));
-
-  if (recovery_error_.ok()) {
-    return recovery_error_;
-  } else {
-    return bg_error_;
-  }
 }
 
 // Automatic recover from Retryable BG IO error. Must be called after db

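A side effect of dropping the return value, visible in the `StartRecoverFromRetryableBGIOError()` hunk above, is that the old `else if` chain (each branch returning a different status) collapses into independent guard clauses that simply `return`. The sketch below is a self-contained illustration of that shape only; the `State` flags are stand-ins for the real member variables (`bg_error_`, `recovery_in_prog_`, `end_recovery_`, and so on):

```cpp
#include <iostream>

// Stand-in flags for the member state consulted by the real function.
struct State {
  bool bg_error_ok = false;
  bool io_error_ok = false;
  bool auto_resume_disabled = false;
  bool recovery_in_progress = false;
  bool end_recovery = false;
};

// Post-PR shape: each precondition is its own guard clause and simply
// returns; no status value has to be threaded back to the caller.
void StartRecoverySketch(const State& s) {
  if (s.bg_error_ok || s.io_error_ok) {
    return;  // nothing to recover from
  }
  if (s.auto_resume_disabled || s.recovery_in_progress) {
    return;  // auto resume not enabled, or a recovery is already running
  }
  if (s.end_recovery) {
    return;  // shutting down; the real code notifies listeners here
  }
  std::cout << "would schedule the recovery thread here\n";
}

int main() {
  State st;
  StartRecoverySketch(st);  // prints: would schedule the recovery thread here
  st.recovery_in_progress = true;
  StartRecoverySketch(st);  // early return, nothing printed
  return 0;
}
```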
@@ -56,7 +56,7 @@ class ErrorHandler {
   Status::Severity GetErrorSeverity(BackgroundErrorReason reason,
                                     Status::Code code, Status::SubCode subcode);
 
-  const Status& SetBGError(const Status& bg_err, BackgroundErrorReason reason);
+  void SetBGError(const Status& bg_err, BackgroundErrorReason reason);
 
   Status GetBGError() const { return bg_error_; }
@@ -135,11 +135,10 @@ class ErrorHandler {
   // unsorted.
   autovector<uint64_t> files_to_quarantine_;
 
-  const Status& HandleKnownErrors(const Status& bg_err,
-                                  BackgroundErrorReason reason);
+  void HandleKnownErrors(const Status& bg_err, BackgroundErrorReason reason);
   Status OverrideNoSpaceError(const Status& bg_error, bool* auto_recovery);
   void RecoverFromNoSpace();
-  const Status& StartRecoverFromRetryableBGIOError(const IOStatus& io_error);
+  void StartRecoverFromRetryableBGIOError(const IOStatus& io_error);
   void RecoverFromRetryableBGIOError();
   // First, if it is in recovery and the recovery_error is ok. Set the
   // recovery_error_ to bg_err. Second, if the severity is higher than the

@@ -851,13 +851,17 @@ TEST_F(DBErrorHandlingFSTest, DoubleManifestWriteError) {
       });
   SyncPoint::GetInstance()->EnableProcessing();
   s = Flush();
-  ASSERT_EQ(s.severity(), ROCKSDB_NAMESPACE::Status::Severity::kHardError);
+  ASSERT_TRUE(s.IsNoSpace());
+  ASSERT_EQ(dbfull()->TEST_GetBGError().severity(),
+            ROCKSDB_NAMESPACE::Status::Severity::kHardError);
   ASSERT_FALSE(dbfull()->TEST_GetFilesToQuarantine().empty());
   fault_fs_->SetFilesystemActive(true);
 
   // This Resume() will attempt to create a new manifest file and fail again
   s = dbfull()->Resume();
-  ASSERT_EQ(s.severity(), ROCKSDB_NAMESPACE::Status::Severity::kHardError);
+  ASSERT_TRUE(s.IsNoSpace());
+  ASSERT_EQ(dbfull()->TEST_GetBGError().severity(),
+            ROCKSDB_NAMESPACE::Status::Severity::kHardError);
   ASSERT_FALSE(dbfull()->TEST_GetFilesToQuarantine().empty());
   fault_fs_->SetFilesystemActive(true);
   SyncPoint::GetInstance()->ClearAllCallBacks();