All the NoSpace() errors will be handled by regular SetBGError and RecoverFromNoSpace() (#8376)

Summary: In the current logic, any IO Error with retryable flag == true will be handled by the special logic and in most cases, StartRecoverFromRetryableBGIOError will be called to do the auto resume. If the NoSpace error with retryable flag is set during WAL write, it is mapped as a hard error, which will trigger the auto recovery. During the recover process, if write continues and append to the WAL, the write process sees that bg_error is set to HardError and it calls WriteStatusCheck(), which calls SetBGError() with Status (not IOStatus). This will redirect to the regular SetBGError interface, in which recovery_error_ will be set to the corresponding error. With the recovery_error_ set, the auto resume thread created in StartRecoverFromRetryableBGIOError will keep failing as long as user keeps trying to write. To fix this issue. All the NoSpace error (no matter retryable flag is set or not) will be redirect to the regular SetBGError, and RecoverFromNoSpace() will do the recovery job which calls SstFileManager::StartErrorRecovery(). Pull Request resolved: https://github.com/facebook/rocksdb/pull/8376 Test Plan: make check and added the new testing case Reviewed By: anand1976 Differential Revision: D29071828 Pulled By: zhichao-cao fbshipit-source-id: 7171d7e14cc4620fdab49b7eff7a2fe9a89942c2
2021-06-11 14:47:40 -07:00 · 2021-06-11 14:47:40 -07:00 · 58162835d1
parent a42a342a7a
commit 58162835d1
2 changed files with 54 additions and 4 deletions
--- a/db/error_handler.cc
+++ b/db/error_handler.cc
@ -418,15 +418,18 @@ const Status& ErrorHandler::SetBGError(const IOStatus& bg_io_err,
                                          &bg_err, db_mutex_, &auto_recovery);
    recover_context_ = context;
    return bg_error_;
-  } else if (bg_io_err.GetScope() ==
-                 IOStatus::IOErrorScope::kIOErrorScopeFile ||
-             bg_io_err.GetRetryable()) {
+  } else if (bg_io_err.subcode() != IOStatus::SubCode::kNoSpace &&
+             (bg_io_err.GetScope() ==
+                  IOStatus::IOErrorScope::kIOErrorScopeFile ||
+              bg_io_err.GetRetryable())) {
    // Second, check if the error is a retryable IO error (file scope IO error
    // is also treated as retryable IO error in RocksDB write path). if it is
    // retryable error and its severity is higher than bg_error_, overwrite the
    // bg_error_ with new error. In current stage, for retryable IO error of
    // compaction, treat it as soft error. In other cases, treat the retryable
-    // IO error as hard error.
+    // IO error as hard error. Note that, all the NoSpace error should be
+    // handled by the SstFileManager::StartErrorRecovery(). Therefore, no matter
+    // it is retryable or file scope, this logic will be bypassed.
    bool auto_recovery = false;
    EventHelpers::NotifyOnBackgroundError(db_options_.listeners, reason,
                                          &new_bg_io_err, db_mutex_,
--- a/db/error_handler_fs_test.cc
+++ b/db/error_handler_fs_test.cc
@ -193,6 +193,53 @@ TEST_F(DBErrorHandlingFSTest, FLushWriteError) {
  Destroy(options);
 }

+// All the NoSpace IOError will be handled as the regular BG Error no matter the
+// retryable flag is set of not. So the auto resume for retryable IO Error will
+// not be triggered. Also, it is mapped as hard error.
+TEST_F(DBErrorHandlingFSTest, FLushWriteNoSpaceError) {
+  std::shared_ptr<ErrorHandlerFSListener> listener(
+      new ErrorHandlerFSListener());
+  Options options = GetDefaultOptions();
+  options.env = fault_env_.get();
+  options.create_if_missing = true;
+  options.listeners.emplace_back(listener);
+  options.max_bgerror_resume_count = 2;
+  options.bgerror_resume_retry_interval = 100000;  // 0.1 second
+  options.statistics = CreateDBStatistics();
+  Status s;
+
+  listener->EnableAutoRecovery(false);
+  DestroyAndReopen(options);
+
+  IOStatus error_msg = IOStatus::NoSpace("Retryable IO Error");
+  error_msg.SetRetryable(true);
+
+  ASSERT_OK(Put(Key(1), "val1"));
+  SyncPoint::GetInstance()->SetCallBack(
+      "BuildTable:BeforeFinishBuildTable",
+      [&](void*) { fault_fs_->SetFilesystemActive(false, error_msg); });
+  SyncPoint::GetInstance()->EnableProcessing();
+  s = Flush();
+  ASSERT_EQ(s.severity(), ROCKSDB_NAMESPACE::Status::Severity::kHardError);
+  SyncPoint::GetInstance()->DisableProcessing();
+  fault_fs_->SetFilesystemActive(true);
+  s = dbfull()->Resume();
+  ASSERT_OK(s);
+  ASSERT_EQ(1, options.statistics->getAndResetTickerCount(
+                   ERROR_HANDLER_BG_ERROR_COUNT));
+  ASSERT_EQ(1, options.statistics->getAndResetTickerCount(
+                   ERROR_HANDLER_BG_IO_ERROR_COUNT));
+  ASSERT_EQ(0, options.statistics->getAndResetTickerCount(
+                   ERROR_HANDLER_BG_RETRYABLE_IO_ERROR_COUNT));
+  ASSERT_EQ(0, options.statistics->getAndResetTickerCount(
+                   ERROR_HANDLER_AUTORESUME_COUNT));
+  ASSERT_EQ(0, options.statistics->getAndResetTickerCount(
+                   ERROR_HANDLER_AUTORESUME_RETRY_TOTAL_COUNT));
+  ASSERT_EQ(0, options.statistics->getAndResetTickerCount(
+                   ERROR_HANDLER_AUTORESUME_SUCCESS_COUNT));
+  Destroy(options);
+}
+
 TEST_F(DBErrorHandlingFSTest, FLushWriteRetryableError) {
  std::shared_ptr<ErrorHandlerFSListener> listener(
      new ErrorHandlerFSListener());