diff --git a/db/error_handler.cc b/db/error_handler.cc
index 9d8ae09705..b5c353a690 100644
--- a/db/error_handler.cc
+++ b/db/error_handler.cc
@@ -418,15 +418,18 @@ const Status& ErrorHandler::SetBGError(const IOStatus& bg_io_err,
                                           &bg_err, db_mutex_, &auto_recovery);
     recover_context_ = context;
     return bg_error_;
-  } else if (bg_io_err.GetScope() ==
-                 IOStatus::IOErrorScope::kIOErrorScopeFile ||
-             bg_io_err.GetRetryable()) {
+  } else if (bg_io_err.subcode() != IOStatus::SubCode::kNoSpace &&
+             (bg_io_err.GetScope() ==
+                  IOStatus::IOErrorScope::kIOErrorScopeFile ||
+              bg_io_err.GetRetryable())) {
     // Second, check if the error is a retryable IO error (file scope IO error
     // is also treated as retryable IO error in RocksDB write path). if it is
     // retryable error and its severity is higher than bg_error_, overwrite the
     // bg_error_ with new error. In current stage, for retryable IO error of
     // compaction, treat it as soft error. In other cases, treat the retryable
-    // IO error as hard error.
+    // IO error as hard error. Note that NoSpace errors should be handled by
+    // SstFileManager::StartErrorRecovery(), so this logic is bypassed for
+    // them regardless of the retryable flag or the file scope.
     bool auto_recovery = false;
     EventHelpers::NotifyOnBackgroundError(db_options_.listeners, reason,
                                           &new_bg_io_err, db_mutex_,
diff --git a/db/error_handler_fs_test.cc b/db/error_handler_fs_test.cc
index 7cf02f8540..e0456a8273 100644
--- a/db/error_handler_fs_test.cc
+++ b/db/error_handler_fs_test.cc
@@ -193,6 +193,53 @@ TEST_F(DBErrorHandlingFSTest, FLushWriteError) {
   Destroy(options);
 }
 
+// A NoSpace IOError is handled as a regular BG error regardless of whether the
+// retryable flag is set or not, so the auto resume for retryable IO errors is
+// not triggered. It is also mapped to a hard error.
+TEST_F(DBErrorHandlingFSTest, FLushWriteNoSpaceError) {
+  std::shared_ptr<ErrorHandlerFSListener> listener(
+      new ErrorHandlerFSListener());
+  Options options = GetDefaultOptions();
+  options.env = fault_env_.get();
+  options.create_if_missing = true;
+  options.listeners.emplace_back(listener);
+  options.max_bgerror_resume_count = 2;
+  options.bgerror_resume_retry_interval = 100000;  // 0.1 second
+  options.statistics = CreateDBStatistics();
+  Status s;
+
+  listener->EnableAutoRecovery(false);
+  DestroyAndReopen(options);
+
+  IOStatus error_msg = IOStatus::NoSpace("Retryable IO Error");
+  error_msg.SetRetryable(true);
+
+  ASSERT_OK(Put(Key(1), "val1"));
+  SyncPoint::GetInstance()->SetCallBack(
+      "BuildTable:BeforeFinishBuildTable",
+      [&](void*) { fault_fs_->SetFilesystemActive(false, error_msg); });
+  SyncPoint::GetInstance()->EnableProcessing();
+  s = Flush();
+  ASSERT_EQ(s.severity(), ROCKSDB_NAMESPACE::Status::Severity::kHardError);
+  SyncPoint::GetInstance()->DisableProcessing();
+  fault_fs_->SetFilesystemActive(true);
+  s = dbfull()->Resume();
+  ASSERT_OK(s);
+  ASSERT_EQ(1, options.statistics->getAndResetTickerCount(
+                   ERROR_HANDLER_BG_ERROR_COUNT));
+  ASSERT_EQ(1, options.statistics->getAndResetTickerCount(
+                   ERROR_HANDLER_BG_IO_ERROR_COUNT));
+  ASSERT_EQ(0, options.statistics->getAndResetTickerCount(
+                   ERROR_HANDLER_BG_RETRYABLE_IO_ERROR_COUNT));
+  ASSERT_EQ(0, options.statistics->getAndResetTickerCount(
+                   ERROR_HANDLER_AUTORESUME_COUNT));
+  ASSERT_EQ(0, options.statistics->getAndResetTickerCount(
+                   ERROR_HANDLER_AUTORESUME_RETRY_TOTAL_COUNT));
+  ASSERT_EQ(0, options.statistics->getAndResetTickerCount(
+                   ERROR_HANDLER_AUTORESUME_SUCCESS_COUNT));
+  Destroy(options);
+}
+
 TEST_F(DBErrorHandlingFSTest, FLushWriteRetryableError) {
   std::shared_ptr<ErrorHandlerFSListener> listener(
       new ErrorHandlerFSListener());
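
Note (not part of the patch): a minimal standalone sketch of how the new guard in ErrorHandler::SetBGError classifies a retryable NoSpace error. It uses only the IOStatus calls that already appear in the diff above (NoSpace(), SetRetryable(), subcode(), GetScope(), GetRetryable()); the main() wrapper, the assert, and the include path are illustrative assumptions, not part of the change.

// Illustrative only: the branch condition introduced above, in isolation.
#include <cassert>

#include "rocksdb/io_status.h"  // assumed public header for IOStatus

using ROCKSDB_NAMESPACE::IOStatus;

int main() {
  // Same error the new test injects: NoSpace with the retryable flag set.
  IOStatus bg_io_err = IOStatus::NoSpace("Retryable IO Error");
  bg_io_err.SetRetryable(true);

  // The new guard: a kNoSpace subcode skips the retryable/file-scope branch,
  // so the error falls through to the regular BG error path (recovered via
  // SstFileManager::StartErrorRecovery()) and is treated as a hard error.
  bool takes_retryable_branch =
      bg_io_err.subcode() != IOStatus::SubCode::kNoSpace &&
      (bg_io_err.GetScope() == IOStatus::IOErrorScope::kIOErrorScopeFile ||
       bg_io_err.GetRetryable());
  assert(!takes_retryable_branch);
  return 0;
}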