rocksdb/db/error_handler.h
Yu Zhang 509947ce2c Quarantine files in a limbo state after a manifest error (#12030)
Summary:
Part of the procedures to handle manifest IO error is to disable file deletion in case some files in limbo state get deleted prematurely. This is not ideal because: 1) not all the VersionEdits whose commit encounter such an error contain updates for files, disabling file deletion sometimes are not necessary. 2) `EnableFileDeletion` has a force mode that could make other threads accidentally disrupt this procedure in recovery.  3) Disabling file deletion as a whole is also not as efficient as more precisely tracking impacted files from being prematurely deleted.  This PR replaces this mechanism with tracking such files and quarantine them from being deleted in `ErrorHandler`.

These are the types of files being actively tracked in quarantine in this PR:
1) new table files and blob files from a background job
2) old manifest file whose immediately following new manifest file's CURRENT file creation gets into unclear state. Current handling is not sufficient to make sure the old manifest file is kept in case it's needed.

Note that WAL logs are not part of the quarantine because `min_log_number_to_keep` is a safe mechanism and it's only updated after successful manifest commits so it can prevent this premature deletion issue from happening.

We track these files' file numbers because they share the same file number space.

Pull Request resolved: https://github.com/facebook/rocksdb/pull/12030

Test Plan: Modified existing unit tests

Reviewed By: ajkr

Differential Revision: D51036774

Pulled By: jowlyzhang

fbshipit-source-id: 84ef26271fbbc888ef70da5c40fe843bd7038716
2023-11-11 08:11:11 -08:00

144 lines
4.8 KiB
C++

// Copyright (c) 2018-present, Facebook, Inc. All rights reserved.
// This source code is licensed under both the GPLv2 (found in the
// COPYING file in the root directory) and Apache 2.0 License
// (found in the LICENSE.Apache file in the root directory).
#pragma once
#include <sstream>
#include "monitoring/instrumented_mutex.h"
#include "options/db_options.h"
#include "rocksdb/io_status.h"
#include "rocksdb/listener.h"
#include "rocksdb/status.h"
#include "util/autovector.h"
namespace ROCKSDB_NAMESPACE {
class DBImpl;
// This structure is used to store the DB recovery context. The context is
// the information that related to the recover actions. For example, it contains
// FlushReason, which tells the flush job why this flush is called.
struct DBRecoverContext {
FlushReason flush_reason;
bool flush_after_recovery;
DBRecoverContext()
: flush_reason(FlushReason::kErrorRecovery),
flush_after_recovery(false) {}
DBRecoverContext(FlushReason reason)
: flush_reason(reason), flush_after_recovery(false) {}
};
class ErrorHandler {
public:
ErrorHandler(DBImpl* db, const ImmutableDBOptions& db_options,
InstrumentedMutex* db_mutex)
: db_(db),
db_options_(db_options),
cv_(db_mutex),
end_recovery_(false),
recovery_thread_(nullptr),
db_mutex_(db_mutex),
auto_recovery_(false),
recovery_in_prog_(false),
soft_error_no_bg_work_(false),
is_db_stopped_(false),
bg_error_stats_(db_options.statistics) {
// Clear the checked flag for uninitialized errors
bg_error_.PermitUncheckedError();
recovery_error_.PermitUncheckedError();
}
void EnableAutoRecovery() { auto_recovery_ = true; }
Status::Severity GetErrorSeverity(BackgroundErrorReason reason,
Status::Code code, Status::SubCode subcode);
const Status& SetBGError(const Status& bg_err, BackgroundErrorReason reason);
Status GetBGError() const { return bg_error_; }
Status GetRecoveryError() const { return recovery_error_; }
Status ClearBGError();
bool IsDBStopped() { return is_db_stopped_.load(std::memory_order_acquire); }
bool IsBGWorkStopped() {
assert(db_mutex_);
db_mutex_->AssertHeld();
return !bg_error_.ok() &&
(bg_error_.severity() >= Status::Severity::kHardError ||
!auto_recovery_ || soft_error_no_bg_work_);
}
bool IsSoftErrorNoBGWork() { return soft_error_no_bg_work_; }
bool IsRecoveryInProgress() { return recovery_in_prog_; }
Status RecoverFromBGError(bool is_manual = false);
void CancelErrorRecovery();
void EndAutoRecovery();
void AddFilesToQuarantine(
autovector<const autovector<uint64_t>*> files_to_quarantine);
const autovector<uint64_t>& GetFilesToQuarantine() const {
db_mutex_->AssertHeld();
return files_to_quarantine_;
}
void ClearFilesToQuarantine();
private:
DBImpl* db_;
const ImmutableDBOptions& db_options_;
Status bg_error_;
// A separate Status variable used to record any errors during the
// recovery process from hard errors
IOStatus recovery_error_;
// The condition variable used with db_mutex during auto resume for time
// wait.
InstrumentedCondVar cv_;
bool end_recovery_;
std::unique_ptr<port::Thread> recovery_thread_;
InstrumentedMutex* db_mutex_;
// A flag indicating whether automatic recovery from errors is enabled
bool auto_recovery_;
bool recovery_in_prog_;
// A flag to indicate that for the soft error, we should not allow any
// background work except the work is from recovery.
bool soft_error_no_bg_work_;
// Used to store the context for recover, such as flush reason.
DBRecoverContext recover_context_;
std::atomic<bool> is_db_stopped_;
// The pointer of DB statistics.
std::shared_ptr<Statistics> bg_error_stats_;
// During recovery from manifest IO errors, files whose VersionEdits entries
// could be in an ambiguous state are quarantined and file deletion refrain
// from deleting them. Successful recovery will clear this vector. Files are
// added to this vector while DB mutex was locked, this data structure is
// unsorted.
autovector<uint64_t> files_to_quarantine_;
const Status& HandleKnownErrors(const Status& bg_err,
BackgroundErrorReason reason);
Status OverrideNoSpaceError(const Status& bg_error, bool* auto_recovery);
void RecoverFromNoSpace();
const Status& StartRecoverFromRetryableBGIOError(const IOStatus& io_error);
void RecoverFromRetryableBGIOError();
// First, if it is in recovery and the recovery_error is ok. Set the
// recovery_error_ to bg_err. Second, if the severity is higher than the
// current bg_error_, overwrite it.
void CheckAndSetRecoveryAndBGError(const Status& bg_err);
};
} // namespace ROCKSDB_NAMESPACE