From 39a07c9651c174147738740c813d17bdd7272b9d Mon Sep 17 00:00:00 2001 From: sdong Date: Fri, 16 Jul 2021 16:08:14 -0700 Subject: [PATCH] DB Stress Reopen write failure to skip WAL (#8548) Summary: When DB Stress enables write failure in reopen, WAL files are also created with a wrapper writalbe file which buffers write until fsync. However, crash test currently expects all writes to WAL is persistent. This is at odd with the unsynced bytes dropped. To work it around temporarily, we disable WAL write failure for now. Pull Request resolved: https://github.com/facebook/rocksdb/pull/8548 Test Plan: Run db_stress. Manual printf to make sure only WAL files are skipped. Reviewed By: jay-zhuang Differential Revision: D29745095 fbshipit-source-id: 1879dd2c01abad7879ca243ee94570ec47c347f3 --- db_stress_tool/db_stress_test_base.cc | 8 ++++++++ utilities/fault_injection_fs.cc | 19 ++++++++++++------- utilities/fault_injection_fs.h | 25 +++++++++++++++++++++++++ 3 files changed, 45 insertions(+), 7 deletions(-) diff --git a/db_stress_tool/db_stress_test_base.cc b/db_stress_tool/db_stress_test_base.cc index fe6f9381da..a7a23707f7 100644 --- a/db_stress_tool/db_stress_test_base.cc +++ b/db_stress_tool/db_stress_test_base.cc @@ -2492,6 +2492,13 @@ void StressTest::Open() { FLAGS_open_metadata_write_fault_one_in); } if (ingest_write_error) { + if (!FLAGS_sync) { + // When DB Stress is not sync mode, we expect all WAL writes to + // WAL is durable. Buffering unsynced writes will cause false + // positive in crash tests. Before we figure out a way to + // solve it, skip WAL from failure injection. + fault_fs_guard->SetSkipDirectWritableTypes({kWalFile}); + } fault_fs_guard->SetFilesystemDirectWritable(false); fault_fs_guard->EnableWriteErrorInjection(); fault_fs_guard->SetRandomWriteError( @@ -2539,6 +2546,7 @@ void StressTest::Open() { fault_fs_guard->SetFilesystemDirectWritable(true); fault_fs_guard->DisableMetadataWriteErrorInjection(); fault_fs_guard->DisableWriteErrorInjection(); + fault_fs_guard->SetSkipDirectWritableTypes({}); fault_fs_guard->SetRandomReadError(0); if (s.ok()) { // Ingested errors might happen in background compactions. We diff --git a/utilities/fault_injection_fs.cc b/utilities/fault_injection_fs.cc index 392a1146cf..af6f48342f 100644 --- a/utilities/fault_injection_fs.cc +++ b/utilities/fault_injection_fs.cc @@ -401,7 +401,8 @@ IOStatus FaultInjectionTestFS::NewWritableFile( return in_s; } } - if (IsFilesystemDirectWritable()) { + + if (ShouldUseDiretWritable(fname)) { return target()->NewWritableFile(fname, file_opts, result, dbg); } @@ -438,7 +439,7 @@ IOStatus FaultInjectionTestFS::ReopenWritableFile( if (!IsFilesystemActive()) { return GetError(); } - if (IsFilesystemDirectWritable()) { + if (ShouldUseDiretWritable(fname)) { return target()->ReopenWritableFile(fname, file_opts, result, dbg); } { @@ -477,7 +478,7 @@ IOStatus FaultInjectionTestFS::NewRandomRWFile( if (!IsFilesystemActive()) { return GetError(); } - if (IsFilesystemDirectWritable()) { + if (ShouldUseDiretWritable(fname)) { return target()->NewRandomRWFile(fname, file_opts, result, dbg); } { @@ -806,6 +807,13 @@ IOStatus FaultInjectionTestFS::InjectThreadSpecificReadError(ErrorOperation op, return IOStatus::OK(); } +bool FaultInjectionTestFS::TryParseFileName(const std::string& file_name, + uint64_t* number, FileType* type) { + std::size_t found = file_name.find_last_of("/"); + std::string file = file_name.substr(found); + return ParseFileName(file, number, type); +} + IOStatus FaultInjectionTestFS::InjectWriteError(const std::string& file_name) { MutexLock l(&mutex_); if (!enable_write_error_injection_ || !write_error_one_in_) { @@ -818,10 +826,7 @@ IOStatus FaultInjectionTestFS::InjectWriteError(const std::string& file_name) { } else { uint64_t number; FileType cur_type = kTempFile; - std::size_t found = file_name.find_last_of("/"); - std::string file = file_name.substr(found); - bool ret = ParseFileName(file, &number, &cur_type); - if (ret) { + if (TryParseFileName(file_name, &number, &cur_type)) { for (const auto& type : write_error_allowed_types_) { if (cur_type == type) { allowed_type = true; diff --git a/utilities/fault_injection_fs.h b/utilities/fault_injection_fs.h index eb3be049ef..625911af3a 100644 --- a/utilities/fault_injection_fs.h +++ b/utilities/fault_injection_fs.h @@ -286,6 +286,19 @@ class FaultInjectionTestFS : public FileSystemWrapper { MutexLock l(&mutex_); return filesystem_writable_; } + bool ShouldUseDiretWritable(const std::string& file_name) { + MutexLock l(&mutex_); + if (filesystem_writable_) { + return true; + } + FileType file_type = kTempFile; + uint64_t file_number = 0; + if (!TryParseFileName(file_name, &file_number, &file_type)) { + return false; + } + return skip_direct_writable_types_.find(file_type) != + skip_direct_writable_types_.end(); + } void SetFilesystemActiveNoLock( bool active, IOStatus error = IOStatus::Corruption("Not active")) { error.PermitUncheckedError(); @@ -396,6 +409,11 @@ class FaultInjectionTestFS : public FileSystemWrapper { write_error_allowed_types_ = types; } + void SetSkipDirectWritableTypes(const std::set& types) { + MutexLock l(&mutex_); + skip_direct_writable_types_ = types; + } + void SetRandomMetadataWriteError(int one_in) { MutexLock l(&mutex_); metadata_write_error_one_in_ = one_in; @@ -528,9 +546,16 @@ class FaultInjectionTestFS : public FileSystemWrapper { std::atomic read_error_one_in_; bool inject_for_all_file_types_; std::vector write_error_allowed_types_; + // File types where direct writable is skipped. + std::set skip_direct_writable_types_; bool ingest_data_corruption_before_write_; ChecksumType checksum_handoff_func_tpye_; bool fail_get_file_unique_id_; + + // Extract number of type from file name. Return false if failing to fine + // them. + bool TryParseFileName(const std::string& file_name, uint64_t* number, + FileType* type); }; } // namespace ROCKSDB_NAMESPACE