Fix nullptr access and race to fault_fs_guard (#12799)

Summary:
**Context/Summary:**

There are a couple places where we forgot to check fault_fs_guard before accessing it. So we can see something like this occasionally

```
=138831==Hint: address points to the zero page.
SCARINESS: 10 (null-deref)
AddressSanitizer:DEADLYSIGNAL
    #0 0x18b9e0b in rocksdb::ThreadLocalPtr::Get() const fbcode/internal_repo_rocksdb/repo/util/thread_local.cc:503
    https://github.com/facebook/rocksdb/issues/1 0x83d8b7 in rocksdb::StressTest::TestCompactRange(rocksdb::ThreadState*, long, rocksdb::Slice const&, rocksdb::ColumnFamilyHandle*) fbcode/internal_repo_rocksdb/repo/utilities/fault_injection_fs.h
```
Also accessing of `io_activties_exempted_from_fault_injection.find` not fully synced so we see the following
```
WARNING: ThreadSanitizer: data race (pid=90939)
  Write of size 8 at 0x7b4c000004d0 by thread T762 (mutexes: write M0):
    #0 std::_Rb_tree<rocksdb::Env::IOActivity, rocksdb::Env::IOActivity, std::_Identity<rocksdb::Env::IOActivity>, std::less<rocksdb::Env::IOActivity>, std::allocator<rocksdb::Env::IOActivity>>::operator=(std::_Rb_tree<rocksdb::Env::IOActivity, rocksdb::Env::IOActivity, std::_Identity<rocksdb::Env::IOActivity>, std::less<rocksdb::Env::IOActivity>, std::allocator<rocksdb::Env::IOActivity>> const&) fbcode/third-party-buck/platform010/build/libgcc/include/c++/trunk/bits/stl_tree.h:208 (db_stress+0x411c32) (BuildId: b803e5aca22c6b080defed8e85b7bfec)
    https://github.com/facebook/rocksdb/issues/1 rocksdb::DbStressListener::OnErrorRecoveryCompleted(rocksdb::Status) fbcode/third-party-buck/platform010/build/libgcc/include/c++/trunk/bits/stl_set.h:298 (db_stress+0x4112e5) (BuildId: b803e5aca22c6b080defed8e85b7bfec)
    https://github.com/facebook/rocksdb/issues/2 rocksdb::EventHelpers::NotifyOnErrorRecoveryEnd(std::vector<std::shared_ptr<rocksdb::EventListener>, std::allocator<std::shared_ptr<rocksdb::EventListener>>> const&, rocksdb::Status const&, rocksdb::Status const&, rocksdb::InstrumentedMutex*) fbcode/internal_repo_rocksdb/repo/db/event_helpers.cc:239 (db_stress+0xa09d60) (BuildId: b803e5aca22c6b080defed8e85b7bfec)

  Previous read of size 8 at 0x7b4c000004d0 by thread T131 (mutexes: write M1):
    #0 rocksdb::FaultInjectionTestFS::MaybeInjectThreadLocalError(rocksdb::FaultInjectionIOType, rocksdb::IOOptions const&, rocksdb::FaultInjectionTestFS::ErrorOperation, rocksdb::Slice*, bool, char*, bool, bool*) fbcode/third-party-buck/platform010/build/libgcc/include/c++/trunk/bits/stl_tree.h:798 (db_stress+0xf7d0f3) (BuildId: b803e5aca22c6b080defed8e85b7bfec)
```

Pull Request resolved: https://github.com/facebook/rocksdb/pull/12799

Test Plan: CI

Reviewed By: jowlyzhang

Differential Revision: D58917449

Pulled By: hx235

fbshipit-source-id: f24fc1acc2a7d91f9f285447a97ba41397f48dbd
This commit is contained in:
Hui Xiao 2024-06-24 16:10:36 -07:00 committed by Facebook GitHub Bot
parent e3f5125ff1
commit 56f7ef50d7
6 changed files with 96 additions and 53 deletions

View File

@ -359,10 +359,10 @@ class CfConsistencyStressTest : public StressTest {
if (fault_fs_guard) {
fault_fs_guard->DisableThreadLocalErrorInjection(
FaultInjectionIOType::kRead);
fault_fs_guard->DisableThreadLocalErrorInjection(
FaultInjectionIOType::kMetadataRead);
}
if (s.ok() || s.IsNotFound()) {
const bool cmp_found = s.ok();

View File

@ -19,8 +19,6 @@
ROCKSDB_NAMESPACE::Env* db_stress_listener_env = nullptr;
ROCKSDB_NAMESPACE::Env* db_stress_env = nullptr;
// If non-null, injects read error at a rate specified by the
// read_fault_one_in or write_fault_one_in flag
std::shared_ptr<ROCKSDB_NAMESPACE::FaultInjectionTestFS> fault_fs_guard;
std::shared_ptr<ROCKSDB_NAMESPACE::SecondaryCache> compressed_secondary_cache;
std::shared_ptr<ROCKSDB_NAMESPACE::Cache> block_cache;

View File

@ -1040,6 +1040,10 @@ void StressTest::OperateDb(ThreadState* thread) {
FaultInjectionIOType::kRead);
fault_fs_guard->DisableThreadLocalErrorInjection(
FaultInjectionIOType::kMetadataRead);
fault_fs_guard->DisableThreadLocalErrorInjection(
FaultInjectionIOType::kWrite);
fault_fs_guard->DisableThreadLocalErrorInjection(
FaultInjectionIOType::kMetadataWrite);
}
// Verify no writes during LockWAL
@ -1098,6 +1102,10 @@ void StressTest::OperateDb(ThreadState* thread) {
FaultInjectionIOType::kRead);
fault_fs_guard->EnableThreadLocalErrorInjection(
FaultInjectionIOType::kMetadataRead);
fault_fs_guard->EnableThreadLocalErrorInjection(
FaultInjectionIOType::kWrite);
fault_fs_guard->EnableThreadLocalErrorInjection(
FaultInjectionIOType::kMetadataWrite);
}
}
}
@ -2476,13 +2484,19 @@ Status StressTest::TestCheckpoint(ThreadState* thread,
std::vector<std::string> files;
// Temporarily disable error injection to print debugging information
fault_fs_guard->DisableThreadLocalErrorInjection(
FaultInjectionIOType::kMetadataRead);
if (fault_fs_guard) {
fault_fs_guard->DisableThreadLocalErrorInjection(
FaultInjectionIOType::kMetadataRead);
}
Status my_s = db_stress_env->GetChildren(checkpoint_dir, &files);
// Enable back disable error injection disabled for printing debugging
// information
fault_fs_guard->EnableThreadLocalErrorInjection(
FaultInjectionIOType::kMetadataRead);
if (fault_fs_guard) {
fault_fs_guard->EnableThreadLocalErrorInjection(
FaultInjectionIOType::kMetadataRead);
}
assert(my_s.ok());
for (const auto& f : files) {
@ -2964,19 +2978,33 @@ void StressTest::TestCompactRange(ThreadState* thread, int64_t rand_key,
uint32_t pre_hash = 0;
if (thread->rand.OneIn(2)) {
// Temporarily disable error injection to for validation
if (fault_fs_guard) {
fault_fs_guard->DisableThreadLocalErrorInjection(
FaultInjectionIOType::kMetadataRead);
fault_fs_guard->DisableThreadLocalErrorInjection(
FaultInjectionIOType::kRead);
fault_fs_guard->DisableThreadLocalErrorInjection(
FaultInjectionIOType::kMetadataWrite);
fault_fs_guard->DisableThreadLocalErrorInjection(
FaultInjectionIOType::kWrite);
}
// Declare a snapshot and compare the data before and after the compaction
fault_fs_guard->DisableThreadLocalErrorInjection(
FaultInjectionIOType::kMetadataRead);
fault_fs_guard->DisableThreadLocalErrorInjection(
FaultInjectionIOType::kRead);
pre_snapshot = db_->GetSnapshot();
pre_hash =
GetRangeHash(thread, pre_snapshot, column_family, start_key, end_key);
// Enable back error injection disabled for validation
fault_fs_guard->EnableThreadLocalErrorInjection(
FaultInjectionIOType::kMetadataRead);
fault_fs_guard->EnableThreadLocalErrorInjection(
FaultInjectionIOType::kRead);
if (fault_fs_guard) {
fault_fs_guard->EnableThreadLocalErrorInjection(
FaultInjectionIOType::kMetadataRead);
fault_fs_guard->EnableThreadLocalErrorInjection(
FaultInjectionIOType::kRead);
fault_fs_guard->EnableThreadLocalErrorInjection(
FaultInjectionIOType::kMetadataWrite);
fault_fs_guard->EnableThreadLocalErrorInjection(
FaultInjectionIOType::kWrite);
}
}
std::ostringstream compact_range_opt_oss;
compact_range_opt_oss << "exclusive_manual_compaction: "
@ -3012,11 +3040,16 @@ void StressTest::TestCompactRange(ThreadState* thread, int64_t rand_key,
if (pre_snapshot != nullptr) {
// Temporarily disable error injection for validation
// Declaring a snapshot and compare the data before and after the compaction
fault_fs_guard->DisableThreadLocalErrorInjection(
FaultInjectionIOType::kMetadataRead);
fault_fs_guard->DisableThreadLocalErrorInjection(
FaultInjectionIOType::kRead);
if (fault_fs_guard) {
fault_fs_guard->DisableThreadLocalErrorInjection(
FaultInjectionIOType::kMetadataRead);
fault_fs_guard->DisableThreadLocalErrorInjection(
FaultInjectionIOType::kRead);
fault_fs_guard->DisableThreadLocalErrorInjection(
FaultInjectionIOType::kMetadataWrite);
fault_fs_guard->DisableThreadLocalErrorInjection(
FaultInjectionIOType::kWrite);
}
uint32_t post_hash =
GetRangeHash(thread, pre_snapshot, column_family, start_key, end_key);
if (pre_hash != post_hash) {
@ -3032,11 +3065,17 @@ void StressTest::TestCompactRange(ThreadState* thread, int64_t rand_key,
thread->shared->SetVerificationFailure();
}
db_->ReleaseSnapshot(pre_snapshot);
// Enable back error injection disabled for validation
fault_fs_guard->EnableThreadLocalErrorInjection(
FaultInjectionIOType::kMetadataRead);
fault_fs_guard->EnableThreadLocalErrorInjection(
FaultInjectionIOType::kRead);
if (fault_fs_guard) {
// Enable back error injection disabled for validation
fault_fs_guard->EnableThreadLocalErrorInjection(
FaultInjectionIOType::kMetadataRead);
fault_fs_guard->EnableThreadLocalErrorInjection(
FaultInjectionIOType::kRead);
fault_fs_guard->EnableThreadLocalErrorInjection(
FaultInjectionIOType::kMetadataWrite);
fault_fs_guard->EnableThreadLocalErrorInjection(
FaultInjectionIOType::kWrite);
}
}
}

View File

@ -1607,14 +1607,13 @@ class NonBatchedOpsStressTest : public StressTest {
if (FLAGS_verify_before_write) {
// Temporarily disable error injection for preparation
fault_fs_guard->DisableThreadLocalErrorInjection(
FaultInjectionIOType::kRead);
fault_fs_guard->DisableThreadLocalErrorInjection(
FaultInjectionIOType::kWrite);
fault_fs_guard->DisableThreadLocalErrorInjection(
FaultInjectionIOType::kMetadataRead);
fault_fs_guard->DisableThreadLocalErrorInjection(
FaultInjectionIOType::kMetadataWrite);
if (fault_fs_guard) {
fault_fs_guard->DisableThreadLocalErrorInjection(
FaultInjectionIOType::kRead);
fault_fs_guard->DisableThreadLocalErrorInjection(
FaultInjectionIOType::kMetadataRead);
}
std::string from_db;
Status s = db_->Get(read_opts, cfh, k, &from_db);
bool res = VerifyOrSyncValue(
@ -1622,14 +1621,12 @@ class NonBatchedOpsStressTest : public StressTest {
/* msg_prefix */ "Pre-Put Get verification", from_db, s);
// Enable back error injection disabled for preparation
fault_fs_guard->EnableThreadLocalErrorInjection(
FaultInjectionIOType::kRead);
fault_fs_guard->EnableThreadLocalErrorInjection(
FaultInjectionIOType::kWrite);
fault_fs_guard->EnableThreadLocalErrorInjection(
FaultInjectionIOType::kMetadataRead);
fault_fs_guard->EnableThreadLocalErrorInjection(
FaultInjectionIOType::kMetadataWrite);
if (fault_fs_guard) {
fault_fs_guard->EnableThreadLocalErrorInjection(
FaultInjectionIOType::kRead);
fault_fs_guard->EnableThreadLocalErrorInjection(
FaultInjectionIOType::kMetadataRead);
}
if (!res) {
return s;
}
@ -1897,19 +1894,24 @@ class NonBatchedOpsStressTest : public StressTest {
std::ostringstream ingest_options_oss;
// Temporarily disable error injection for preparation
fault_fs_guard->DisableThreadLocalErrorInjection(
FaultInjectionIOType::kMetadataRead);
fault_fs_guard->DisableThreadLocalErrorInjection(
FaultInjectionIOType::kMetadataWrite);
if (fault_fs_guard) {
fault_fs_guard->DisableThreadLocalErrorInjection(
FaultInjectionIOType::kMetadataRead);
fault_fs_guard->DisableThreadLocalErrorInjection(
FaultInjectionIOType::kMetadataWrite);
}
if (db_stress_env->FileExists(sst_filename).ok()) {
// Maybe we terminated abnormally before, so cleanup to give this file
// ingestion a clean slate
s = db_stress_env->DeleteFile(sst_filename);
}
fault_fs_guard->EnableThreadLocalErrorInjection(
FaultInjectionIOType::kMetadataRead);
fault_fs_guard->EnableThreadLocalErrorInjection(
FaultInjectionIOType::kMetadataWrite);
if (fault_fs_guard) {
fault_fs_guard->EnableThreadLocalErrorInjection(
FaultInjectionIOType::kMetadataRead);
fault_fs_guard->EnableThreadLocalErrorInjection(
FaultInjectionIOType::kMetadataWrite);
}
SstFileWriter sst_file_writer(EnvOptions(options_), options_);
if (s.ok()) {

View File

@ -1228,8 +1228,7 @@ IOStatus FaultInjectionTestFS::MaybeInjectThreadLocalReadError(
ErrorContext* ctx =
static_cast<ErrorContext*>(injected_thread_local_read_error_.Get());
if (ctx == nullptr || !ctx->enable_error_injection || !ctx->one_in ||
io_activties_exempted_from_fault_injection.find(io_options.io_activity) !=
io_activties_exempted_from_fault_injection.end()) {
ShouldIOActivtiesExemptFromFaultInjection(io_options.io_activity)) {
return IOStatus::OK();
}
@ -1306,8 +1305,7 @@ IOStatus FaultInjectionTestFS::MaybeInjectThreadLocalError(
ErrorContext* ctx = GetErrorContextFromFaultInjectionIOType(type);
if (ctx == nullptr || !ctx->enable_error_injection || !ctx->one_in ||
io_activties_exempted_from_fault_injection.find(io_options.io_activity) !=
io_activties_exempted_from_fault_injection.end()) {
ShouldIOActivtiesExemptFromFaultInjection(io_options.io_activity)) {
return IOStatus::OK();
}

View File

@ -425,6 +425,12 @@ class FaultInjectionTestFS : public FileSystemWrapper {
io_activties_exempted_from_fault_injection = io_activties;
}
bool ShouldIOActivtiesExemptFromFaultInjection(Env::IOActivity io_activty) {
MutexLock l(&mutex_);
return io_activties_exempted_from_fault_injection.find(io_activty) !=
io_activties_exempted_from_fault_injection.end();
}
void AssertNoOpenFile() { assert(open_managed_files_.empty()); }
IOStatus GetError() { return fs_error_; }