mirror of https://github.com/facebook/rocksdb.git
Fix nullptr access and race to fault_fs_guard (#12799)
Summary: **Context/Summary:** There are a couple places where we forgot to check fault_fs_guard before accessing it. So we can see something like this occasionally ``` =138831==Hint: address points to the zero page. SCARINESS: 10 (null-deref) AddressSanitizer:DEADLYSIGNAL #0 0x18b9e0b in rocksdb::ThreadLocalPtr::Get() const fbcode/internal_repo_rocksdb/repo/util/thread_local.cc:503 https://github.com/facebook/rocksdb/issues/1 0x83d8b7 in rocksdb::StressTest::TestCompactRange(rocksdb::ThreadState*, long, rocksdb::Slice const&, rocksdb::ColumnFamilyHandle*) fbcode/internal_repo_rocksdb/repo/utilities/fault_injection_fs.h ``` Also accessing of `io_activties_exempted_from_fault_injection.find` not fully synced so we see the following ``` WARNING: ThreadSanitizer: data race (pid=90939) Write of size 8 at 0x7b4c000004d0 by thread T762 (mutexes: write M0): #0 std::_Rb_tree<rocksdb::Env::IOActivity, rocksdb::Env::IOActivity, std::_Identity<rocksdb::Env::IOActivity>, std::less<rocksdb::Env::IOActivity>, std::allocator<rocksdb::Env::IOActivity>>::operator=(std::_Rb_tree<rocksdb::Env::IOActivity, rocksdb::Env::IOActivity, std::_Identity<rocksdb::Env::IOActivity>, std::less<rocksdb::Env::IOActivity>, std::allocator<rocksdb::Env::IOActivity>> const&) fbcode/third-party-buck/platform010/build/libgcc/include/c++/trunk/bits/stl_tree.h:208 (db_stress+0x411c32) (BuildId: b803e5aca22c6b080defed8e85b7bfec) https://github.com/facebook/rocksdb/issues/1 rocksdb::DbStressListener::OnErrorRecoveryCompleted(rocksdb::Status) fbcode/third-party-buck/platform010/build/libgcc/include/c++/trunk/bits/stl_set.h:298 (db_stress+0x4112e5) (BuildId: b803e5aca22c6b080defed8e85b7bfec) https://github.com/facebook/rocksdb/issues/2 rocksdb::EventHelpers::NotifyOnErrorRecoveryEnd(std::vector<std::shared_ptr<rocksdb::EventListener>, std::allocator<std::shared_ptr<rocksdb::EventListener>>> const&, rocksdb::Status const&, rocksdb::Status const&, rocksdb::InstrumentedMutex*) fbcode/internal_repo_rocksdb/repo/db/event_helpers.cc:239 (db_stress+0xa09d60) (BuildId: b803e5aca22c6b080defed8e85b7bfec) Previous read of size 8 at 0x7b4c000004d0 by thread T131 (mutexes: write M1): #0 rocksdb::FaultInjectionTestFS::MaybeInjectThreadLocalError(rocksdb::FaultInjectionIOType, rocksdb::IOOptions const&, rocksdb::FaultInjectionTestFS::ErrorOperation, rocksdb::Slice*, bool, char*, bool, bool*) fbcode/third-party-buck/platform010/build/libgcc/include/c++/trunk/bits/stl_tree.h:798 (db_stress+0xf7d0f3) (BuildId: b803e5aca22c6b080defed8e85b7bfec) ``` Pull Request resolved: https://github.com/facebook/rocksdb/pull/12799 Test Plan: CI Reviewed By: jowlyzhang Differential Revision: D58917449 Pulled By: hx235 fbshipit-source-id: f24fc1acc2a7d91f9f285447a97ba41397f48dbd
This commit is contained in:
parent
e3f5125ff1
commit
56f7ef50d7
|
@ -359,10 +359,10 @@ class CfConsistencyStressTest : public StressTest {
|
|||
if (fault_fs_guard) {
|
||||
fault_fs_guard->DisableThreadLocalErrorInjection(
|
||||
FaultInjectionIOType::kRead);
|
||||
|
||||
fault_fs_guard->DisableThreadLocalErrorInjection(
|
||||
FaultInjectionIOType::kMetadataRead);
|
||||
}
|
||||
|
||||
if (s.ok() || s.IsNotFound()) {
|
||||
const bool cmp_found = s.ok();
|
||||
|
||||
|
|
|
@ -19,8 +19,6 @@
|
|||
|
||||
ROCKSDB_NAMESPACE::Env* db_stress_listener_env = nullptr;
|
||||
ROCKSDB_NAMESPACE::Env* db_stress_env = nullptr;
|
||||
// If non-null, injects read error at a rate specified by the
|
||||
// read_fault_one_in or write_fault_one_in flag
|
||||
std::shared_ptr<ROCKSDB_NAMESPACE::FaultInjectionTestFS> fault_fs_guard;
|
||||
std::shared_ptr<ROCKSDB_NAMESPACE::SecondaryCache> compressed_secondary_cache;
|
||||
std::shared_ptr<ROCKSDB_NAMESPACE::Cache> block_cache;
|
||||
|
|
|
@ -1040,6 +1040,10 @@ void StressTest::OperateDb(ThreadState* thread) {
|
|||
FaultInjectionIOType::kRead);
|
||||
fault_fs_guard->DisableThreadLocalErrorInjection(
|
||||
FaultInjectionIOType::kMetadataRead);
|
||||
fault_fs_guard->DisableThreadLocalErrorInjection(
|
||||
FaultInjectionIOType::kWrite);
|
||||
fault_fs_guard->DisableThreadLocalErrorInjection(
|
||||
FaultInjectionIOType::kMetadataWrite);
|
||||
}
|
||||
|
||||
// Verify no writes during LockWAL
|
||||
|
@ -1098,6 +1102,10 @@ void StressTest::OperateDb(ThreadState* thread) {
|
|||
FaultInjectionIOType::kRead);
|
||||
fault_fs_guard->EnableThreadLocalErrorInjection(
|
||||
FaultInjectionIOType::kMetadataRead);
|
||||
fault_fs_guard->EnableThreadLocalErrorInjection(
|
||||
FaultInjectionIOType::kWrite);
|
||||
fault_fs_guard->EnableThreadLocalErrorInjection(
|
||||
FaultInjectionIOType::kMetadataWrite);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -2476,13 +2484,19 @@ Status StressTest::TestCheckpoint(ThreadState* thread,
|
|||
std::vector<std::string> files;
|
||||
|
||||
// Temporarily disable error injection to print debugging information
|
||||
fault_fs_guard->DisableThreadLocalErrorInjection(
|
||||
FaultInjectionIOType::kMetadataRead);
|
||||
if (fault_fs_guard) {
|
||||
fault_fs_guard->DisableThreadLocalErrorInjection(
|
||||
FaultInjectionIOType::kMetadataRead);
|
||||
}
|
||||
|
||||
Status my_s = db_stress_env->GetChildren(checkpoint_dir, &files);
|
||||
|
||||
// Enable back disable error injection disabled for printing debugging
|
||||
// information
|
||||
fault_fs_guard->EnableThreadLocalErrorInjection(
|
||||
FaultInjectionIOType::kMetadataRead);
|
||||
if (fault_fs_guard) {
|
||||
fault_fs_guard->EnableThreadLocalErrorInjection(
|
||||
FaultInjectionIOType::kMetadataRead);
|
||||
}
|
||||
assert(my_s.ok());
|
||||
|
||||
for (const auto& f : files) {
|
||||
|
@ -2964,19 +2978,33 @@ void StressTest::TestCompactRange(ThreadState* thread, int64_t rand_key,
|
|||
uint32_t pre_hash = 0;
|
||||
if (thread->rand.OneIn(2)) {
|
||||
// Temporarily disable error injection to for validation
|
||||
if (fault_fs_guard) {
|
||||
fault_fs_guard->DisableThreadLocalErrorInjection(
|
||||
FaultInjectionIOType::kMetadataRead);
|
||||
fault_fs_guard->DisableThreadLocalErrorInjection(
|
||||
FaultInjectionIOType::kRead);
|
||||
fault_fs_guard->DisableThreadLocalErrorInjection(
|
||||
FaultInjectionIOType::kMetadataWrite);
|
||||
fault_fs_guard->DisableThreadLocalErrorInjection(
|
||||
FaultInjectionIOType::kWrite);
|
||||
}
|
||||
|
||||
// Declare a snapshot and compare the data before and after the compaction
|
||||
fault_fs_guard->DisableThreadLocalErrorInjection(
|
||||
FaultInjectionIOType::kMetadataRead);
|
||||
fault_fs_guard->DisableThreadLocalErrorInjection(
|
||||
FaultInjectionIOType::kRead);
|
||||
pre_snapshot = db_->GetSnapshot();
|
||||
pre_hash =
|
||||
GetRangeHash(thread, pre_snapshot, column_family, start_key, end_key);
|
||||
|
||||
// Enable back error injection disabled for validation
|
||||
fault_fs_guard->EnableThreadLocalErrorInjection(
|
||||
FaultInjectionIOType::kMetadataRead);
|
||||
fault_fs_guard->EnableThreadLocalErrorInjection(
|
||||
FaultInjectionIOType::kRead);
|
||||
if (fault_fs_guard) {
|
||||
fault_fs_guard->EnableThreadLocalErrorInjection(
|
||||
FaultInjectionIOType::kMetadataRead);
|
||||
fault_fs_guard->EnableThreadLocalErrorInjection(
|
||||
FaultInjectionIOType::kRead);
|
||||
fault_fs_guard->EnableThreadLocalErrorInjection(
|
||||
FaultInjectionIOType::kMetadataWrite);
|
||||
fault_fs_guard->EnableThreadLocalErrorInjection(
|
||||
FaultInjectionIOType::kWrite);
|
||||
}
|
||||
}
|
||||
std::ostringstream compact_range_opt_oss;
|
||||
compact_range_opt_oss << "exclusive_manual_compaction: "
|
||||
|
@ -3012,11 +3040,16 @@ void StressTest::TestCompactRange(ThreadState* thread, int64_t rand_key,
|
|||
|
||||
if (pre_snapshot != nullptr) {
|
||||
// Temporarily disable error injection for validation
|
||||
// Declaring a snapshot and compare the data before and after the compaction
|
||||
fault_fs_guard->DisableThreadLocalErrorInjection(
|
||||
FaultInjectionIOType::kMetadataRead);
|
||||
fault_fs_guard->DisableThreadLocalErrorInjection(
|
||||
FaultInjectionIOType::kRead);
|
||||
if (fault_fs_guard) {
|
||||
fault_fs_guard->DisableThreadLocalErrorInjection(
|
||||
FaultInjectionIOType::kMetadataRead);
|
||||
fault_fs_guard->DisableThreadLocalErrorInjection(
|
||||
FaultInjectionIOType::kRead);
|
||||
fault_fs_guard->DisableThreadLocalErrorInjection(
|
||||
FaultInjectionIOType::kMetadataWrite);
|
||||
fault_fs_guard->DisableThreadLocalErrorInjection(
|
||||
FaultInjectionIOType::kWrite);
|
||||
}
|
||||
uint32_t post_hash =
|
||||
GetRangeHash(thread, pre_snapshot, column_family, start_key, end_key);
|
||||
if (pre_hash != post_hash) {
|
||||
|
@ -3032,11 +3065,17 @@ void StressTest::TestCompactRange(ThreadState* thread, int64_t rand_key,
|
|||
thread->shared->SetVerificationFailure();
|
||||
}
|
||||
db_->ReleaseSnapshot(pre_snapshot);
|
||||
// Enable back error injection disabled for validation
|
||||
fault_fs_guard->EnableThreadLocalErrorInjection(
|
||||
FaultInjectionIOType::kMetadataRead);
|
||||
fault_fs_guard->EnableThreadLocalErrorInjection(
|
||||
FaultInjectionIOType::kRead);
|
||||
if (fault_fs_guard) {
|
||||
// Enable back error injection disabled for validation
|
||||
fault_fs_guard->EnableThreadLocalErrorInjection(
|
||||
FaultInjectionIOType::kMetadataRead);
|
||||
fault_fs_guard->EnableThreadLocalErrorInjection(
|
||||
FaultInjectionIOType::kRead);
|
||||
fault_fs_guard->EnableThreadLocalErrorInjection(
|
||||
FaultInjectionIOType::kMetadataWrite);
|
||||
fault_fs_guard->EnableThreadLocalErrorInjection(
|
||||
FaultInjectionIOType::kWrite);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -1607,14 +1607,13 @@ class NonBatchedOpsStressTest : public StressTest {
|
|||
|
||||
if (FLAGS_verify_before_write) {
|
||||
// Temporarily disable error injection for preparation
|
||||
fault_fs_guard->DisableThreadLocalErrorInjection(
|
||||
FaultInjectionIOType::kRead);
|
||||
fault_fs_guard->DisableThreadLocalErrorInjection(
|
||||
FaultInjectionIOType::kWrite);
|
||||
fault_fs_guard->DisableThreadLocalErrorInjection(
|
||||
FaultInjectionIOType::kMetadataRead);
|
||||
fault_fs_guard->DisableThreadLocalErrorInjection(
|
||||
FaultInjectionIOType::kMetadataWrite);
|
||||
if (fault_fs_guard) {
|
||||
fault_fs_guard->DisableThreadLocalErrorInjection(
|
||||
FaultInjectionIOType::kRead);
|
||||
fault_fs_guard->DisableThreadLocalErrorInjection(
|
||||
FaultInjectionIOType::kMetadataRead);
|
||||
}
|
||||
|
||||
std::string from_db;
|
||||
Status s = db_->Get(read_opts, cfh, k, &from_db);
|
||||
bool res = VerifyOrSyncValue(
|
||||
|
@ -1622,14 +1621,12 @@ class NonBatchedOpsStressTest : public StressTest {
|
|||
/* msg_prefix */ "Pre-Put Get verification", from_db, s);
|
||||
|
||||
// Enable back error injection disabled for preparation
|
||||
fault_fs_guard->EnableThreadLocalErrorInjection(
|
||||
FaultInjectionIOType::kRead);
|
||||
fault_fs_guard->EnableThreadLocalErrorInjection(
|
||||
FaultInjectionIOType::kWrite);
|
||||
fault_fs_guard->EnableThreadLocalErrorInjection(
|
||||
FaultInjectionIOType::kMetadataRead);
|
||||
fault_fs_guard->EnableThreadLocalErrorInjection(
|
||||
FaultInjectionIOType::kMetadataWrite);
|
||||
if (fault_fs_guard) {
|
||||
fault_fs_guard->EnableThreadLocalErrorInjection(
|
||||
FaultInjectionIOType::kRead);
|
||||
fault_fs_guard->EnableThreadLocalErrorInjection(
|
||||
FaultInjectionIOType::kMetadataRead);
|
||||
}
|
||||
if (!res) {
|
||||
return s;
|
||||
}
|
||||
|
@ -1897,19 +1894,24 @@ class NonBatchedOpsStressTest : public StressTest {
|
|||
std::ostringstream ingest_options_oss;
|
||||
|
||||
// Temporarily disable error injection for preparation
|
||||
fault_fs_guard->DisableThreadLocalErrorInjection(
|
||||
FaultInjectionIOType::kMetadataRead);
|
||||
fault_fs_guard->DisableThreadLocalErrorInjection(
|
||||
FaultInjectionIOType::kMetadataWrite);
|
||||
if (fault_fs_guard) {
|
||||
fault_fs_guard->DisableThreadLocalErrorInjection(
|
||||
FaultInjectionIOType::kMetadataRead);
|
||||
fault_fs_guard->DisableThreadLocalErrorInjection(
|
||||
FaultInjectionIOType::kMetadataWrite);
|
||||
}
|
||||
|
||||
if (db_stress_env->FileExists(sst_filename).ok()) {
|
||||
// Maybe we terminated abnormally before, so cleanup to give this file
|
||||
// ingestion a clean slate
|
||||
s = db_stress_env->DeleteFile(sst_filename);
|
||||
}
|
||||
fault_fs_guard->EnableThreadLocalErrorInjection(
|
||||
FaultInjectionIOType::kMetadataRead);
|
||||
fault_fs_guard->EnableThreadLocalErrorInjection(
|
||||
FaultInjectionIOType::kMetadataWrite);
|
||||
if (fault_fs_guard) {
|
||||
fault_fs_guard->EnableThreadLocalErrorInjection(
|
||||
FaultInjectionIOType::kMetadataRead);
|
||||
fault_fs_guard->EnableThreadLocalErrorInjection(
|
||||
FaultInjectionIOType::kMetadataWrite);
|
||||
}
|
||||
|
||||
SstFileWriter sst_file_writer(EnvOptions(options_), options_);
|
||||
if (s.ok()) {
|
||||
|
|
|
@ -1228,8 +1228,7 @@ IOStatus FaultInjectionTestFS::MaybeInjectThreadLocalReadError(
|
|||
ErrorContext* ctx =
|
||||
static_cast<ErrorContext*>(injected_thread_local_read_error_.Get());
|
||||
if (ctx == nullptr || !ctx->enable_error_injection || !ctx->one_in ||
|
||||
io_activties_exempted_from_fault_injection.find(io_options.io_activity) !=
|
||||
io_activties_exempted_from_fault_injection.end()) {
|
||||
ShouldIOActivtiesExemptFromFaultInjection(io_options.io_activity)) {
|
||||
return IOStatus::OK();
|
||||
}
|
||||
|
||||
|
@ -1306,8 +1305,7 @@ IOStatus FaultInjectionTestFS::MaybeInjectThreadLocalError(
|
|||
|
||||
ErrorContext* ctx = GetErrorContextFromFaultInjectionIOType(type);
|
||||
if (ctx == nullptr || !ctx->enable_error_injection || !ctx->one_in ||
|
||||
io_activties_exempted_from_fault_injection.find(io_options.io_activity) !=
|
||||
io_activties_exempted_from_fault_injection.end()) {
|
||||
ShouldIOActivtiesExemptFromFaultInjection(io_options.io_activity)) {
|
||||
return IOStatus::OK();
|
||||
}
|
||||
|
||||
|
|
|
@ -425,6 +425,12 @@ class FaultInjectionTestFS : public FileSystemWrapper {
|
|||
io_activties_exempted_from_fault_injection = io_activties;
|
||||
}
|
||||
|
||||
bool ShouldIOActivtiesExemptFromFaultInjection(Env::IOActivity io_activty) {
|
||||
MutexLock l(&mutex_);
|
||||
return io_activties_exempted_from_fault_injection.find(io_activty) !=
|
||||
io_activties_exempted_from_fault_injection.end();
|
||||
}
|
||||
|
||||
void AssertNoOpenFile() { assert(open_managed_files_.empty()); }
|
||||
|
||||
IOStatus GetError() { return fs_error_; }
|
||||
|
|
Loading…
Reference in New Issue