mirror of
https://github.com/facebook/rocksdb.git
synced 2024-12-04 20:02:50 +00:00
f79b3d19a7
Summary: **Context/Summary:** Previous experience with bugs and flaky tests taught us there exist features in RocksDB vulnerable to race condition caused by acquiring db mutex at a particular timing. This PR aggressively exposes those vulnerable features by injecting spurious wakeup and sleep to cause acquiring db mutex at various timing in order to expose such race condition **Testing:** - `COERCE_CONTEXT_SWITCH=1 make -j56 check / make -j56 db_stress` should reveal - flaky tests caused by db mutex related race condition - Reverted https://github.com/facebook/rocksdb/pull/9528 - A/B testing on `COMPILE_WITH_TSAN=1 make -j56 listener_test` w/ and w/o `COERCE_CONTEXT_SWITCH=1` followed by `./listener_test --gtest_filter=EventListenerTest.MultiCF --gtest_repeat=10` - `COERCE_CONTEXT_SWITCH=1` can cause expected test failure (i.e, expose target TSAN data race error) within 10 run while the other couldn't. - This proves our injection can expose flaky tests caused by db mutex related race condition faster. - known or new race-condition-type of internal bug by continuously running this PR - Performance - High ops-threads time: COERCE_CONTEXT_SWITCH=1 regressed by 4 times slower (2:01.16 vs 0:22.10 elapsed ). This PR will be run as a separate CI job so this regression won't affect any existing job. ``` TEST_TMPDIR=$db /usr/bin/time ./db_stress \ --ops_per_thread=100000 --expected_values_dir=$exp --clear_column_family_one_in=0 \ --write_buffer_size=524288 —target_file_size_base=524288 —ingest_external_file_one_in=100 —compact_files_one_in=1000 —compact_range_one_in=1000 ``` - Start-up time: COERCE_CONTEXT_SWITCH=1 didn't regress by 25% (0:01.51 vs 0:01.29 elapsed) ``` TEST_TMPDIR=$db ./db_stress -ops_per_thread=100000000 -expected_values_dir=$exp --clear_column_family_one_in=0 & sleep 120; pkill -9 db_stress TEST_TMPDIR=$db /usr/bin/time ./db_stress \ --ops_per_thread=1 -reopen=0 --expected_values_dir=$exp --clear_column_family_one_in=0 --destroy_db_initially=0 ``` Pull Request resolved: https://github.com/facebook/rocksdb/pull/10291 Reviewed By: ajkr Differential Revision: D39231182 Pulled By: hx235 fbshipit-source-id: 7ab6695430460e0826727fd8c66679b32b3e44b6
91 lines
2.5 KiB
C++
91 lines
2.5 KiB
C++
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
|
|
// This source code is licensed under both the GPLv2 (found in the
|
|
// COPYING file in the root directory) and Apache 2.0 License
|
|
// (found in the LICENSE.Apache file in the root directory).
|
|
|
|
#include "monitoring/instrumented_mutex.h"
|
|
|
|
#include "monitoring/perf_context_imp.h"
|
|
#include "monitoring/thread_status_util.h"
|
|
#include "rocksdb/system_clock.h"
|
|
#include "test_util/sync_point.h"
|
|
|
|
namespace ROCKSDB_NAMESPACE {
|
|
namespace {
|
|
#ifndef NPERF_CONTEXT
|
|
Statistics* stats_for_report(SystemClock* clock, Statistics* stats) {
|
|
if (clock != nullptr && stats != nullptr &&
|
|
stats->get_stats_level() > kExceptTimeForMutex) {
|
|
return stats;
|
|
} else {
|
|
return nullptr;
|
|
}
|
|
}
|
|
#endif // NPERF_CONTEXT
|
|
} // namespace
|
|
|
|
void InstrumentedMutex::Lock() {
|
|
PERF_CONDITIONAL_TIMER_FOR_MUTEX_GUARD(
|
|
db_mutex_lock_nanos, stats_code_ == DB_MUTEX_WAIT_MICROS,
|
|
stats_for_report(clock_, stats_), stats_code_);
|
|
LockInternal();
|
|
}
|
|
|
|
void InstrumentedMutex::LockInternal() {
|
|
#ifndef NDEBUG
|
|
ThreadStatusUtil::TEST_StateDelay(ThreadStatus::STATE_MUTEX_WAIT);
|
|
#endif
|
|
#ifdef COERCE_CONTEXT_SWITCH
|
|
if (stats_code_ == DB_MUTEX_WAIT_MICROS) {
|
|
thread_local Random rnd(301);
|
|
if (rnd.OneIn(2)) {
|
|
if (bg_cv_) {
|
|
bg_cv_->SignalAll();
|
|
}
|
|
sched_yield();
|
|
} else {
|
|
uint32_t sleep_us = rnd.Uniform(11) * 1000;
|
|
if (bg_cv_) {
|
|
bg_cv_->SignalAll();
|
|
}
|
|
SystemClock::Default()->SleepForMicroseconds(sleep_us);
|
|
}
|
|
}
|
|
#endif
|
|
mutex_.Lock();
|
|
}
|
|
|
|
void InstrumentedCondVar::Wait() {
|
|
PERF_CONDITIONAL_TIMER_FOR_MUTEX_GUARD(
|
|
db_condition_wait_nanos, stats_code_ == DB_MUTEX_WAIT_MICROS,
|
|
stats_for_report(clock_, stats_), stats_code_);
|
|
WaitInternal();
|
|
}
|
|
|
|
void InstrumentedCondVar::WaitInternal() {
|
|
#ifndef NDEBUG
|
|
ThreadStatusUtil::TEST_StateDelay(ThreadStatus::STATE_MUTEX_WAIT);
|
|
#endif
|
|
cond_.Wait();
|
|
}
|
|
|
|
bool InstrumentedCondVar::TimedWait(uint64_t abs_time_us) {
|
|
PERF_CONDITIONAL_TIMER_FOR_MUTEX_GUARD(
|
|
db_condition_wait_nanos, stats_code_ == DB_MUTEX_WAIT_MICROS,
|
|
stats_for_report(clock_, stats_), stats_code_);
|
|
return TimedWaitInternal(abs_time_us);
|
|
}
|
|
|
|
bool InstrumentedCondVar::TimedWaitInternal(uint64_t abs_time_us) {
|
|
#ifndef NDEBUG
|
|
ThreadStatusUtil::TEST_StateDelay(ThreadStatus::STATE_MUTEX_WAIT);
|
|
#endif
|
|
|
|
TEST_SYNC_POINT_CALLBACK("InstrumentedCondVar::TimedWaitInternal",
|
|
&abs_time_us);
|
|
|
|
return cond_.TimedWait(abs_time_us);
|
|
}
|
|
|
|
} // namespace ROCKSDB_NAMESPACE
|