Fix data race in WalManager (#12439)

Summary:
Crash tests were failing due to a data race in accessing `purge_wal_files_last_run_`. This PR changes it to an atomic (`RelaxedAtomic<uint64_t>`) that is updated with a compare-and-swap loop.

Pull Request resolved: https://github.com/facebook/rocksdb/pull/12439

Test Plan:
- existing UT
- not yet able to reproduce the race with `python3 tools/db_crashtest.py whitebox --simple --max_key=25000000 --WAL_ttl_seconds=1` under TSAN; will monitor internal crash tests

Reviewed By: anand1976

Differential Revision: D54920817

Pulled By: cbi42

fbshipit-source-id: 80ee026b1785ad5dba11295ed35c88889df5f5a6
This commit is contained in:
Changyu Bi 2024-03-14 21:24:06 -07:00 committed by Facebook GitHub Bot
parent 1104eaa35e
commit 096fb9b67d
3 changed files with 11 additions and 6 deletions

View File

@ -158,11 +158,14 @@ void WalManager::PurgeObsoleteWALFiles() {
? std::min(kDefaultIntervalToDeleteObsoleteWAL,
std::max(uint64_t{1}, db_options_.WAL_ttl_seconds / 2))
: kDefaultIntervalToDeleteObsoleteWAL;
if (purge_wal_files_last_run_ + time_to_check > now_seconds) {
uint64_t old_last_run_time = purge_wal_files_last_run_.LoadRelaxed();
do {
if (old_last_run_time + time_to_check > now_seconds) {
// last run is recent enough, no need to purge
return;
}
purge_wal_files_last_run_ = now_seconds;
} while (!purge_wal_files_last_run_.CasWeakRelaxed(
/*expected=*/old_last_run_time, /*desired=*/now_seconds));
std::string archival_dir = ArchivalDirectory(wal_dir_);
std::vector<std::string> files;

View File

@ -25,6 +25,7 @@
#include "rocksdb/status.h"
#include "rocksdb/transaction_log.h"
#include "rocksdb/types.h"
#include "util/atomic.h"
namespace ROCKSDB_NAMESPACE {
@ -118,7 +119,7 @@ class WalManager {
port::Mutex read_first_record_cache_mutex_;
// last time when PurgeObsoleteWALFiles ran.
uint64_t purge_wal_files_last_run_;
RelaxedAtomic<uint64_t> purge_wal_files_last_run_;
bool seq_per_batch_;

View File

@ -0,0 +1 @@
* Fixed a data race in WalManager that may affect how frequently PurgeObsoleteWALFiles() runs.