mirror of https://github.com/facebook/rocksdb.git
Fix race condition in db_stress checkpoint cleanup (#11389)
Summary: The old cleanup code had a race condition: 1. Test thread: DestroyDB() marked a file as trash 2. DeleteScheduler thread: Got the file's size and decided to delete it in chunks 3. Test thread: DestroyDir() deleted that trash file 4. DeleteScheduler thread: Began deleting in chunks starting by calling ReopenWritableFile(). Unfortunately this recreates the deleted trash file 5. Test thread: DestroyDir() fails to remove the parent directory because it contains the file created in 4. 6. Test thread: Checkpoint::Create() fails due to the directory already existing It could be repro'd with the following patch/command. Patch: ``` diff --git a/file/delete_scheduler.cc b/file/delete_scheduler.cc index 8a2d1615d..337d24a60 100644 --- a/file/delete_scheduler.cc +++ b/file/delete_scheduler.cc @@ -317,6 +317,12 @@ Status DeleteScheduler::DeleteTrashFile(const std::string& path_in_trash, &num_hard_links, nullptr); if (my_status.ok()) { if (num_hard_links == 1) { + // Give some time for DestroyDir() to delete file entries. Then, the + // below `ReopenWritableFile()` will recreate files, preventing the + // parent directory from being deleted. + if (rand() % 2 == 0) { + usleep(1000); + } std::unique_ptr<FSWritableFile> wf; my_status = fs_->ReopenWritableFile(path_in_trash, FileOptions(), &wf, nullptr); diff --git a/file/file_util.cc b/file/file_util.cc index 43608fcdc..2cee1ad8e 100644 --- a/file/file_util.cc +++ b/file/file_util.cc @@ -263,6 +263,13 @@ Status DestroyDir(Env* env, const std::string& dir) { } } + // Give some time for the DeleteScheduler thread's ReopenWritableFile() to + // recreate deleted files + if (dir.find("checkpoint") != std::string::npos) { + fprintf(stderr, "waiting to destroy %s\n", dir.c_str()); + usleep(10000); + } + if (s.ok()) { s = env->DeleteDir(dir); // DeleteDir might or might not report NotFound ``` Command: ``` TEST_TMPDIR=/dev/shm python3 tools/db_crashtest.py blackbox --simple --write_buffer_size=131072 --target_file_size_base=131072 --max_bytes_for_level_base=524288 --checkpoint_one_in=100 --clear_column_family_one_in=0 --max_key=1000 --value_size_mult=33 --sst_file_manager_bytes_per_truncate=4096 --sst_file_manager_bytes_per_sec=1048576 --interval=3 --compression_type=none --sync_fault_injection=1 ``` Obviously we don't want to use scheduled deletion here as we need the checkpoint directory deleted immediately. I suspect the DestroyDir() was an attempt to fixup incomplete DestroyDB()s. Now that we expect DestroyDB() to be complete I removed that code. Pull Request resolved: https://github.com/facebook/rocksdb/pull/11389 Reviewed By: hx235 Differential Revision: D45137142 Pulled By: ajkr fbshipit-source-id: 2af743d342c77cc414fd25fc4c9d7c9c6079ad24
This commit is contained in:
parent
43e9a60bb2
commit
6cac4c79d4
|
@ -1835,19 +1835,11 @@ Status StressTest::TestCheckpoint(ThreadState* thread,
|
|||
Options tmp_opts(options_);
|
||||
tmp_opts.listeners.clear();
|
||||
tmp_opts.env = db_stress_env;
|
||||
// Avoid delayed deletion so whole directory can be deleted
|
||||
tmp_opts.sst_file_manager.reset();
|
||||
|
||||
DestroyDB(checkpoint_dir, tmp_opts);
|
||||
|
||||
if (db_stress_env->FileExists(checkpoint_dir).ok()) {
|
||||
// If the directory might still exist, try to delete the files one by one.
|
||||
// Likely a trash file is still there.
|
||||
Status my_s = DestroyDir(db_stress_env, checkpoint_dir);
|
||||
if (!my_s.ok()) {
|
||||
fprintf(stderr, "Fail to destory directory before checkpoint: %s",
|
||||
my_s.ToString().c_str());
|
||||
}
|
||||
}
|
||||
|
||||
Checkpoint* checkpoint = nullptr;
|
||||
Status s = Checkpoint::Create(db_, &checkpoint);
|
||||
if (s.ok()) {
|
||||
|
|
Loading…
Reference in New Issue