From d3c4b7fe0bd0b7f3ec27895c517d43cc6b4c06ad Mon Sep 17 00:00:00 2001 From: Hui Xiao Date: Mon, 10 Jun 2024 12:35:53 -0700 Subject: [PATCH] Enable reopen with un-synced data loss in crash test (#12746) Summary: **Context/Summary:** https://github.com/facebook/rocksdb/pull/12567 disabled reopen with un-synced data loss in crash test since we discovered un-synced WAL loss and we currently don't support prefix recovery in reopen. This PR explicitly sync WAL data before close to avoid such data loss case from happening and add back the testing coverage. Pull Request resolved: https://github.com/facebook/rocksdb/pull/12746 Test Plan: CI Reviewed By: ajkr Differential Revision: D58326890 Pulled By: hx235 fbshipit-source-id: 0865f715e97c5948d7cb3aea62fe2a626cb6522a --- db_stress_tool/db_stress_test_base.cc | 21 +++++++++++++++++++++ tools/db_crashtest.py | 5 ----- 2 files changed, 21 insertions(+), 5 deletions(-) diff --git a/db_stress_tool/db_stress_test_base.cc b/db_stress_tool/db_stress_test_base.cc index c4c3e0f454..04f7b245f9 100644 --- a/db_stress_tool/db_stress_test_base.cc +++ b/db_stress_tool/db_stress_test_base.cc @@ -3398,6 +3398,27 @@ void StressTest::Reopen(ThreadState* thread) { } column_families_.clear(); + // Currently reopen does not restore expected state + // with potential data loss in mind like the first open before + // crash-recovery verification does. Therefore it always expects no data loss + // and we should ensure no data loss in testing. + // TODO(hx235): eliminate the FlushWAL(true /* sync */)/SyncWAL() below + if (!FLAGS_disable_wal) { + Status s; + if (FLAGS_manual_wal_flush_one_in > 0) { + s = db_->FlushWAL(/*sync=*/true); + } else { + s = db_->SyncWAL(); + } + if (!s.ok()) { + fprintf(stderr, + "Error persisting WAL data which is needed before reopening the " + "DB: %s\n", + s.ToString().c_str()); + exit(1); + } + } + if (thread->rand.OneIn(2)) { Status s = db_->Close(); if (!s.ok()) { diff --git a/tools/db_crashtest.py b/tools/db_crashtest.py index 7fdca1c089..894f26f1b7 100644 --- a/tools/db_crashtest.py +++ b/tools/db_crashtest.py @@ -738,11 +738,6 @@ def finalize_and_sanitize(src_params): # files, which would be problematic when unsynced data can be lost in # crash recoveries. dest_params["enable_compaction_filter"] = 0 - # TODO(hx235): re-enable "reopen" after supporting unsynced data loss - # verification upon reopen. Currently reopen does not restore expected state - # with potential data loss in mind like start of each `./db_stress` run. - # Therefore it always expects no data loss. - dest_params["reopen"] = 0 # Only under WritePrepared txns, unordered_write would provide the same guarnatees as vanilla rocksdb # unordered_write is only enabled with --txn, and txn_params disables inplace_update_support, so # setting allow_concurrent_memtable_write=1 won't conflcit with inplace_update_support.