mirror of https://github.com/facebook/rocksdb.git
Fix unsynced data loss correctness test with mixed `-test_batches_snapshots` (#9302)
Summary: This fixes two bugs in the recently committed DB verification following crash-recovery with unsynced data loss (https://github.com/facebook/rocksdb/issues/8966): The first bug was in crash test runs involving mixed values for `-test_batches_snapshots`. The problem was we were neither restoring expected values nor enabling tracing when `-test_batches_snapshots=1`. This caused a future `-test_batches_snapshots=0` run to not find enough trace data to restore expected values. The fix is to restore expected values at the start of `-test_batches_snapshots=1` runs, but still leave tracing disabled as we do not need to track those KVs. The second bug was in `db_stress` runs that restore the expected values file and use compaction filter. The compaction filter was initialized to use the pre-restore expected values, which would be `munmap()`'d during `FileExpectedStateManager::Restore()`. Then compaction filter would run into a segfault. The fix is just to reorder compaction filter init after expected values restore. Pull Request resolved: https://github.com/facebook/rocksdb/pull/9302 Test Plan: - To verify the first problem, the below sequence used to fail; now it passes. ``` $ ./db_stress --db=./test-db/ --expected_values_dir=./test-db-expected/ --max_key=100000 --ops_per_thread=1000 --sync_fault_injection=1 --clear_column_family_one_in=0 --destroy_db_initially=0 -reopen=0 -test_batches_snapshots=0 $ ./db_stress --db=./test-db/ --expected_values_dir=./test-db-expected/ --max_key=100000 --ops_per_thread=1000 --sync_fault_injection=1 --clear_column_family_one_in=0 --destroy_db_initially=0 -reopen=0 -test_batches_snapshots=1 $ ./db_stress --db=./test-db/ --expected_values_dir=./test-db-expected/ --max_key=100000 --ops_per_thread=1000 --sync_fault_injection=1 --clear_column_family_one_in=0 --destroy_db_initially=0 -reopen=0 -test_batches_snapshots=0 ``` - The second problem occurred rarely in the form of a SIGSEGV on a file that was `munmap()`d. I have not seen it after this PR though this doesn't prove much. Reviewed By: jay-zhuang Differential Revision: D33155283 Pulled By: ajkr fbshipit-source-id: 66fd0f0edf34015a010c30015f14f104734e964e
This commit is contained in:
parent
84228e21e8
commit
863c78d2c9
|
@ -298,17 +298,8 @@ void StressTest::FinishInitDb(SharedState* shared) {
|
|||
clock_->TimeToString(now / 1000000).c_str(), FLAGS_max_key);
|
||||
PreloadDbAndReopenAsReadOnly(FLAGS_max_key, shared);
|
||||
}
|
||||
if (FLAGS_enable_compaction_filter) {
|
||||
auto* compaction_filter_factory =
|
||||
reinterpret_cast<DbStressCompactionFilterFactory*>(
|
||||
options_.compaction_filter_factory.get());
|
||||
assert(compaction_filter_factory);
|
||||
compaction_filter_factory->SetSharedState(shared);
|
||||
fprintf(stdout, "Compaction filter factory: %s\n",
|
||||
compaction_filter_factory->Name());
|
||||
}
|
||||
|
||||
if (shared->HasHistory() && IsStateTracked()) {
|
||||
if (shared->HasHistory()) {
|
||||
// The way it works right now is, if there's any history, that means the
|
||||
// previous run mutating the DB had all its operations traced, in which case
|
||||
// we should always be able to `Restore()` the expected values to match the
|
||||
|
@ -329,6 +320,19 @@ void StressTest::FinishInitDb(SharedState* shared) {
|
|||
exit(1);
|
||||
}
|
||||
}
|
||||
|
||||
if (FLAGS_enable_compaction_filter) {
|
||||
auto* compaction_filter_factory =
|
||||
reinterpret_cast<DbStressCompactionFilterFactory*>(
|
||||
options_.compaction_filter_factory.get());
|
||||
assert(compaction_filter_factory);
|
||||
// This must be called only after any potential `SharedState::Restore()` has
|
||||
// completed in order for the `compaction_filter_factory` to operate on the
|
||||
// correct latest values file.
|
||||
compaction_filter_factory->SetSharedState(shared);
|
||||
fprintf(stdout, "Compaction filter factory: %s\n",
|
||||
compaction_filter_factory->Name());
|
||||
}
|
||||
}
|
||||
|
||||
bool StressTest::VerifySecondaries() {
|
||||
|
|
|
@ -326,6 +326,8 @@ bool FileExpectedStateManager::HasHistory() {
|
|||
|
||||
#ifndef ROCKSDB_LITE
|
||||
|
||||
namespace {
|
||||
|
||||
// An `ExpectedStateTraceRecordHandler` applies a configurable number of
|
||||
// write operation trace records to the configured expected state. It is used in
|
||||
// `FileExpectedStateManager::Restore()` to sync the expected state with the
|
||||
|
@ -429,6 +431,8 @@ class ExpectedStateTraceRecordHandler : public TraceRecord::Handler,
|
|||
ExpectedState* state_;
|
||||
};
|
||||
|
||||
} // anonymous namespace
|
||||
|
||||
Status FileExpectedStateManager::Restore(DB* db) {
|
||||
assert(HasHistory());
|
||||
SequenceNumber seqno = db->GetLatestSequenceNumber();
|
||||
|
|
Loading…
Reference in New Issue