mirror of https://github.com/facebook/rocksdb.git
Deflake DBWALTest.FixSyncWalOnObseletedWalWithNewManifestCausingMissingWAL (#11016)
Summary: **Context/Summary:** Credit to ajkr's review https://github.com/facebook/rocksdb/pull/11016#pullrequestreview-1205020134: the flaky test https://app.circleci.com/pipelines/github/facebook/rocksdb/21985/workflows/5f6cc355-78c1-46d8-89ee-0fd679725a8a/jobs/540878 is caused by the `Flush()` called in the test returning before the background flush has found the obsoleted WAL and called `SyncWAL()` (i.e., before "sync_point_called" is set to true). Fix this by checking `sync_point_called == true` only after the obsoleted WAL is found and `SyncWAL()` is called. Also rename "sync_point_called" to something more specific. Also, fix a potential flakiness due to manually setting a small `max_manifest_file_size` threshold to force new manifest creation. This is unreliable, so I decided to use a sync point to force new manifest creation instead. Pull Request resolved: https://github.com/facebook/rocksdb/pull/11016 Test Plan: make check Reviewed By: pdillinger Differential Revision: D41717786 Pulled By: hx235 fbshipit-source-id: ad1e4701a987285bbe6c8e7d9b05c4db06b4edf4
This commit is contained in:
parent
23af6786a9
commit
15bb4ea084
|
@ -1602,9 +1602,6 @@ TEST_F(DBWALTest, RaceInstallFlushResultsWithWalObsoletion) {
|
|||
TEST_F(DBWALTest, FixSyncWalOnObseletedWalWithNewManifestCausingMissingWAL) {
|
||||
Options options = CurrentOptions();
|
||||
options.track_and_verify_wals_in_manifest = true;
|
||||
// Set a small max_manifest_file_size to force manifest creation
|
||||
// in SyncWAL() for test purpose
|
||||
options.max_manifest_file_size = 170;
|
||||
DestroyAndReopen(options);
|
||||
|
||||
// Accumulate memtable m1 and create the 1st wal (i.e, 4.log)
|
||||
|
@ -1619,25 +1616,26 @@ TEST_F(DBWALTest, FixSyncWalOnObseletedWalWithNewManifestCausingMissingWAL) {
|
|||
// active) log and release the lock
|
||||
// (2) SyncWAL() proceeds with the lock. It
|
||||
// creates a new manifest and syncs all the inactive wals before the latest
|
||||
// (i.e, active log), which is 4.log. SyncWAL() is not aware of the fact
|
||||
// that 4.log has been marked as to be obsoleted.
|
||||
// Prior to the fix, such wal sync will then add a WAL addition record of
|
||||
// 4.log to the new manifest without any special treatment.
|
||||
// (i.e, active log), which is 4.log. Note that SyncWAL() is not aware of the
|
||||
// fact that 4.log has been marked as to be obsoleted. Prior to the fix, such wal
|
||||
// sync will then add a WAL addition record of 4.log to the new manifest
|
||||
// without any special treatment.
|
||||
// (3) BackgroundFlush() will eventually purge 4.log.
|
||||
bool sync_point_called = false;
|
||||
bool new_manifest_created = false;
|
||||
bool wal_synced = false;
|
||||
SyncPoint::GetInstance()->SetCallBack(
|
||||
"FindObsoleteFiles::PostMutexUnlock", [&](void*) {
|
||||
ASSERT_OK(env_->FileExists(wal_file_path));
|
||||
|
||||
SyncPoint::GetInstance()->SetCallBack(
|
||||
"VersionSet::ProcessManifestWrites:BeforeNewManifest",
|
||||
[&](void*) { new_manifest_created = true; });
|
||||
"VersionSet::ProcessManifestWrites:"
|
||||
"PostDecidingCreateNewManifestOrNot",
|
||||
[&](void* arg) {
|
||||
bool* new_descriptor_log = (bool*)arg;
|
||||
*new_descriptor_log = true;
|
||||
});
|
||||
|
||||
ASSERT_OK(db_->SyncWAL());
|
||||
ASSERT_TRUE(new_manifest_created);
|
||||
|
||||
sync_point_called = true;
|
||||
wal_synced = true;
|
||||
});
|
||||
|
||||
SyncPoint::GetInstance()->SetCallBack(
|
||||
|
@ -1650,21 +1648,22 @@ TEST_F(DBWALTest, FixSyncWalOnObseletedWalWithNewManifestCausingMissingWAL) {
|
|||
"PostDeleteWAL");
|
||||
}
|
||||
});
|
||||
|
||||
SyncPoint::GetInstance()->LoadDependency(
|
||||
{{"DBWALTest::FixSyncWalOnObseletedWalWithNewManifestCausingMissingWAL::"
|
||||
{{"DBImpl::BackgroundCallFlush:FilesFound",
|
||||
"PreConfrimObsoletedWALSynced"},
|
||||
{"DBWALTest::FixSyncWalOnObseletedWalWithNewManifestCausingMissingWAL::"
|
||||
"PostDeleteWAL",
|
||||
"DBWALTest::FixSyncWalOnObseletedWalWithNewManifestCausingMissingWAL::"
|
||||
"PreConfrimWALDeleted"}});
|
||||
|
||||
SyncPoint::GetInstance()->EnableProcessing();
|
||||
|
||||
ASSERT_OK(Flush());
|
||||
ASSERT_TRUE(sync_point_called);
|
||||
|
||||
TEST_SYNC_POINT(
|
||||
"DBWALTest::FixSyncWalOnObseletedWalWithNewManifestCausingMissingWAL::"
|
||||
"PreConfrimWALDeleted");
|
||||
TEST_SYNC_POINT("PreConfrimObsoletedWALSynced");
|
||||
ASSERT_TRUE(wal_synced);
|
||||
|
||||
TEST_SYNC_POINT("PreConfrimWALDeleted");
|
||||
// BackgroundFlush() purged 4.log
|
||||
// because the memtable associated with the WAL was flushed and new WAL was
|
||||
// created (i.e, 8.log)
|
||||
|
|
|
@ -4983,6 +4983,9 @@ Status VersionSet::ProcessManifestWrites(
|
|||
} else {
|
||||
pending_manifest_file_number_ = manifest_file_number_;
|
||||
}
|
||||
TEST_SYNC_POINT_CALLBACK(
|
||||
"VersionSet::ProcessManifestWrites:PostDecidingCreateNewManifestOrNot",
|
||||
&new_descriptor_log);
|
||||
|
||||
// Local cached copy of state variable(s). WriteCurrentStateToManifest()
|
||||
// reads its content after releasing db mutex to avoid race with
|
||||
|
|
Loading…
Reference in New Issue