mirror of
https://github.com/facebook/rocksdb.git
synced 2024-11-26 16:30:56 +00:00
Prevent manual flush hanging in read-only mode (#4615)
Summary: The logic to wait for stall conditions to clear before beginning a manual flush didn't take into account whether the DB was in read-only mode. In read-only mode the stall conditions would never clear since no background work is happening, so the wait would be never-ending. It's probably better to return an error to the user. Pull Request resolved: https://github.com/facebook/rocksdb/pull/4615 Differential Revision: D12888008 Pulled By: ajkr fbshipit-source-id: 1c474b42a7ac38d9fd0d0e2340ff1d53e684d83c
This commit is contained in:
parent
b8f68bac38
commit
5c794d94c4
|
@ -220,6 +220,44 @@ TEST_F(DBFlushTest, FlushError) {
|
|||
ASSERT_NE(s, Status::OK());
|
||||
}
|
||||
|
||||
TEST_F(DBFlushTest, ManualFlushFailsInReadOnlyMode) {
|
||||
// Regression test for bug where manual flush hangs forever when the DB
|
||||
// is in read-only mode. Verify it now at least returns, despite failing.
|
||||
Options options;
|
||||
std::unique_ptr<FaultInjectionTestEnv> fault_injection_env(
|
||||
new FaultInjectionTestEnv(env_));
|
||||
options.env = fault_injection_env.get();
|
||||
options.max_write_buffer_number = 2;
|
||||
Reopen(options);
|
||||
|
||||
// Trigger a first flush but don't let it run
|
||||
ASSERT_OK(db_->PauseBackgroundWork());
|
||||
ASSERT_OK(Put("key1", "value1"));
|
||||
FlushOptions flush_opts;
|
||||
flush_opts.wait = false;
|
||||
ASSERT_OK(db_->Flush(flush_opts));
|
||||
|
||||
// Write a key to the second memtable so we have something to flush later
|
||||
// after the DB is in read-only mode.
|
||||
ASSERT_OK(Put("key2", "value2"));
|
||||
|
||||
// Let the first flush continue, hit an error, and put the DB in read-only
|
||||
// mode.
|
||||
fault_injection_env->SetFilesystemActive(false);
|
||||
ASSERT_OK(db_->ContinueBackgroundWork());
|
||||
dbfull()->TEST_WaitForFlushMemTable();
|
||||
uint64_t num_bg_errors;
|
||||
ASSERT_TRUE(db_->GetIntProperty(DB::Properties::kBackgroundErrors,
|
||||
&num_bg_errors));
|
||||
ASSERT_GT(num_bg_errors, 0);
|
||||
|
||||
// In the bug scenario, triggering another flush would cause the second flush
|
||||
// to hang forever. After the fix we expect it to return an error.
|
||||
ASSERT_NOK(db_->Flush(FlushOptions()));
|
||||
|
||||
Close();
|
||||
}
|
||||
|
||||
TEST_P(DBAtomicFlushTest, ManualAtomicFlush) {
|
||||
Options options = CurrentOptions();
|
||||
options.create_if_missing = true;
|
||||
|
|
|
@ -1543,6 +1543,14 @@ Status DBImpl::WaitUntilFlushWouldNotStallWrites(ColumnFamilyData* cfd,
|
|||
WriteStallCondition write_stall_condition = WriteStallCondition::kNormal;
|
||||
do {
|
||||
if (write_stall_condition != WriteStallCondition::kNormal) {
|
||||
// Same error handling as user writes: Don't wait if there's a
|
||||
// background error, even if it's a soft error. We might wait here
|
||||
// indefinitely as the pending flushes/compactions may never finish
|
||||
// successfully, resulting in the stall condition lasting indefinitely
|
||||
if (error_handler_.IsBGWorkStopped()) {
|
||||
return error_handler_.GetBGError();
|
||||
}
|
||||
|
||||
TEST_SYNC_POINT("DBImpl::WaitUntilFlushWouldNotStallWrites:StallWait");
|
||||
ROCKS_LOG_INFO(immutable_db_options_.info_log,
|
||||
"[%s] WaitUntilFlushWouldNotStallWrites"
|
||||
|
|
Loading…
Reference in a new issue