diff --git a/HISTORY.md b/HISTORY.md index efdadab65a..ba299fce16 100644 --- a/HISTORY.md +++ b/HISTORY.md @@ -3,6 +3,7 @@ ### Bug Fixes * Fixes a bug in directed IO mode when calling MultiGet() for blobs in the same blob file. The bug is caused by not sorting the blob read requests by file offsets. * Fix the incorrect disabling of SST rate limited deletion when the WAL and DB are in different directories. Only WAL rate limited deletion should be disabled if its in a different directory. +* Fix `DisableManualCompaction()` to cancel compactions even when they are waiting on automatic compactions to drain due to `CompactRangeOptions::exclusive_manual_compaction == true`. ### New Features * Print information about blob files when using "ldb list_live_files_metadata" diff --git a/db/db_compaction_test.cc b/db/db_compaction_test.cc index 3263444114..24e9dbf4a6 100644 --- a/db/db_compaction_test.cc +++ b/db/db_compaction_test.cc @@ -6817,6 +6817,49 @@ TEST_F(DBCompactionTest, FIFOWarm) { Destroy(options); } +TEST_F(DBCompactionTest, + DisableManualCompactionDoesNotWaitForDrainingAutomaticCompaction) { + // When `CompactRangeOptions::exclusive_manual_compaction == true`, we wait + // for automatic compactions to drain before starting the manual compaction. + // This test verifies `DisableManualCompaction()` can cancel such a compaction + // without waiting for the drain to complete. + const int kNumL0Files = 4; + + // Enforces manual compaction enters wait loop due to pending automatic + // compaction. + ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->LoadDependency( + {{"DBImpl::BGWorkCompaction", "DBImpl::RunManualCompaction:NotScheduled"}, + {"DBImpl::RunManualCompaction:WaitScheduled", + "BackgroundCallCompaction:0"}}); + // The automatic compaction will cancel the waiting manual compaction. + // Completing this implies the cancellation did not wait on automatic + // compactions to finish. 
+ bool callback_completed = false; + ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( + "BackgroundCallCompaction:0", [&](void* /*arg*/) { + db_->DisableManualCompaction(); + callback_completed = true; + }); + ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); + + Options options = CurrentOptions(); + options.level0_file_num_compaction_trigger = kNumL0Files; + Reopen(options); + + for (int i = 0; i < kNumL0Files; ++i) { + ASSERT_OK(Put(Key(1), "value1")); + ASSERT_OK(Put(Key(2), "value2")); + ASSERT_OK(Flush()); + } + + CompactRangeOptions cro; + cro.exclusive_manual_compaction = true; + ASSERT_TRUE(db_->CompactRange(cro, nullptr, nullptr).IsIncomplete()); + + ASSERT_OK(dbfull()->TEST_WaitForCompact()); + ASSERT_TRUE(callback_completed); +} + #endif // !defined(ROCKSDB_LITE) } // namespace ROCKSDB_NAMESPACE diff --git a/db/db_impl/db_impl_compaction_flush.cc b/db/db_impl/db_impl_compaction_flush.cc index f85df49eb2..18226284b3 100644 --- a/db/db_impl/db_impl_compaction_flush.cc +++ b/db/db_impl/db_impl_compaction_flush.cc @@ -1715,8 +1715,11 @@ Status DBImpl::RunManualCompaction( // When a manual compaction arrives, temporarily disable scheduling of // non-manual compactions and wait until the number of scheduled compaction - // jobs drops to zero. This is needed to ensure that this manual compaction - // can compact any range of keys/files. + // jobs drops to zero. This used to be needed to ensure that this manual + // compaction can compact any range of keys/files. Now it is optional + // (see `CompactRangeOptions::exclusive_manual_compaction`). The use case for + // `exclusive_manual_compaction=true` (the default) is unclear beyond not + // trusting the new code. // // HasPendingManualCompaction() is true when at least one thread is inside // RunManualCompaction(), i.e. 
during that time no other compaction will @@ -1730,8 +1733,20 @@ Status DBImpl::RunManualCompaction( AddManualCompaction(&manual); TEST_SYNC_POINT_CALLBACK("DBImpl::RunManualCompaction:NotScheduled", &mutex_); if (exclusive) { + // Limitation: there's no way to wake up the below loop when user sets + // `*manual.canceled`. So `CompactRangeOptions::exclusive_manual_compaction` + // and `CompactRangeOptions::canceled` might not work well together. while (bg_bottom_compaction_scheduled_ > 0 || bg_compaction_scheduled_ > 0) { + if (manual_compaction_paused_ > 0 || + (manual.canceled != nullptr && *manual.canceled == true)) { + // Pretend the error came from compaction so the below cleanup/error + // handling code can process it. + manual.done = true; + manual.status = + Status::Incomplete(Status::SubCode::kManualCompactionPaused); + break; + } TEST_SYNC_POINT("DBImpl::RunManualCompaction:WaitScheduled"); ROCKS_LOG_INFO( immutable_db_options_.info_log, @@ -2224,6 +2239,10 @@ Status DBImpl::EnableAutoCompaction( void DBImpl::DisableManualCompaction() { InstrumentedMutexLock l(&mutex_); manual_compaction_paused_.fetch_add(1, std::memory_order_release); + + // Wake up manual compactions waiting to start. + bg_cv_.SignalAll(); + // Wait for any pending manual compactions to finish (typically through // failing with `Status::Incomplete`) prior to returning. This way we are // guaranteed no pending manual compaction will commit while manual diff --git a/include/rocksdb/options.h b/include/rocksdb/options.h index 93a0df7adf..f53dcaa2e5 100644 --- a/include/rocksdb/options.h +++ b/include/rocksdb/options.h @@ -1741,6 +1741,9 @@ struct CompactRangeOptions { Slice* full_history_ts_low = nullptr; // Allows cancellation of an in-progress manual compaction. + // + // Cancellation can be delayed waiting on automatic compactions when used + // together with `exclusive_manual_compaction == true`. std::atomic<bool>* canceled = nullptr; };