From 90e160733ef27321c2a10b64225702da0dd548db Mon Sep 17 00:00:00 2001 From: anand76 Date: Thu, 12 Oct 2023 15:09:40 -0700 Subject: [PATCH] Fix runtime error in UpdateTieredCache due to integer underflow (#11949) Summary: With the introduction of the `UpdateTieredCache` API, its possible to dynamically change the compressed secondary cache ratio of the total cache capacity. In order to optimize performance, we avoid using a mutex when inserting/releasing placeholder entries, which can result in some inaccuracy in the accounting during the dynamic update. This inaccuracy was causing a runtime error due to an integer underflow in `UpdateCacheReservationRatio`, causing ubsan crash tests to fail. This PR fixes it by explicitly checking for the underflow. Pull Request resolved: https://github.com/facebook/rocksdb/pull/11949 Test Plan: 1. Added a unit test that fails without the fix 2. Run ubsan_crash Reviewed By: akankshamahajan15 Differential Revision: D50240217 Pulled By: anand1976 fbshipit-source-id: d2f7b79da54eec8b61aec2cc1f2943da5d5847ac --- cache/compressed_secondary_cache_test.cc | 21 ++++++++ cache/secondary_cache_adapter.cc | 50 +++++++++++++++---- cache/secondary_cache_adapter.h | 2 +- .../sec_cache_reservation_underflow.md | 1 + 4 files changed, 64 insertions(+), 10 deletions(-) create mode 100644 unreleased_history/bug_fixes/sec_cache_reservation_underflow.md diff --git a/cache/compressed_secondary_cache_test.cc b/cache/compressed_secondary_cache_test.cc index d82be10734..71702b29f1 100644 --- a/cache/compressed_secondary_cache_test.cc +++ b/cache/compressed_secondary_cache_test.cc @@ -1312,6 +1312,27 @@ TEST_P(CompressedSecCacheTestWithTiered, DynamicUpdateWithReservation) { ASSERT_OK(cache_res_mgr()->UpdateCacheReservation(0)); } +TEST_P(CompressedSecCacheTestWithTiered, + DynamicUpdateWithReservationUnderflow) { + std::shared_ptr tiered_cache = GetTieredCache(); + ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->LoadDependency( + {{"CacheWithSecondaryAdapter::Release:ChargeSecCache1", + "CacheWithSecondaryAdapter::UpdateCacheReservationRatio:Begin"}, + {"CacheWithSecondaryAdapter::UpdateCacheReservationRatio:End", + "CacheWithSecondaryAdapter::Release:ChargeSecCache2"}}); + ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); + + port::Thread reserve_release_thread([&]() { + EXPECT_EQ(cache_res_mgr()->UpdateCacheReservation(50), Status::OK()); + EXPECT_EQ(cache_res_mgr()->UpdateCacheReservation(0), Status::OK()); + }); + ASSERT_OK(UpdateTieredCache(tiered_cache, 100 << 20, 0.01)); + reserve_release_thread.join(); + ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing(); + + ASSERT_OK(UpdateTieredCache(tiered_cache, 100 << 20, 0.3)); +} + INSTANTIATE_TEST_CASE_P( CompressedSecCacheTests, CompressedSecCacheTestWithTiered, ::testing::Values( diff --git a/cache/secondary_cache_adapter.cc b/cache/secondary_cache_adapter.cc index b378197ccf..70571f5b2f 100644 --- a/cache/secondary_cache_adapter.cc +++ b/cache/secondary_cache_adapter.cc @@ -5,8 +5,11 @@ #include "cache/secondary_cache_adapter.h" +#include + #include "cache/tiered_secondary_cache.h" #include "monitoring/perf_context_imp.h" +#include "test_util/sync_point.h" #include "util/cast_util.h" namespace ROCKSDB_NAMESPACE { @@ -100,7 +103,8 @@ CacheWithSecondaryAdapter::CacheWithSecondaryAdapter( // secondary cache is freed from the reservation. s = pri_cache_res_->UpdateCacheReservation(sec_capacity); assert(s.ok()); - sec_cache_res_ratio_ = (double)sec_capacity / target_->GetCapacity(); + sec_cache_res_ratio_.store((double)sec_capacity / target_->GetCapacity(), + std::memory_order_relaxed); } } @@ -233,7 +237,8 @@ Status CacheWithSecondaryAdapter::Insert(const Slice& key, ObjectPtr value, CompressionType type) { Status s = target_->Insert(key, value, helper, charge, handle, priority); if (s.ok() && value == nullptr && distribute_cache_res_) { - size_t sec_charge = static_cast(charge * (sec_cache_res_ratio_)); + size_t sec_charge = static_cast( + charge * (sec_cache_res_ratio_.load(std::memory_order_relaxed))); s = secondary_cache_->Deflate(sec_charge); assert(s.ok()); s = pri_cache_res_->UpdateCacheReservation(sec_charge, /*increase=*/false); @@ -282,7 +287,10 @@ bool CacheWithSecondaryAdapter::Release(Handle* handle, ObjectPtr v = target_->Value(handle); if (v == nullptr && distribute_cache_res_) { size_t charge = target_->GetCharge(handle); - size_t sec_charge = static_cast(charge * (sec_cache_res_ratio_)); + size_t sec_charge = static_cast( + charge * (sec_cache_res_ratio_.load(std::memory_order_relaxed))); + TEST_SYNC_POINT("CacheWithSecondaryAdapter::Release:ChargeSecCache1"); + TEST_SYNC_POINT("CacheWithSecondaryAdapter::Release:ChargeSecCache2"); Status s = secondary_cache_->Inflate(sec_charge); assert(s.ok()); s = pri_cache_res_->UpdateCacheReservation(sec_charge, /*increase=*/true); @@ -433,7 +441,9 @@ const char* CacheWithSecondaryAdapter::Name() const { // where the new capacity < total cache reservations. void CacheWithSecondaryAdapter::SetCapacity(size_t capacity) { size_t sec_capacity = static_cast( - capacity * (distribute_cache_res_ ? sec_cache_res_ratio_ : 0.0)); + capacity * (distribute_cache_res_ + ? sec_cache_res_ratio_.load(std::memory_order_relaxed) + : 0.0)); size_t old_sec_capacity = 0; if (distribute_cache_res_) { @@ -493,7 +503,8 @@ void CacheWithSecondaryAdapter::SetCapacity(size_t capacity) { // in the future. Status CacheWithSecondaryAdapter::UpdateCacheReservationRatio( double compressed_secondary_ratio) { - if (!distribute_cache_res_ || sec_cache_res_ratio_ == 0.0) { + if (!distribute_cache_res_ || + sec_cache_res_ratio_.load(std::memory_order_relaxed) == 0.0) { return Status::NotSupported(); } @@ -507,13 +518,33 @@ Status CacheWithSecondaryAdapter::UpdateCacheReservationRatio( return s; } - size_t old_sec_reserved = - old_sec_capacity - pri_cache_res_->GetTotalMemoryUsed(); + TEST_SYNC_POINT( + "CacheWithSecondaryAdapter::UpdateCacheReservationRatio:Begin"); + + // There's a possible race condition here. Since the read of pri_cache_res_ + // memory used (secondary cache usage charged to primary cache), and the + // change to sec_cache_res_ratio_ are not guarded by a mutex, its possible + // that an Insert/Release in another thread might decrease/increase the + // pri_cache_res_ reservation by the wrong amount. This should not be a + // problem because updating the sec/pri ratio is a rare operation, and + // the worst that can happen is we may over/under charge the secondary + // cache usage by a little bit. But we do need to protect against + // underflow of old_sec_reserved. + // TODO: Make the accounting more accurate by tracking the total memory + // reservation on the primary cache. This will also allow us to remove + // the restriction of not being able to change the sec/pri ratio from + // 0.0 to higher. + size_t sec_charge_to_pri = pri_cache_res_->GetTotalMemoryUsed(); + size_t old_sec_reserved = (old_sec_capacity > sec_charge_to_pri) + ? (old_sec_capacity - sec_charge_to_pri) + : 0; // Calculate the new secondary cache reservation size_t sec_reserved = static_cast( old_sec_reserved * - (double)(compressed_secondary_ratio / sec_cache_res_ratio_)); - sec_cache_res_ratio_ = compressed_secondary_ratio; + (double)(compressed_secondary_ratio / + sec_cache_res_ratio_.load(std::memory_order_relaxed))); + sec_cache_res_ratio_.store(compressed_secondary_ratio, + std::memory_order_relaxed); if (sec_capacity > old_sec_capacity) { // We're increasing the ratio, thus ending up with a larger secondary // cache and a smaller usable primary cache capacity. Similar to @@ -553,6 +584,7 @@ Status CacheWithSecondaryAdapter::UpdateCacheReservationRatio( } } + TEST_SYNC_POINT("CacheWithSecondaryAdapter::UpdateCacheReservationRatio:End"); #ifndef NDEBUG // As mentioned in the function comments, we may accumulate some erros when // the ratio is changed. We set a flag here which disables some assertions diff --git a/cache/secondary_cache_adapter.h b/cache/secondary_cache_adapter.h index 34d52a665e..0d5f2d6ea4 100644 --- a/cache/secondary_cache_adapter.h +++ b/cache/secondary_cache_adapter.h @@ -80,7 +80,7 @@ class CacheWithSecondaryAdapter : public CacheWrapper { std::shared_ptr pri_cache_res_; // Fraction of a cache memory reservation to be assigned to the secondary // cache - double sec_cache_res_ratio_; + std::atomic sec_cache_res_ratio_; port::Mutex mutex_; #ifndef NDEBUG bool ratio_changed_ = false; diff --git a/unreleased_history/bug_fixes/sec_cache_reservation_underflow.md b/unreleased_history/bug_fixes/sec_cache_reservation_underflow.md new file mode 100644 index 0000000000..571100a3ed --- /dev/null +++ b/unreleased_history/bug_fixes/sec_cache_reservation_underflow.md @@ -0,0 +1 @@ +Fixed a possible underflow when computing the compressed secondary cache share of memory reservations while updating the compressed secondary to total block cache ratio.