Fix runtime error in UpdateTieredCache due to integer underflow (#11949)

Summary:
With the introduction of the `UpdateTieredCache` API, its possible to dynamically change the compressed secondary cache ratio of the total cache capacity. In order to optimize performance, we avoid using a mutex when inserting/releasing placeholder entries, which can result in some inaccuracy in the accounting during the dynamic update. This inaccuracy was causing a runtime error due to an integer underflow in `UpdateCacheReservationRatio`, causing ubsan crash tests to fail. This PR fixes it by explicitly checking for the underflow.

Pull Request resolved: https://github.com/facebook/rocksdb/pull/11949

Test Plan:
1. Added a unit test that fails without the fix
2. Run ubsan_crash

Reviewed By: akankshamahajan15

Differential Revision: D50240217

Pulled By: anand1976

fbshipit-source-id: d2f7b79da54eec8b61aec2cc1f2943da5d5847ac
This commit is contained in:
anand76 2023-10-12 15:09:40 -07:00 committed by Facebook GitHub Bot
parent d010b02e86
commit 90e160733e
4 changed files with 64 additions and 10 deletions

View file

@ -1312,6 +1312,27 @@ TEST_P(CompressedSecCacheTestWithTiered, DynamicUpdateWithReservation) {
ASSERT_OK(cache_res_mgr()->UpdateCacheReservation(0));
}
TEST_P(CompressedSecCacheTestWithTiered,
DynamicUpdateWithReservationUnderflow) {
std::shared_ptr<Cache> tiered_cache = GetTieredCache();
ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->LoadDependency(
{{"CacheWithSecondaryAdapter::Release:ChargeSecCache1",
"CacheWithSecondaryAdapter::UpdateCacheReservationRatio:Begin"},
{"CacheWithSecondaryAdapter::UpdateCacheReservationRatio:End",
"CacheWithSecondaryAdapter::Release:ChargeSecCache2"}});
ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing();
port::Thread reserve_release_thread([&]() {
EXPECT_EQ(cache_res_mgr()->UpdateCacheReservation(50), Status::OK());
EXPECT_EQ(cache_res_mgr()->UpdateCacheReservation(0), Status::OK());
});
ASSERT_OK(UpdateTieredCache(tiered_cache, 100 << 20, 0.01));
reserve_release_thread.join();
ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing();
ASSERT_OK(UpdateTieredCache(tiered_cache, 100 << 20, 0.3));
}
INSTANTIATE_TEST_CASE_P(
CompressedSecCacheTests, CompressedSecCacheTestWithTiered,
::testing::Values(

View file

@ -5,8 +5,11 @@
#include "cache/secondary_cache_adapter.h"
#include <atomic>
#include "cache/tiered_secondary_cache.h"
#include "monitoring/perf_context_imp.h"
#include "test_util/sync_point.h"
#include "util/cast_util.h"
namespace ROCKSDB_NAMESPACE {
@ -100,7 +103,8 @@ CacheWithSecondaryAdapter::CacheWithSecondaryAdapter(
// secondary cache is freed from the reservation.
s = pri_cache_res_->UpdateCacheReservation(sec_capacity);
assert(s.ok());
sec_cache_res_ratio_ = (double)sec_capacity / target_->GetCapacity();
sec_cache_res_ratio_.store((double)sec_capacity / target_->GetCapacity(),
std::memory_order_relaxed);
}
}
@ -233,7 +237,8 @@ Status CacheWithSecondaryAdapter::Insert(const Slice& key, ObjectPtr value,
CompressionType type) {
Status s = target_->Insert(key, value, helper, charge, handle, priority);
if (s.ok() && value == nullptr && distribute_cache_res_) {
size_t sec_charge = static_cast<size_t>(charge * (sec_cache_res_ratio_));
size_t sec_charge = static_cast<size_t>(
charge * (sec_cache_res_ratio_.load(std::memory_order_relaxed)));
s = secondary_cache_->Deflate(sec_charge);
assert(s.ok());
s = pri_cache_res_->UpdateCacheReservation(sec_charge, /*increase=*/false);
@ -282,7 +287,10 @@ bool CacheWithSecondaryAdapter::Release(Handle* handle,
ObjectPtr v = target_->Value(handle);
if (v == nullptr && distribute_cache_res_) {
size_t charge = target_->GetCharge(handle);
size_t sec_charge = static_cast<size_t>(charge * (sec_cache_res_ratio_));
size_t sec_charge = static_cast<size_t>(
charge * (sec_cache_res_ratio_.load(std::memory_order_relaxed)));
TEST_SYNC_POINT("CacheWithSecondaryAdapter::Release:ChargeSecCache1");
TEST_SYNC_POINT("CacheWithSecondaryAdapter::Release:ChargeSecCache2");
Status s = secondary_cache_->Inflate(sec_charge);
assert(s.ok());
s = pri_cache_res_->UpdateCacheReservation(sec_charge, /*increase=*/true);
@ -433,7 +441,9 @@ const char* CacheWithSecondaryAdapter::Name() const {
// where the new capacity < total cache reservations.
void CacheWithSecondaryAdapter::SetCapacity(size_t capacity) {
size_t sec_capacity = static_cast<size_t>(
capacity * (distribute_cache_res_ ? sec_cache_res_ratio_ : 0.0));
capacity * (distribute_cache_res_
? sec_cache_res_ratio_.load(std::memory_order_relaxed)
: 0.0));
size_t old_sec_capacity = 0;
if (distribute_cache_res_) {
@ -493,7 +503,8 @@ void CacheWithSecondaryAdapter::SetCapacity(size_t capacity) {
// in the future.
Status CacheWithSecondaryAdapter::UpdateCacheReservationRatio(
double compressed_secondary_ratio) {
if (!distribute_cache_res_ || sec_cache_res_ratio_ == 0.0) {
if (!distribute_cache_res_ ||
sec_cache_res_ratio_.load(std::memory_order_relaxed) == 0.0) {
return Status::NotSupported();
}
@ -507,13 +518,33 @@ Status CacheWithSecondaryAdapter::UpdateCacheReservationRatio(
return s;
}
size_t old_sec_reserved =
old_sec_capacity - pri_cache_res_->GetTotalMemoryUsed();
TEST_SYNC_POINT(
"CacheWithSecondaryAdapter::UpdateCacheReservationRatio:Begin");
// There's a possible race condition here. Since the read of pri_cache_res_
// memory used (secondary cache usage charged to primary cache), and the
// change to sec_cache_res_ratio_ are not guarded by a mutex, its possible
// that an Insert/Release in another thread might decrease/increase the
// pri_cache_res_ reservation by the wrong amount. This should not be a
// problem because updating the sec/pri ratio is a rare operation, and
// the worst that can happen is we may over/under charge the secondary
// cache usage by a little bit. But we do need to protect against
// underflow of old_sec_reserved.
// TODO: Make the accounting more accurate by tracking the total memory
// reservation on the primary cache. This will also allow us to remove
// the restriction of not being able to change the sec/pri ratio from
// 0.0 to higher.
size_t sec_charge_to_pri = pri_cache_res_->GetTotalMemoryUsed();
size_t old_sec_reserved = (old_sec_capacity > sec_charge_to_pri)
? (old_sec_capacity - sec_charge_to_pri)
: 0;
// Calculate the new secondary cache reservation
size_t sec_reserved = static_cast<size_t>(
old_sec_reserved *
(double)(compressed_secondary_ratio / sec_cache_res_ratio_));
sec_cache_res_ratio_ = compressed_secondary_ratio;
(double)(compressed_secondary_ratio /
sec_cache_res_ratio_.load(std::memory_order_relaxed)));
sec_cache_res_ratio_.store(compressed_secondary_ratio,
std::memory_order_relaxed);
if (sec_capacity > old_sec_capacity) {
// We're increasing the ratio, thus ending up with a larger secondary
// cache and a smaller usable primary cache capacity. Similar to
@ -553,6 +584,7 @@ Status CacheWithSecondaryAdapter::UpdateCacheReservationRatio(
}
}
TEST_SYNC_POINT("CacheWithSecondaryAdapter::UpdateCacheReservationRatio:End");
#ifndef NDEBUG
// As mentioned in the function comments, we may accumulate some erros when
// the ratio is changed. We set a flag here which disables some assertions

View file

@ -80,7 +80,7 @@ class CacheWithSecondaryAdapter : public CacheWrapper {
std::shared_ptr<ConcurrentCacheReservationManager> pri_cache_res_;
// Fraction of a cache memory reservation to be assigned to the secondary
// cache
double sec_cache_res_ratio_;
std::atomic<double> sec_cache_res_ratio_;
port::Mutex mutex_;
#ifndef NDEBUG
bool ratio_changed_ = false;

View file

@ -0,0 +1 @@
Fixed a possible underflow when computing the compressed secondary cache share of memory reservations while updating the compressed secondary to total block cache ratio.