From d976f689770dfc1a0b47f06d176c1baa2f58eb3e Mon Sep 17 00:00:00 2001 From: Guido Tagliavini Ponce Date: Wed, 27 Jul 2022 18:55:55 -0700 Subject: [PATCH] Fix assertion failure and memory leak in ClockCache. (#10430) Summary: This fixes two issues: - [T127355728](https://www.internalfb.com/intern/tasks/?t=127355728): In the stress tests, when the ClockCache is operating close to full capacity and a burst of inserts are concurrently executed, every slot in the hash table may become occupied. This contradicts an assertion in the code, which is no longer valid in the lock-free setting. We are removing that assertion and handling the case of an insertion into a full table. - [T127427659](https://www.internalfb.com/intern/tasks/?t=127427659): There was a memory leak when an insertion is performed over capacity, but no handle is provided. In that case, a handle was dynamically allocated, but the pointer wasn't stored anywhere. Pull Request resolved: https://github.com/facebook/rocksdb/pull/10430 Test Plan: - ``make -j24 check`` - ``make -j24 USE_CLANG=1 COMPILE_WITH_ASAN=1 COMPILE_WITH_UBSAN=1 CRASH_TEST_EXT_ARGS="--duration=960 --cache_type=clock_cache" blackbox_crash_test_with_atomic_flush`` - ``make -j24 USE_CLANG=1 COMPILE_WITH_TSAN=1 CRASH_TEST_EXT_ARGS="--duration=960 --cache_type=clock_cache" blackbox_crash_test_with_atomic_flush`` Reviewed By: pdillinger Differential Revision: D38226114 Pulled By: guidotag fbshipit-source-id: 18f6ab7e6214e11e9721d5ff289db1bf795d0008 --- cache/clock_cache.cc | 28 +++++++++++++++++++--------- cache/clock_cache.h | 4 +--- 2 files changed, 20 insertions(+), 12 deletions(-) diff --git a/cache/clock_cache.cc b/cache/clock_cache.cc index 51984e1832..2275867d18 100644 --- a/cache/clock_cache.cc +++ b/cache/clock_cache.cc @@ -413,6 +413,15 @@ void ClockCacheShard::ApplyToSomeEntries( index_begin, index_end, false); } +ClockHandle* ClockCacheShard::DetachedInsert(ClockHandle* h) { + ClockHandle* e = new ClockHandle(); + *e = *h; + e->SetDetached(); + e->TryExternalRef(); + detached_usage_ += h->total_charge; + return e; +} + size_t ClockCacheShard::CalcEstimatedHandleCharge( size_t estimated_value_size, CacheMetadataChargePolicy metadata_charge_policy) { @@ -503,23 +512,24 @@ Status ClockCacheShard::Insert(const Slice& key, uint32_t hash, void* value, } } } else { - ClockHandle* h; - if (occupancy_local + 1 > table_.GetOccupancyLimit()) { + ClockHandle* h = nullptr; + if (handle != nullptr && occupancy_local + 1 > table_.GetOccupancyLimit()) { // Even if the user wishes to overload the cache, we can't insert into // the hash table. Instead, we dynamically allocate a new handle. - h = new ClockHandle(); - *h = tmp; - h->SetDetached(); - h->TryExternalRef(); - detached_usage_ += h->total_charge; + h = DetachedInsert(&tmp); // TODO: Return special status? } else { // Insert into the cache. Note that the cache might get larger than its // capacity if not enough space was freed up. autovector deleted; h = table_.Insert(&tmp, &deleted, handle != nullptr); - assert(h != nullptr); // The occupancy is way below the table size, so - // this insertion should never fail. + if (h == nullptr && handle != nullptr) { + // The table is full. This can happen when many threads simultaneously + // attempt an insert, and the table is operating close to full capacity. + h = DetachedInsert(&tmp); + } + // Notice that if handle == nullptr, we don't insert the entry but still + // return ok. if (deleted.size() > 0) { s = Status::OkOverwritten(); } diff --git a/cache/clock_cache.h b/cache/clock_cache.h index 64eeb6acc8..7a24b0bc66 100644 --- a/cache/clock_cache.h +++ b/cache/clock_cache.h @@ -770,9 +770,7 @@ class ALIGN_AS(CACHE_LINE_SIZE) ClockCacheShard final : public CacheShard { friend class ClockCache; friend class ClockCacheTest; - // Free some space following strict clock policy until enough space - // to hold (usage_ + charge) is freed or there are no evictable elements. - void EvictFromClock(size_t charge, autovector* deleted); + ClockHandle* DetachedInsert(ClockHandle* h); // Returns the charge of a single handle. static size_t CalcEstimatedHandleCharge(