From 57096ab13e9d373666f099f19d6eb73c6613e797 Mon Sep 17 00:00:00 2001 From: sdong Date: Wed, 1 Apr 2020 11:24:58 -0700 Subject: [PATCH] Fix a bug that crashes the service when write buffer manager fails to insert to block cache (#6619) Summary: https://github.com/facebook/rocksdb/issues/6247 reports that when write buffer manager fails to insert the dummy entry to block cache, null pointer is still stored and used to release the handle and cause corruption. Fix the bug by not releasing it with null handle. Pull Request resolved: https://github.com/facebook/rocksdb/pull/6619 Test Plan: Add a unit test that fails without the fix. Reviewed By: ajkr Differential Revision: D20776769 fbshipit-source-id: 4127fbd9f295a0a3e45774746ffcd91f939f6287 --- HISTORY.md | 3 +++ memtable/write_buffer_manager.cc | 19 +++++++++++++++--- memtable/write_buffer_manager_test.cc | 29 +++++++++++++++++++++++++++ 3 files changed, 48 insertions(+), 3 deletions(-) diff --git a/HISTORY.md b/HISTORY.md index fd9292bb5b..f9b8fcb0ee 100644 --- a/HISTORY.md +++ b/HISTORY.md @@ -3,6 +3,9 @@ ### Behavior changes * Since RocksDB 6.8, ttl-based FIFO compaction can drop a file whose oldest key becomes older than options.ttl while others have not. This fix reverts this and makes ttl-based FIFO compaction use the file's flush time as the criterion. This fix also requires that max_open_files = -1 and compaction_options_fifo.allow_compaction = false to function properly. +### Bug Fixes +* Fix a bug which might crash the service when write buffer manager fails to insert the dummy handle to the block cache. + ## 6.9.0 (03/29/2020) ### Public API Change * Fix spelling so that API now has correctly spelled transaction state name `COMMITTED`, while the old misspelled `COMMITED` is still available as an alias. diff --git a/memtable/write_buffer_manager.cc b/memtable/write_buffer_manager.cc index 8a2dc3b8b9..23ca1d7498 100644 --- a/memtable/write_buffer_manager.cc +++ b/memtable/write_buffer_manager.cc @@ -69,7 +69,9 @@ WriteBufferManager::~WriteBufferManager() { #ifndef ROCKSDB_LITE if (cache_rep_) { for (auto* handle : cache_rep_->dummy_handles_) { - cache_rep_->cache_->Release(handle, true); + if (handle != nullptr) { + cache_rep_->cache_->Release(handle, true); + } } } #endif // ROCKSDB_LITE @@ -88,9 +90,16 @@ void WriteBufferManager::ReserveMemWithCache(size_t mem) { while (new_mem_used > cache_rep_->cache_allocated_size_) { // Expand size by at least 256KB. // Add a dummy record to the cache - Cache::Handle* handle; + Cache::Handle* handle = nullptr; cache_rep_->cache_->Insert(cache_rep_->GetNextCacheKey(), nullptr, kSizeDummyEntry, nullptr, &handle); + // We keep the handle even if insertion fails and a null handle is + // returned, so that when memory shrinks, we don't release extra + // entries from cache. + // Ideallly we should prevent this allocation from happening if + // this insertion fails. However, the callers to this code path + // are not able to handle failures properly. We'll need to improve + // it in the future. cache_rep_->dummy_handles_.push_back(handle); cache_rep_->cache_allocated_size_ += kSizeDummyEntry; } @@ -119,7 +128,11 @@ void WriteBufferManager::FreeMemWithCache(size_t mem) { if (new_mem_used < cache_rep_->cache_allocated_size_ / 4 * 3 && cache_rep_->cache_allocated_size_ - kSizeDummyEntry > new_mem_used) { assert(!cache_rep_->dummy_handles_.empty()); - cache_rep_->cache_->Release(cache_rep_->dummy_handles_.back(), true); + auto* handle = cache_rep_->dummy_handles_.back(); + // If insert failed, handle is null so we should not release. + if (handle != nullptr) { + cache_rep_->cache_->Release(handle, true); + } cache_rep_->dummy_handles_.pop_back(); cache_rep_->cache_allocated_size_ -= kSizeDummyEntry; } diff --git a/memtable/write_buffer_manager_test.cc b/memtable/write_buffer_manager_test.cc index 4ea52348f1..0cdd7c4780 100644 --- a/memtable/write_buffer_manager_test.cc +++ b/memtable/write_buffer_manager_test.cc @@ -146,6 +146,35 @@ TEST_F(WriteBufferManagerTest, NoCapCacheCost) { ASSERT_GE(cache->GetPinnedUsage(), 1024 * 1024); ASSERT_LT(cache->GetPinnedUsage(), 1024 * 1024 + 10000); } + +TEST_F(WriteBufferManagerTest, CacheFull) { + // 15MB cache size with strict capacity + LRUCacheOptions lo; + lo.capacity = 12 * 1024 * 1024; + lo.num_shard_bits = 0; + lo.strict_capacity_limit = true; + std::shared_ptr cache = NewLRUCache(lo); + std::unique_ptr wbf(new WriteBufferManager(0, cache)); + wbf->ReserveMem(10 * 1024 * 1024); + size_t prev_pinned = cache->GetPinnedUsage(); + ASSERT_GE(prev_pinned, 10 * 1024 * 1024); + // Some insert will fail + wbf->ReserveMem(10 * 1024 * 1024); + ASSERT_LE(cache->GetPinnedUsage(), 12 * 1024 * 1024); + + // Increase capacity so next insert will succeed + cache->SetCapacity(30 * 1024 * 1024); + wbf->ReserveMem(10 * 1024 * 1024); + ASSERT_GT(cache->GetPinnedUsage(), 20 * 1024 * 1024); + + // Gradually release 20 MB + for (int i = 0; i < 40; i++) { + wbf->FreeMem(512 * 1024); + } + ASSERT_GE(cache->GetPinnedUsage(), 10 * 1024 * 1024); + ASSERT_LT(cache->GetPinnedUsage(), 20 * 1024 * 1024); +} + #endif // ROCKSDB_LITE } // namespace ROCKSDB_NAMESPACE