Fix a bug that crashes the service when write buffer manager fails to insert to block cache (#6619)

Summary:
https://github.com/facebook/rocksdb/issues/6247 reports that when write buffer manager fails to insert the dummy entry to block cache, null pointer is still stored and used to release the handle and cause corruption. Fix the bug by not releasing it with null handle.
Pull Request resolved: https://github.com/facebook/rocksdb/pull/6619

Test Plan: Add a unit test that fails without the fix.

Reviewed By: ajkr

Differential Revision: D20776769

fbshipit-source-id: 4127fbd9f295a0a3e45774746ffcd91f939f6287
This commit is contained in:
sdong 2020-04-01 11:24:58 -07:00 committed by Facebook GitHub Bot
parent b5818f87f0
commit 57096ab13e
3 changed files with 48 additions and 3 deletions

View File

@ -3,6 +3,9 @@
### Behavior changes ### Behavior changes
* Since RocksDB 6.8, ttl-based FIFO compaction can drop a file whose oldest key becomes older than options.ttl while others have not. This fix reverts this and makes ttl-based FIFO compaction use the file's flush time as the criterion. This fix also requires that max_open_files = -1 and compaction_options_fifo.allow_compaction = false to function properly. * Since RocksDB 6.8, ttl-based FIFO compaction can drop a file whose oldest key becomes older than options.ttl while others have not. This fix reverts this and makes ttl-based FIFO compaction use the file's flush time as the criterion. This fix also requires that max_open_files = -1 and compaction_options_fifo.allow_compaction = false to function properly.
### Bug Fixes
* Fix a bug which might crash the service when write buffer manager fails to insert the dummy handle to the block cache.
## 6.9.0 (03/29/2020) ## 6.9.0 (03/29/2020)
### Public API Change ### Public API Change
* Fix spelling so that API now has correctly spelled transaction state name `COMMITTED`, while the old misspelled `COMMITED` is still available as an alias. * Fix spelling so that API now has correctly spelled transaction state name `COMMITTED`, while the old misspelled `COMMITED` is still available as an alias.

View File

@ -69,7 +69,9 @@ WriteBufferManager::~WriteBufferManager() {
#ifndef ROCKSDB_LITE #ifndef ROCKSDB_LITE
if (cache_rep_) { if (cache_rep_) {
for (auto* handle : cache_rep_->dummy_handles_) { for (auto* handle : cache_rep_->dummy_handles_) {
cache_rep_->cache_->Release(handle, true); if (handle != nullptr) {
cache_rep_->cache_->Release(handle, true);
}
} }
} }
#endif // ROCKSDB_LITE #endif // ROCKSDB_LITE
@ -88,9 +90,16 @@ void WriteBufferManager::ReserveMemWithCache(size_t mem) {
while (new_mem_used > cache_rep_->cache_allocated_size_) { while (new_mem_used > cache_rep_->cache_allocated_size_) {
// Expand size by at least 256KB. // Expand size by at least 256KB.
// Add a dummy record to the cache // Add a dummy record to the cache
Cache::Handle* handle; Cache::Handle* handle = nullptr;
cache_rep_->cache_->Insert(cache_rep_->GetNextCacheKey(), nullptr, cache_rep_->cache_->Insert(cache_rep_->GetNextCacheKey(), nullptr,
kSizeDummyEntry, nullptr, &handle); kSizeDummyEntry, nullptr, &handle);
// We keep the handle even if insertion fails and a null handle is
// returned, so that when memory shrinks, we don't release extra
// entries from cache.
// Ideallly we should prevent this allocation from happening if
// this insertion fails. However, the callers to this code path
// are not able to handle failures properly. We'll need to improve
// it in the future.
cache_rep_->dummy_handles_.push_back(handle); cache_rep_->dummy_handles_.push_back(handle);
cache_rep_->cache_allocated_size_ += kSizeDummyEntry; cache_rep_->cache_allocated_size_ += kSizeDummyEntry;
} }
@ -119,7 +128,11 @@ void WriteBufferManager::FreeMemWithCache(size_t mem) {
if (new_mem_used < cache_rep_->cache_allocated_size_ / 4 * 3 && if (new_mem_used < cache_rep_->cache_allocated_size_ / 4 * 3 &&
cache_rep_->cache_allocated_size_ - kSizeDummyEntry > new_mem_used) { cache_rep_->cache_allocated_size_ - kSizeDummyEntry > new_mem_used) {
assert(!cache_rep_->dummy_handles_.empty()); assert(!cache_rep_->dummy_handles_.empty());
cache_rep_->cache_->Release(cache_rep_->dummy_handles_.back(), true); auto* handle = cache_rep_->dummy_handles_.back();
// If insert failed, handle is null so we should not release.
if (handle != nullptr) {
cache_rep_->cache_->Release(handle, true);
}
cache_rep_->dummy_handles_.pop_back(); cache_rep_->dummy_handles_.pop_back();
cache_rep_->cache_allocated_size_ -= kSizeDummyEntry; cache_rep_->cache_allocated_size_ -= kSizeDummyEntry;
} }

View File

@ -146,6 +146,35 @@ TEST_F(WriteBufferManagerTest, NoCapCacheCost) {
ASSERT_GE(cache->GetPinnedUsage(), 1024 * 1024); ASSERT_GE(cache->GetPinnedUsage(), 1024 * 1024);
ASSERT_LT(cache->GetPinnedUsage(), 1024 * 1024 + 10000); ASSERT_LT(cache->GetPinnedUsage(), 1024 * 1024 + 10000);
} }
TEST_F(WriteBufferManagerTest, CacheFull) {
// 15MB cache size with strict capacity
LRUCacheOptions lo;
lo.capacity = 12 * 1024 * 1024;
lo.num_shard_bits = 0;
lo.strict_capacity_limit = true;
std::shared_ptr<Cache> cache = NewLRUCache(lo);
std::unique_ptr<WriteBufferManager> wbf(new WriteBufferManager(0, cache));
wbf->ReserveMem(10 * 1024 * 1024);
size_t prev_pinned = cache->GetPinnedUsage();
ASSERT_GE(prev_pinned, 10 * 1024 * 1024);
// Some insert will fail
wbf->ReserveMem(10 * 1024 * 1024);
ASSERT_LE(cache->GetPinnedUsage(), 12 * 1024 * 1024);
// Increase capacity so next insert will succeed
cache->SetCapacity(30 * 1024 * 1024);
wbf->ReserveMem(10 * 1024 * 1024);
ASSERT_GT(cache->GetPinnedUsage(), 20 * 1024 * 1024);
// Gradually release 20 MB
for (int i = 0; i < 40; i++) {
wbf->FreeMem(512 * 1024);
}
ASSERT_GE(cache->GetPinnedUsage(), 10 * 1024 * 1024);
ASSERT_LT(cache->GetPinnedUsage(), 20 * 1024 * 1024);
}
#endif // ROCKSDB_LITE #endif // ROCKSDB_LITE
} // namespace ROCKSDB_NAMESPACE } // namespace ROCKSDB_NAMESPACE