From beb44ec3ebf665f60b3aa9cccb99e7c305a7472f Mon Sep 17 00:00:00 2001 From: Siying Dong Date: Tue, 16 Apr 2019 11:59:35 -0700 Subject: [PATCH] WriteBufferManager's dummy entry size to block cache 1MB -> 256KB (#5175) Summary: Dummy cache size of 1MB is too large for small block sizes. Our GetDefaultCacheShardBits() uses min_shard_size = 512L * 1024L to determine number of shards, so 1MB will exceed the size of the whole shard and make the cache exceed the budget. Change it to 256KB accordingly. There shouldn't be obvious performance impact, since inserting a cache entry every 256KB of memtable inserts is still infrequent enough. Pull Request resolved: https://github.com/facebook/rocksdb/pull/5175 Differential Revision: D14954289 Pulled By: siying fbshipit-source-id: 2c275255c1ac3992174e06529e44c55538325c94 --- HISTORY.md | 1 + db/db_test2.cc | 18 +++---- memtable/write_buffer_manager.cc | 4 +- memtable/write_buffer_manager_test.cc | 76 +++++++++++++-------------- 4 files changed, 50 insertions(+), 49 deletions(-) diff --git a/HISTORY.md b/HISTORY.md index 8074553d4b..1beb2b0c89 100644 --- a/HISTORY.md +++ b/HISTORY.md @@ -15,6 +15,7 @@ * Fix a bug in Encryption Env which could cause encrypted files to be read beyond file boundaries. * Fix a race condition between WritePrepared::Get and ::Put with duplicate keys. * Fix crash when memtable prefix bloom is enabled and read/write a key out of domain of prefix extractor. +* Adjust WriteBufferManager's dummy entry size to block cache from 1MB to 256KB. 
## 6.1.0 (3/27/2019) diff --git a/db/db_test2.cc b/db/db_test2.cc index 388d3ccf52..d167e06133 100644 --- a/db/db_test2.cc +++ b/db/db_test2.cc @@ -200,7 +200,7 @@ TEST_P(DBTestSharedWriteBufferAcrossCFs, SharedWriteBufferAcrossCFs) { // The total soft write buffer size is about 105000 std::shared_ptr cache = NewLRUCache(4 * 1024 * 1024, 2); - ASSERT_LT(cache->GetUsage(), 1024 * 1024); + ASSERT_LT(cache->GetUsage(), 256 * 1024); if (use_old_interface_) { options.db_write_buffer_size = 120000; // this is the real limit @@ -236,14 +236,14 @@ TEST_P(DBTestSharedWriteBufferAcrossCFs, SharedWriteBufferAcrossCFs) { ASSERT_OK(Put(3, Key(1), DummyString(30000), wo)); if (cost_cache_) { - ASSERT_GE(cache->GetUsage(), 1024 * 1024); - ASSERT_LE(cache->GetUsage(), 2 * 1024 * 1024); + ASSERT_GE(cache->GetUsage(), 256 * 1024); + ASSERT_LE(cache->GetUsage(), 2 * 256 * 1024); } wait_flush(); ASSERT_OK(Put(0, Key(1), DummyString(60000), wo)); if (cost_cache_) { - ASSERT_GE(cache->GetUsage(), 1024 * 1024); - ASSERT_LE(cache->GetUsage(), 2 * 1024 * 1024); + ASSERT_GE(cache->GetUsage(), 256 * 1024); + ASSERT_LE(cache->GetUsage(), 2 * 256 * 1024); } wait_flush(); ASSERT_OK(Put(2, Key(1), DummyString(1), wo)); @@ -339,11 +339,11 @@ TEST_P(DBTestSharedWriteBufferAcrossCFs, SharedWriteBufferAcrossCFs) { static_cast(2)); } if (cost_cache_) { - ASSERT_GE(cache->GetUsage(), 1024 * 1024); + ASSERT_GE(cache->GetUsage(), 256 * 1024); Close(); options.write_buffer_manager.reset(); last_options_.write_buffer_manager.reset(); - ASSERT_LT(cache->GetUsage(), 1024 * 1024); + ASSERT_LT(cache->GetUsage(), 256 * 1024); } rocksdb::SyncPoint::GetInstance()->DisableProcessing(); } @@ -467,8 +467,8 @@ TEST_F(DBTest2, TestWriteBufferNoLimitWithCache) { Reopen(options); ASSERT_OK(Put("foo", "bar")); - // One dummy entry is 1MB. - ASSERT_GT(cache->GetUsage(), 500000); + // One dummy entry is 256KB. 
+ ASSERT_GT(cache->GetUsage(), 128000); } namespace { diff --git a/memtable/write_buffer_manager.cc b/memtable/write_buffer_manager.cc index 7f2e664ab5..cf7f537642 100644 --- a/memtable/write_buffer_manager.cc +++ b/memtable/write_buffer_manager.cc @@ -14,7 +14,7 @@ namespace rocksdb { #ifndef ROCKSDB_LITE namespace { -const size_t kSizeDummyEntry = 1024 * 1024; +const size_t kSizeDummyEntry = 256 * 1024; // The key will be longer than keys for blocks in SST files so they won't // conflict. const size_t kCacheKeyPrefix = kMaxVarint64Length * 4 + 1; @@ -86,7 +86,7 @@ void WriteBufferManager::ReserveMemWithCache(size_t mem) { size_t new_mem_used = memory_used_.load(std::memory_order_relaxed) + mem; memory_used_.store(new_mem_used, std::memory_order_relaxed); while (new_mem_used > cache_rep_->cache_allocated_size_) { - // Expand size by at least 1MB. + // Expand size by at least 256KB. // Add a dummy record to the cache Cache::Handle* handle; cache_rep_->cache_->Insert(cache_rep_->GetNextCacheKey(), nullptr, diff --git a/memtable/write_buffer_manager_test.cc b/memtable/write_buffer_manager_test.cc index 0fc9fd06c7..3c89c8095e 100644 --- a/memtable/write_buffer_manager_test.cc +++ b/memtable/write_buffer_manager_test.cc @@ -57,67 +57,67 @@ TEST_F(WriteBufferManagerTest, CacheCost) { std::unique_ptr wbf( new WriteBufferManager(50 * 1024 * 1024, cache)); - // Allocate 1.5MB will allocate 2MB - wbf->ReserveMem(1536 * 1024); - ASSERT_GE(cache->GetPinnedUsage(), 2 * 1024 * 1024); - ASSERT_LT(cache->GetPinnedUsage(), 2 * 1024 * 1024 + 10000); + // Allocate 333KB will allocate 512KB + wbf->ReserveMem(333 * 1024); + ASSERT_GE(cache->GetPinnedUsage(), 2 * 256 * 1024); + ASSERT_LT(cache->GetPinnedUsage(), 2 * 256 * 1024 + 10000); - // Allocate another 2MB - wbf->ReserveMem(2 * 1024 * 1024); - ASSERT_GE(cache->GetPinnedUsage(), 4 * 1024 * 1024); - ASSERT_LT(cache->GetPinnedUsage(), 4 * 1024 * 1024 + 10000); + // Allocate another 512KB + wbf->ReserveMem(512 * 1024); + 
ASSERT_GE(cache->GetPinnedUsage(), 4 * 256 * 1024); + ASSERT_LT(cache->GetPinnedUsage(), 4 * 256 * 1024 + 10000); - // Allocate another 20MB - wbf->ReserveMem(20 * 1024 * 1024); - ASSERT_GE(cache->GetPinnedUsage(), 24 * 1024 * 1024); - ASSERT_LT(cache->GetPinnedUsage(), 24 * 1024 * 1024 + 10000); + // Allocate another 10MB + wbf->ReserveMem(10 * 1024 * 1024); + ASSERT_GE(cache->GetPinnedUsage(), 11 * 1024 * 1024); + ASSERT_LT(cache->GetPinnedUsage(), 11 * 1024 * 1024 + 10000); - // Free 2MB will not cause any change in cache cost - wbf->FreeMem(2 * 1024 * 1024); - ASSERT_GE(cache->GetPinnedUsage(), 24 * 1024 * 1024); - ASSERT_LT(cache->GetPinnedUsage(), 24 * 1024 * 1024 + 10000); + // Free 1MB will not cause any change in cache cost + wbf->FreeMem(1024 * 1024); + ASSERT_GE(cache->GetPinnedUsage(), 11 * 1024 * 1024); + ASSERT_LT(cache->GetPinnedUsage(), 11 * 1024 * 1024 + 10000); ASSERT_FALSE(wbf->ShouldFlush()); - // Allocate another 30MB - wbf->ReserveMem(30 * 1024 * 1024); - ASSERT_GE(cache->GetPinnedUsage(), 52 * 1024 * 1024); - ASSERT_LT(cache->GetPinnedUsage(), 52 * 1024 * 1024 + 10000); + // Allocate another 41MB + wbf->ReserveMem(41 * 1024 * 1024); + ASSERT_GE(cache->GetPinnedUsage(), 51 * 1024 * 1024); + ASSERT_LT(cache->GetPinnedUsage(), 51 * 1024 * 1024 + 10000); ASSERT_TRUE(wbf->ShouldFlush()); ASSERT_TRUE(wbf->ShouldFlush()); wbf->ScheduleFreeMem(20 * 1024 * 1024); - ASSERT_GE(cache->GetPinnedUsage(), 52 * 1024 * 1024); - ASSERT_LT(cache->GetPinnedUsage(), 52 * 1024 * 1024 + 10000); + ASSERT_GE(cache->GetPinnedUsage(), 51 * 1024 * 1024); + ASSERT_LT(cache->GetPinnedUsage(), 51 * 1024 * 1024 + 10000); // Still need flush as the hard limit hits ASSERT_TRUE(wbf->ShouldFlush()); - // Free 20MB will releae 1MB from cache + // Free 20MB will releae 256KB from cache wbf->FreeMem(20 * 1024 * 1024); - ASSERT_GE(cache->GetPinnedUsage(), 51 * 1024 * 1024); - ASSERT_LT(cache->GetPinnedUsage(), 51 * 1024 * 1024 + 10000); + ASSERT_GE(cache->GetPinnedUsage(), 51 * 
1024 * 1024 - 256 * 1024); + ASSERT_LT(cache->GetPinnedUsage(), 51 * 1024 * 1024 - 256 * 1024 + 10000); ASSERT_FALSE(wbf->ShouldFlush()); - // Every free will release 1MB if still not hit 3/4 + // Every free will release 256KB if still not hit 3/4 wbf->FreeMem(16 * 1024); - ASSERT_GE(cache->GetPinnedUsage(), 50 * 1024 * 1024); - ASSERT_LT(cache->GetPinnedUsage(), 50 * 1024 * 1024 + 10000); + ASSERT_GE(cache->GetPinnedUsage(), 51 * 1024 * 1024 - 2 * 256 * 1024); + ASSERT_LT(cache->GetPinnedUsage(), 51 * 1024 * 1024 - 2 * 256 * 1024 + 10000); wbf->FreeMem(16 * 1024); - ASSERT_GE(cache->GetPinnedUsage(), 49 * 1024 * 1024); - ASSERT_LT(cache->GetPinnedUsage(), 49 * 1024 * 1024 + 10000); + ASSERT_GE(cache->GetPinnedUsage(), 51 * 1024 * 1024 - 3 * 256 * 1024); + ASSERT_LT(cache->GetPinnedUsage(), 51 * 1024 * 1024 - 3 * 256 * 1024 + 10000); - // Free 2MB will not cause any change in cache cost - wbf->ReserveMem(2 * 1024 * 1024); - ASSERT_GE(cache->GetPinnedUsage(), 49 * 1024 * 1024); - ASSERT_LT(cache->GetPinnedUsage(), 49 * 1024 * 1024 + 10000); + // Reserve 512KB will not cause any change in cache cost + wbf->ReserveMem(512 * 1024); + ASSERT_GE(cache->GetPinnedUsage(), 51 * 1024 * 1024 - 3 * 256 * 1024); + ASSERT_LT(cache->GetPinnedUsage(), 51 * 1024 * 1024 - 3 * 256 * 1024 + 10000); wbf->FreeMem(16 * 1024); - ASSERT_GE(cache->GetPinnedUsage(), 48 * 1024 * 1024); - ASSERT_LT(cache->GetPinnedUsage(), 48 * 1024 * 1024 + 10000); + ASSERT_GE(cache->GetPinnedUsage(), 51 * 1024 * 1024 - 4 * 256 * 1024); + ASSERT_LT(cache->GetPinnedUsage(), 51 * 1024 * 1024 - 4 * 256 * 1024 + 10000); // Destory write buffer manger should free everything wbf.reset(); @@ -136,8 +136,8 @@ TEST_F(WriteBufferManagerTest, NoCapCacheCost) { ASSERT_FALSE(wbf->ShouldFlush()); wbf->FreeMem(9 * 1024 * 1024); - for (int i = 0; i < 10; i++) { - wbf->FreeMem(16 * 1024); + for (int i = 0; i < 40; i++) { + wbf->FreeMem(4 * 1024); } ASSERT_GE(cache->GetPinnedUsage(), 1024 * 1024); 
ASSERT_LT(cache->GetPinnedUsage(), 1024 * 1024 + 10000);