diff --git a/HISTORY.md b/HISTORY.md index 40a0141a7d..3154335ba9 100644 --- a/HISTORY.md +++ b/HISTORY.md @@ -4,6 +4,9 @@ * When verifying full file checksum with `DB::VerifyFileChecksums()`, we now fail with `Status::InvalidArgument` if the name of the checksum generator used for verification does not match the name of the checksum generator used for protecting the file when it was created. * Since RocksDB does not continue write the same file if a file write fails for any reason, the file scope write IO error is treated the same as retryable IO error. More information about error handling of file scope IO error is included in `ErrorHandler::SetBGError`. +### Public API Change +* Add a public API WriteBufferManager::dummy_entries_in_cache_usage() which reports the size of dummy entries stored in cache (passed to WriteBufferManager). Dummy entries are used to account for memtable memory charged to the block cache. + ## 6.16.0 (12/18/2020) ### Behavior Changes * Attempting to write a merge operand without explicitly configuring `merge_operator` now fails immediately, causing the DB to enter read-only mode. Previously, failure was deferred until the `merge_operator` was needed by a user read or a background operation. diff --git a/include/rocksdb/write_buffer_manager.h b/include/rocksdb/write_buffer_manager.h index ae1c98cafb..c1d1300ae3 100644 --- a/include/rocksdb/write_buffer_manager.h +++ b/include/rocksdb/write_buffer_manager.h @@ -43,6 +43,9 @@ class WriteBufferManager { size_t mutable_memtable_memory_usage() const { return memory_active_.load(std::memory_order_relaxed); } + size_t dummy_entries_in_cache_usage() const { + return dummy_size_.load(std::memory_order_relaxed); + } size_t buffer_size() const { return buffer_size_; } // Should only be called from write thread @@ -93,6 +96,7 @@ class WriteBufferManager { std::atomic<size_t> memory_used_; // Memory that hasn't been scheduled to free. 
std::atomic<size_t> memory_active_; + std::atomic<size_t> dummy_size_; struct CacheRep; std::unique_ptr<CacheRep> cache_rep_; diff --git a/memtable/write_buffer_manager.cc b/memtable/write_buffer_manager.cc index 9b74708708..f6451032a2 100644 --- a/memtable/write_buffer_manager.cc +++ b/memtable/write_buffer_manager.cc @@ -54,6 +54,7 @@ WriteBufferManager::WriteBufferManager(size_t _buffer_size, mutable_limit_(buffer_size_ * 7 / 8), memory_used_(0), memory_active_(0), + dummy_size_(0), cache_rep_(nullptr) { #ifndef ROCKSDB_LITE if (cache) { @@ -104,6 +105,7 @@ void WriteBufferManager::ReserveMemWithCache(size_t mem) { // it in the future. cache_rep_->dummy_handles_.push_back(handle); cache_rep_->cache_allocated_size_ += kSizeDummyEntry; + dummy_size_.fetch_add(kSizeDummyEntry, std::memory_order_relaxed); } #else (void)mem; @@ -137,6 +139,7 @@ void WriteBufferManager::FreeMemWithCache(size_t mem) { } cache_rep_->dummy_handles_.pop_back(); cache_rep_->cache_allocated_size_ -= kSizeDummyEntry; + dummy_size_.fetch_sub(kSizeDummyEntry, std::memory_order_relaxed); } #else (void)mem; diff --git a/memtable/write_buffer_manager_test.cc b/memtable/write_buffer_manager_test.cc index 0cdd7c4780..e9377a2a78 100644 --- a/memtable/write_buffer_manager_test.cc +++ b/memtable/write_buffer_manager_test.cc @@ -11,7 +11,7 @@ #include "test_util/testharness.h" namespace ROCKSDB_NAMESPACE { - +const size_t kSizeDummyEntry = 256 * 1024; class WriteBufferManagerTest : public testing::Test {}; #ifndef ROCKSDB_LITE @@ -65,28 +65,35 @@ TEST_F(WriteBufferManagerTest, CacheCost) { wbf->ReserveMem(333 * 1024); ASSERT_GE(cache->GetPinnedUsage(), 2 * 256 * 1024); ASSERT_LT(cache->GetPinnedUsage(), 2 * 256 * 1024 + 10000); + // 2 dummy entries are added for size 333 KB. 
+ ASSERT_EQ(wbf->dummy_entries_in_cache_usage(), 2 * kSizeDummyEntry); // Allocate another 512KB wbf->ReserveMem(512 * 1024); ASSERT_GE(cache->GetPinnedUsage(), 4 * 256 * 1024); ASSERT_LT(cache->GetPinnedUsage(), 4 * 256 * 1024 + 10000); + // 2 more dummy entries are added for size 512 KB. + ASSERT_EQ(wbf->dummy_entries_in_cache_usage(), 4 * kSizeDummyEntry); // Allocate another 10MB wbf->ReserveMem(10 * 1024 * 1024); ASSERT_GE(cache->GetPinnedUsage(), 11 * 1024 * 1024); ASSERT_LT(cache->GetPinnedUsage(), 11 * 1024 * 1024 + 10000); + // 40 more dummy entries are added for size 10 * 1024 * 1024. + ASSERT_EQ(wbf->dummy_entries_in_cache_usage(), 44 * kSizeDummyEntry); // Free 1MB will not cause any change in cache cost wbf->FreeMem(1024 * 1024); ASSERT_GE(cache->GetPinnedUsage(), 11 * 1024 * 1024); ASSERT_LT(cache->GetPinnedUsage(), 11 * 1024 * 1024 + 10000); - + ASSERT_EQ(wbf->dummy_entries_in_cache_usage(), 44 * kSizeDummyEntry); ASSERT_FALSE(wbf->ShouldFlush()); // Allocate another 41MB wbf->ReserveMem(41 * 1024 * 1024); ASSERT_GE(cache->GetPinnedUsage(), 51 * 1024 * 1024); ASSERT_LT(cache->GetPinnedUsage(), 51 * 1024 * 1024 + 10000); + ASSERT_EQ(wbf->dummy_entries_in_cache_usage(), 204 * kSizeDummyEntry); ASSERT_TRUE(wbf->ShouldFlush()); ASSERT_TRUE(wbf->ShouldFlush()); @@ -94,7 +101,7 @@ TEST_F(WriteBufferManagerTest, CacheCost) { wbf->ScheduleFreeMem(20 * 1024 * 1024); ASSERT_GE(cache->GetPinnedUsage(), 51 * 1024 * 1024); ASSERT_LT(cache->GetPinnedUsage(), 51 * 1024 * 1024 + 10000); - + ASSERT_EQ(wbf->dummy_entries_in_cache_usage(), 204 * kSizeDummyEntry); // Still need flush as the hard limit hits ASSERT_TRUE(wbf->ShouldFlush()); @@ -102,6 +109,7 @@ wbf->FreeMem(20 * 1024 * 1024); ASSERT_GE(cache->GetPinnedUsage(), 51 * 1024 * 1024 - 256 * 1024); ASSERT_LT(cache->GetPinnedUsage(), 51 * 1024 * 1024 - 256 * 1024 + 10000); + ASSERT_EQ(wbf->dummy_entries_in_cache_usage(), 203 * kSizeDummyEntry); ASSERT_FALSE(wbf->ShouldFlush()); 
@@ -109,19 +117,23 @@ TEST_F(WriteBufferManagerTest, CacheCost) { wbf->FreeMem(16 * 1024); ASSERT_GE(cache->GetPinnedUsage(), 51 * 1024 * 1024 - 2 * 256 * 1024); ASSERT_LT(cache->GetPinnedUsage(), 51 * 1024 * 1024 - 2 * 256 * 1024 + 10000); + ASSERT_EQ(wbf->dummy_entries_in_cache_usage(), 202 * kSizeDummyEntry); wbf->FreeMem(16 * 1024); ASSERT_GE(cache->GetPinnedUsage(), 51 * 1024 * 1024 - 3 * 256 * 1024); ASSERT_LT(cache->GetPinnedUsage(), 51 * 1024 * 1024 - 3 * 256 * 1024 + 10000); + ASSERT_EQ(wbf->dummy_entries_in_cache_usage(), 201 * kSizeDummyEntry); // Reserve 512KB will not cause any change in cache cost wbf->ReserveMem(512 * 1024); ASSERT_GE(cache->GetPinnedUsage(), 51 * 1024 * 1024 - 3 * 256 * 1024); ASSERT_LT(cache->GetPinnedUsage(), 51 * 1024 * 1024 - 3 * 256 * 1024 + 10000); + ASSERT_EQ(wbf->dummy_entries_in_cache_usage(), 201 * kSizeDummyEntry); wbf->FreeMem(16 * 1024); ASSERT_GE(cache->GetPinnedUsage(), 51 * 1024 * 1024 - 4 * 256 * 1024); ASSERT_LT(cache->GetPinnedUsage(), 51 * 1024 * 1024 - 4 * 256 * 1024 + 10000); + ASSERT_EQ(wbf->dummy_entries_in_cache_usage(), 200 * kSizeDummyEntry); // Destory write buffer manger should free everything wbf.reset(); @@ -137,6 +149,7 @@ TEST_F(WriteBufferManagerTest, NoCapCacheCost) { wbf->ReserveMem(10 * 1024 * 1024); ASSERT_GE(cache->GetPinnedUsage(), 10 * 1024 * 1024); ASSERT_LT(cache->GetPinnedUsage(), 10 * 1024 * 1024 + 10000); + ASSERT_EQ(wbf->dummy_entries_in_cache_usage(), 40 * kSizeDummyEntry); ASSERT_FALSE(wbf->ShouldFlush()); wbf->FreeMem(9 * 1024 * 1024); @@ -145,6 +158,7 @@ } ASSERT_GE(cache->GetPinnedUsage(), 1024 * 1024); ASSERT_LT(cache->GetPinnedUsage(), 1024 * 1024 + 10000); + ASSERT_EQ(wbf->dummy_entries_in_cache_usage(), 4 * kSizeDummyEntry); } TEST_F(WriteBufferManagerTest, CacheFull) { @@ -156,16 +170,20 @@ TEST_F(WriteBufferManagerTest, CacheFull) { std::shared_ptr<Cache> cache = NewLRUCache(lo); std::unique_ptr<WriteBufferManager> wbf(new WriteBufferManager(0, cache)); 
wbf->ReserveMem(10 * 1024 * 1024); + ASSERT_EQ(wbf->dummy_entries_in_cache_usage(), 40 * kSizeDummyEntry); size_t prev_pinned = cache->GetPinnedUsage(); ASSERT_GE(prev_pinned, 10 * 1024 * 1024); + // Some insert will fail wbf->ReserveMem(10 * 1024 * 1024); ASSERT_LE(cache->GetPinnedUsage(), 12 * 1024 * 1024); + ASSERT_EQ(wbf->dummy_entries_in_cache_usage(), 80 * kSizeDummyEntry); // Increase capacity so next insert will succeed cache->SetCapacity(30 * 1024 * 1024); wbf->ReserveMem(10 * 1024 * 1024); ASSERT_GT(cache->GetPinnedUsage(), 20 * 1024 * 1024); + ASSERT_EQ(wbf->dummy_entries_in_cache_usage(), 120 * kSizeDummyEntry); // Gradually release 20 MB for (int i = 0; i < 40; i++) { @@ -173,6 +191,7 @@ TEST_F(WriteBufferManagerTest, CacheFull) { } ASSERT_GE(cache->GetPinnedUsage(), 10 * 1024 * 1024); ASSERT_LT(cache->GetPinnedUsage(), 20 * 1024 * 1024); + ASSERT_EQ(wbf->dummy_entries_in_cache_usage(), 95 * kSizeDummyEntry); } #endif // ROCKSDB_LITE