From bd8404feffa7a8825e7ab79497c1c74cdc83f7f3 Mon Sep 17 00:00:00 2001 From: Levi Tamasi Date: Fri, 13 Dec 2019 19:09:53 -0800 Subject: [PATCH] Do not schedule memtable trimming if there is no history (#6177) Summary: We have observed an increase in CPU load caused by frequent calls to `ColumnFamilyData::InstallSuperVersion` from `DBImpl::TrimMemtableHistory` when using `max_write_buffer_size_to_maintain` to limit the amount of memtable history maintained for transaction conflict checking. Part of the issue is that trimming can potentially be scheduled even if there is no memtable history. The patch adds a check that fixes this. See also https://github.com/facebook/rocksdb/pull/6169. Pull Request resolved: https://github.com/facebook/rocksdb/pull/6177 Test Plan: Compared `perf` output for ``` ./db_bench -benchmarks=randomtransaction -optimistic_transaction_db=1 -statistics -stats_interval_seconds=1 -duration=90 -num=500000 --max_write_buffer_size_to_maintain=16000000 --transaction_set_snapshot=1 --threads=32 ``` before and after the change. There is a significant reduction for the call chain `rocksdb::DBImpl::TrimMemtableHistory` -> `rocksdb::ColumnFamilyData::InstallSuperVersion` -> `rocksdb::ThreadLocalPtr::StaticMeta::Scrape` even without https://github.com/facebook/rocksdb/pull/6169. Differential Revision: D19057445 Pulled By: ltamasi fbshipit-source-id: dff81882d7b280e17eda7d9b072a2d4882c50f79 --- db/memtable.h | 2 +- db/memtable_list.cc | 4 ++-- db/memtable_list.h | 4 ++-- db/write_batch.cc | 30 ++++++++++++++++++++++-------- 4 files changed, 27 insertions(+), 13 deletions(-) diff --git a/db/memtable.h b/db/memtable.h index 0aeadce80c..961707008a 100644 --- a/db/memtable.h +++ b/db/memtable.h @@ -138,7 +138,7 @@ class MemTable { // As a cheap version of `ApproximateMemoryUsage()`, this function doens't // require external synchronization. The value may be less accurate though - size_t ApproximateMemoryUsageFast() { + size_t ApproximateMemoryUsageFast() const { return approximate_memory_usage_.load(std::memory_order_relaxed); } diff --git a/db/memtable_list.cc b/db/memtable_list.cc index 93ab2e77c4..2d4c514e16 100644 --- a/db/memtable_list.cc +++ b/db/memtable_list.cc @@ -280,7 +280,7 @@ void MemTableListVersion::Remove(MemTable* m, } // return the total memory usage assuming the oldest flushed memtable is dropped -size_t MemTableListVersion::ApproximateMemoryUsageExcludingLast() { +size_t MemTableListVersion::ApproximateMemoryUsageExcludingLast() const { size_t total_memtable_size = 0; for (auto& memtable : memlist_) { total_memtable_size += memtable->ApproximateMemoryUsage(); @@ -566,7 +566,7 @@ size_t MemTableList::ApproximateUnflushedMemTablesMemoryUsage() { size_t MemTableList::ApproximateMemoryUsage() { return current_memory_usage_; } -size_t MemTableList::ApproximateMemoryUsageExcludingLast() { +size_t MemTableList::ApproximateMemoryUsageExcludingLast() const { size_t usage = current_memory_usage_excluding_last_.load(std::memory_order_relaxed); return usage; diff --git a/db/memtable_list.h b/db/memtable_list.h index d78a8b5ea9..aad4fddea3 100644 --- a/db/memtable_list.h +++ b/db/memtable_list.h @@ -162,7 +162,7 @@ class MemTableListVersion { // excluding the last MemTable in memlist_history_. The reason for excluding // the last MemTable is to see if dropping the last MemTable will keep total // memory usage above or equal to max_write_buffer_size_to_maintain_ - size_t ApproximateMemoryUsageExcludingLast(); + size_t ApproximateMemoryUsageExcludingLast() const; bool MemtableLimitExceeded(size_t usage); @@ -267,7 +267,7 @@ class MemTableList { size_t ApproximateMemoryUsage(); // Returns the cached current_memory_usage_excluding_last_ value - size_t ApproximateMemoryUsageExcludingLast(); + size_t ApproximateMemoryUsageExcludingLast() const; // Update current_memory_usage_excluding_last_ from MemtableListVersion void UpdateMemoryUsageExcludingLast(); diff --git a/db/write_batch.cc b/db/write_batch.cc index 350c1a1c07..71104ea357 100644 --- a/db/write_batch.cc +++ b/db/write_batch.cc @@ -1784,14 +1784,28 @@ class MemTableInserter : public WriteBatch::Handler { // check if memtable_list size exceeds max_write_buffer_size_to_maintain if (trim_history_scheduler_ != nullptr) { auto* cfd = cf_mems_->current(); - assert(cfd != nullptr); - if (cfd->ioptions()->max_write_buffer_size_to_maintain > 0 && - cfd->mem()->ApproximateMemoryUsageFast() + - cfd->imm()->ApproximateMemoryUsageExcludingLast() >= - static_cast( - cfd->ioptions()->max_write_buffer_size_to_maintain) && - cfd->imm()->MarkTrimHistoryNeeded()) { - trim_history_scheduler_->ScheduleWork(cfd); + + assert(cfd); + assert(cfd->ioptions()); + + const size_t size_to_maintain = static_cast( + cfd->ioptions()->max_write_buffer_size_to_maintain); + + if (size_to_maintain > 0) { + MemTableList* const imm = cfd->imm(); + assert(imm); + + if (imm->NumFlushed() > 0) { + const MemTable* const mem = cfd->mem(); + assert(mem); + + if (mem->ApproximateMemoryUsageFast() + + imm->ApproximateMemoryUsageExcludingLast() >= + size_to_maintain && + imm->MarkTrimHistoryNeeded()) { + trim_history_scheduler_->ScheduleWork(cfd); + } + } } } }