Fix `max_successive_merges` counting CPU overhead regression (#12546)

Summary:
In https://github.com/facebook/rocksdb/issues/12365 we made `max_successive_merges` non-strict by default. Before https://github.com/facebook/rocksdb/issues/12365, `CountSuccessiveMergeEntries()`'s scan was implicitly limited to `max_successive_merges` entries for a given key, because after that the merge operator would be invoked and the merge chain would be collapsed. After https://github.com/facebook/rocksdb/issues/12365, the merge chain will not be collapsed no matter how long it is when the chain's operands are not all in memory. Since `CountSuccessiveMergeEntries()` scanned the whole merge chain, https://github.com/facebook/rocksdb/issues/12365 had a side effect that it would scan more memtable entries. This PR introduces a limit so it won't scan more entries than it could before.

Pull Request resolved: https://github.com/facebook/rocksdb/pull/12546

Reviewed By: jaykorean

Differential Revision: D56193693

Pulled By: ajkr

fbshipit-source-id: b070ba0703ef733e0ff230f89cd5cca5233b84da
This commit is contained in:
Andrew Kryczka 2024-04-17 12:11:24 -07:00 committed by Facebook GitHub Bot
parent 02ea0d6367
commit 7027265417
4 changed files with 12 additions and 7 deletions

View File

@ -1621,7 +1621,8 @@ Status MemTable::UpdateCallback(SequenceNumber seq, const Slice& key,
return Status::NotFound(); return Status::NotFound();
} }
size_t MemTable::CountSuccessiveMergeEntries(const LookupKey& key) { size_t MemTable::CountSuccessiveMergeEntries(const LookupKey& key,
size_t limit) {
Slice memkey = key.memtable_key(); Slice memkey = key.memtable_key();
// A total ordered iterator is costly for some memtablerep (prefix aware // A total ordered iterator is costly for some memtablerep (prefix aware
@ -1633,7 +1634,7 @@ size_t MemTable::CountSuccessiveMergeEntries(const LookupKey& key) {
size_t num_successive_merges = 0; size_t num_successive_merges = 0;
for (; iter->Valid(); iter->Next()) { for (; iter->Valid() && num_successive_merges < limit; iter->Next()) {
const char* entry = iter->key(); const char* entry = iter->key();
uint32_t key_length = 0; uint32_t key_length = 0;
const char* iter_key_ptr = GetVarint32Ptr(entry, entry + 5, &key_length); const char* iter_key_ptr = GetVarint32Ptr(entry, entry + 5, &key_length);

View File

@ -326,9 +326,10 @@ class MemTable {
const ProtectionInfoKVOS64* kv_prot_info); const ProtectionInfoKVOS64* kv_prot_info);
// Returns the number of successive merge entries starting from the newest // Returns the number of successive merge entries starting from the newest
// entry for the key up to the last non-merge entry or last entry for the // entry for the key. The count ends when the oldest entry in the memtable
// key in the memtable. // with which the newest entry would be merged is reached, or the count
size_t CountSuccessiveMergeEntries(const LookupKey& key); // reaches `limit`.
size_t CountSuccessiveMergeEntries(const LookupKey& key, size_t limit);
// Update counters and flush status after inserting a whole write batch // Update counters and flush status after inserting a whole write batch
// Used in concurrent memtable inserts. // Used in concurrent memtable inserts.

View File

@ -2623,8 +2623,10 @@ class MemTableInserter : public WriteBatch::Handler {
LookupKey lkey(key, sequence_); LookupKey lkey(key, sequence_);
// Count the number of successive merges at the head // Count the number of successive merges at the head
// of the key in the memtable // of the key in the memtable. Limit the count to the threshold for
size_t num_merges = mem->CountSuccessiveMergeEntries(lkey); // triggering merge to prevent unnecessary counting overhead.
size_t num_merges = mem->CountSuccessiveMergeEntries(
lkey, moptions->max_successive_merges /* limit */);
if (num_merges >= moptions->max_successive_merges) { if (num_merges >= moptions->max_successive_merges) {
perform_merge = true; perform_merge = true;

View File

@ -0,0 +1 @@
* Fixed a regression when `ColumnFamilyOptions::max_successive_merges > 0` in which the CPU overhead for deciding whether to merge could have increased, unless the user had set the option `ColumnFamilyOptions::strict_max_successive_merges`.