From 678ba5e41cc7ae62a7d99a8307c2477f4de6d5d2 Mon Sep 17 00:00:00 2001 From: matthewvon Date: Fri, 15 Oct 2021 13:05:17 -0700 Subject: [PATCH] SyncPoint::Process thrashes heap ... fix it (#9023) Summary: The first parameter of SyncPoint::Process is "const std::string&". The majority, maybe all, of the actual calls to this function use a "const char *". The conversion before entering the function requires a construction of a std::string object on the heap. This std::object is then typically not needed because first use of the string is a rocksdb::Slice which has a less costly conversion of char * to slice. Example: We have a load and iterate test. The test loads 10m keys and iterates most via 10 rocksdb::Iterator objects. We used TCMALLOC to gather information about allocation and space usage during iterators. - Before this PR: test took 32 min 17 sec - After this PR: test took 1 min 14 sec The TCMALLOC top object list before this PR:
Total: 5105999 objects
 5003717  98.0%  98.0%  5009471  98.1% rocksdb::DBIter::MergeValuesNewToOld (inline)
   20260   0.4%  98.4%    20260   0.4% std::__cxx11::basic_string::_M_mutate
   15214   0.3%  98.7%    15214   0.3% rocksdb::UncompressBlockContentsForCompressionType (inline)
   13408   0.3%  99.0%    13408   0.3% std::_Rb_tree::_M_emplace_hint_unique [clone .constprop.416] (inline)
   12957   0.3%  99.2%    12957   0.3% std::_Rb_tree::_M_emplace_hint_unique [clone .constprop.405] (inline)
    9327   0.2%  99.4%     9327   0.2% std::_Rb_tree::_M_copy (inline)
    7691   0.2%  99.5%     9919   0.2% JVM_FindSignal
    2859   0.1%  99.6%     2859   0.1% rocksdb::Cleanable::RegisterCleanup
    2844   0.1%  99.7%     2844   0.1% std::map::operator[] (inline)
The "MergeValuesNewToOld (inline)" objects are the #define wrappers to SyncPoint::Process. We discovered this in a 5.18 rocksdb release. There TCMALLOC was more specific that std::basic_string was being constructed. I believe that was before SyncPoint::Process was declared inline in subsequent releases. The TCMALLOC top object list after this PR:
Total: 104911 objects
   45090  43.0%  43.0%    45090  43.0% rocksdb::Cleanable::RegisterCleanup
   29995  28.6%  71.6%    29995  28.6% rocksdb::LRUCacheShard::Insert
   15229  14.5%  86.1%    15229  14.5% rocksdb::UncompressBlockContentsForCompressionType (inline)
    4373   4.2%  90.3%     4551   4.3% JVM_FindSignal
    2881   2.7%  93.0%     2881   2.7% rocksdb::::ReadBlockFromFile (inline)
    1162   1.1%  94.1%     1176   1.1% rocksdb::BlockFetcher::ReadBlockContents (inline)
    1036   1.0%  95.1%     1036   1.0% std::__cxx11::basic_string::_M_mutate
     869   0.8%  95.9%      869   0.8% std::vector::_M_realloc_insert (inline)
     806   0.8%  96.7%      806   0.8% SnmpAgent::GetVariables (inline)
Pull Request resolved: https://github.com/facebook/rocksdb/pull/9023 Reviewed By: pdillinger Differential Revision: D31610907 Pulled By: mrambacher fbshipit-source-id: 574ff51b639dd46ad253a8e664a575f06b7cc85d --- test_util/sync_point.cc | 2 +- test_util/sync_point.h | 13 ++++++++++++- test_util/sync_point_impl.cc | 18 +++++++++++------- test_util/sync_point_impl.h | 2 +- 4 files changed, 25 insertions(+), 10 deletions(-) diff --git a/test_util/sync_point.cc b/test_util/sync_point.cc index 4c71fc6bc4..067fc8234f 100644 --- a/test_util/sync_point.cc +++ b/test_util/sync_point.cc @@ -60,7 +60,7 @@ void SyncPoint::ClearTrace() { impl_->ClearTrace(); } -void SyncPoint::Process(const std::string& point, void* cb_arg) { +void SyncPoint::Process(const Slice& point, void* cb_arg) { impl_->Process(point, cb_arg); } diff --git a/test_util/sync_point.h b/test_util/sync_point.h index 775fd5c36e..410176dc82 100644 --- a/test_util/sync_point.h +++ b/test_util/sync_point.h @@ -5,6 +5,7 @@ #pragma once #include + #include #include #include @@ -12,6 +13,7 @@ #include #include "rocksdb/rocksdb_namespace.h" +#include "rocksdb/slice.h" #ifdef NDEBUG // empty in release build @@ -117,7 +119,16 @@ class SyncPoint { // triggered by TEST_SYNC_POINT, blocking execution until all predecessors // are executed. // And/or call registered callback function, with argument `cb_arg` - void Process(const std::string& point, void* cb_arg = nullptr); + void Process(const Slice& point, void* cb_arg = nullptr); + + // template gets length of const string at compile time, + // avoiding strlen() at runtime + template + void Process(const char (&point)[kLen], void* cb_arg = nullptr) { + static_assert(kLen > 0, "Must not be empty"); + assert(point[kLen - 1] == '\0'); + Process(Slice(point, kLen - 1), cb_arg); + } // TODO: it might be useful to provide a function that blocks until all // sync points are cleared. diff --git a/test_util/sync_point_impl.cc b/test_util/sync_point_impl.cc index 1d87a05fe0..4add498896 100644 --- a/test_util/sync_point_impl.cc +++ b/test_util/sync_point_impl.cc @@ -101,19 +101,23 @@ void SyncPoint::Data::ClearAllCallBacks() { callbacks_.clear(); } -void SyncPoint::Data::Process(const std::string& point, void* cb_arg) { +void SyncPoint::Data::Process(const Slice& point, void* cb_arg) { if (!enabled_) { return; } + // Use a filter to prevent mutex lock if possible. if (!point_filter_.MayContain(point)) { return; } + // Must convert to std::string for remaining work. Take + // heap hit. + std::string point_string(point.ToString()); std::unique_lock lock(mutex_); auto thread_id = std::this_thread::get_id(); - auto marker_iter = markers_.find(point); + auto marker_iter = markers_.find(point_string); if (marker_iter != markers_.end()) { for (auto& marked_point : marker_iter->second) { marked_thread_id_.emplace(marked_point, thread_id); @@ -121,18 +125,18 @@ void SyncPoint::Data::Process(const std::string& point, void* cb_arg) { } } - if (DisabledByMarker(point, thread_id)) { + if (DisabledByMarker(point_string, thread_id)) { return; } - while (!PredecessorsAllCleared(point)) { + while (!PredecessorsAllCleared(point_string)) { cv_.wait(lock); - if (DisabledByMarker(point, thread_id)) { + if (DisabledByMarker(point_string, thread_id)) { return; } } - auto callback_pair = callbacks_.find(point); + auto callback_pair = callbacks_.find(point_string); if (callback_pair != callbacks_.end()) { num_callbacks_running_++; mutex_.unlock(); @@ -140,7 +144,7 @@ void SyncPoint::Data::Process(const std::string& point, void* cb_arg) { mutex_.lock(); num_callbacks_running_--; } - cleared_points_.insert(point); + cleared_points_.insert(point_string); cv_.notify_all(); } } // namespace ROCKSDB_NAMESPACE diff --git a/test_util/sync_point_impl.h b/test_util/sync_point_impl.h index ba818e381f..5260180409 100644 --- a/test_util/sync_point_impl.h +++ b/test_util/sync_point_impl.h @@ -95,7 +95,7 @@ struct SyncPoint::Data { return marked_point_iter != marked_thread_id_.end() && thread_id != marked_point_iter->second; } - void Process(const std::string& point, void* cb_arg); + void Process(const Slice& point, void* cb_arg); }; } // namespace ROCKSDB_NAMESPACE #endif // NDEBUG