rocksdb/test_util/sync_point_impl.cc

152 lines
4.3 KiB
C++
Raw Normal View History

// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under both the GPLv2 (found in the
// COPYING file in the root directory) and Apache 2.0 License
// (found in the LICENSE.Apache file in the root directory).
#include "test_util/sync_point_impl.h"
#ifndef NDEBUG
namespace ROCKSDB_NAMESPACE {
// Returns a pointer to the single KillPoint object, which is held in a
// function-local static; every call yields the same address.
KillPoint* KillPoint::GetInstance() {
  static KillPoint instance;
  return &instance;
}
// Randomly crashes the process at a named kill point.
//
// kill_point:  name of the call site; compared against the excluded prefixes.
// odds_weight: multiplier applied to rocksdb_kill_odds; the crash happens
//              with probability of roughly 1 in (rocksdb_kill_odds *
//              odds_weight).
// srcfile, srcline: forwarded to port::Crash when the crash is triggered.
//
// Does nothing when rocksdb_kill_odds is not positive or when kill_point
// starts with any entry of rocksdb_kill_exclude_prefixes.
void KillPoint::TestKillRandom(std::string kill_point, int odds_weight,
                               const std::string& srcfile, int srcline) {
  if (rocksdb_kill_odds <= 0) {
    return;
  }

  for (const auto& p : rocksdb_kill_exclude_prefixes) {
    // Prefix test without materializing a temporary substring.
    if (kill_point.compare(0, p.length(), p) == 0) {
      return;
    }
  }

  int odds = rocksdb_kill_odds * odds_weight;
  assert(odds > 0);
  if (odds % 7 == 0) {
    // class Random uses multiplier 16807, which is 7^5. If odds is a
    // multiple of 7, there might be limited values generated.
    odds++;
  }

  auto* r = Random::GetTLSInstance();
  const bool crash = r->OneIn(odds);
  if (crash) {
    port::Crash(srcfile, srcline);
  }
}
// Replaces the current dependency graph with `dependencies`.
//
// Under mutex_, the successor/predecessor maps and the set of cleared points
// are reset, then each pair is recorded in both directions and both of its
// point names are added to point_filter_. All waiters on cv_ are woken
// afterwards so they can re-evaluate their predecessors.
void SyncPoint::Data::LoadDependency(
    const std::vector<SyncPointPair>& dependencies) {
  std::lock_guard<std::mutex> guard(mutex_);

  successors_.clear();
  predecessors_.clear();
  cleared_points_.clear();

  for (const auto& dep : dependencies) {
    const auto& before = dep.predecessor;
    const auto& after = dep.successor;
    successors_[before].push_back(after);
    predecessors_[after].push_back(before);
    point_filter_.Add(after);
    point_filter_.Add(before);
  }

  cv_.notify_all();
}
// Replaces the current dependency graph and marker table.
//
// Under mutex_, all graph state (successors, predecessors, cleared points,
// markers and the per-point marked thread ids) is reset. Every pair from
// `dependencies` and from `markers` is then recorded as a
// predecessor -> successor edge and added to point_filter_; pairs from
// `markers` are additionally stored in markers_ keyed by their predecessor.
// All waiters on cv_ are woken afterwards.
void SyncPoint::Data::LoadDependencyAndMarkers(
    const std::vector<SyncPointPair>& dependencies,
    const std::vector<SyncPointPair>& markers) {
  std::lock_guard<std::mutex> guard(mutex_);

  successors_.clear();
  predecessors_.clear();
  cleared_points_.clear();
  markers_.clear();
  marked_thread_id_.clear();

  for (const auto& dep : dependencies) {
    const auto& before = dep.predecessor;
    const auto& after = dep.successor;
    successors_[before].push_back(after);
    predecessors_[after].push_back(before);
    point_filter_.Add(after);
    point_filter_.Add(before);
  }

  for (const auto& mark : markers) {
    const auto& before = mark.predecessor;
    const auto& after = mark.successor;
    successors_[before].push_back(after);
    predecessors_[after].push_back(before);
    markers_[before].push_back(after);
    point_filter_.Add(before);
    point_filter_.Add(after);
  }

  cv_.notify_all();
}
// Returns true when every predecessor registered for `point` is present in
// cleared_points_, or when `point` has no registered predecessors.
//
// Looks the point up with find() rather than operator[] so that querying a
// point without predecessors does not insert an empty entry into
// predecessors_.
bool SyncPoint::Data::PredecessorsAllCleared(const std::string& point) {
  const auto entry = predecessors_.find(point);
  if (entry == predecessors_.end()) {
    return true;
  }
  for (const auto& pred : entry->second) {
    if (cleared_points_.count(pred) == 0) {
      return false;
    }
  }
  return true;
}
void SyncPoint::Data::ClearCallBack(const std::string& point) {
std::unique_lock<std::mutex> lock(mutex_);
while (num_callbacks_running_ > 0) {
cv_.wait(lock);
}
callbacks_.erase(point);
}
void SyncPoint::Data::ClearAllCallBacks() {
std::unique_lock<std::mutex> lock(mutex_);
while (num_callbacks_running_ > 0) {
cv_.wait(lock);
}
callbacks_.clear();
}
SyncPoint::Process thrashes heap ... fix it (#9023) Summary: The first parameter of SyncPoint::Process is "const std::string&". The majority, maybe all, of the actual calls to this function use a "const char *". The conversion before entering the function requires a construction of a std::string object on the heap. This std::object is then typically not needed because first use of the string is a rocksdb::Slice which has a less costly conversion of char * to slice. Example: We have a load and iterate test. The test loads 10m keys and iterates most via 10 rocksdb::Iterator objects. We used TCMALLOC to gather information about allocation and space usage during iterators. - Before this PR: test took 32 min 17 sec - After this PR: test took 1 min 14 sec The TCMALLOC top object list before this PR: <pre> Total: 5105999 objects 5003717 98.0% 98.0% 5009471 98.1% rocksdb::DBIter::MergeValuesNewToOld (inline) 20260 0.4% 98.4% 20260 0.4% std::__cxx11::basic_string::_M_mutate 15214 0.3% 98.7% 15214 0.3% rocksdb::UncompressBlockContentsForCompressionType (inline) 13408 0.3% 99.0% 13408 0.3% std::_Rb_tree::_M_emplace_hint_unique [clone .constprop.416] (inline) 12957 0.3% 99.2% 12957 0.3% std::_Rb_tree::_M_emplace_hint_unique [clone .constprop.405] (inline) 9327 0.2% 99.4% 9327 0.2% std::_Rb_tree::_M_copy (inline) 7691 0.2% 99.5% 9919 0.2% JVM_FindSignal 2859 0.1% 99.6% 2859 0.1% rocksdb::Cleanable::RegisterCleanup 2844 0.1% 99.7% 2844 0.1% std::map::operator[] (inline) </pre> The "MergeValuesNewToOld (inline)" objects are the #define wrappers to SyncPoint::Process. We discovered this in a 5.18 rocksdb release. There TCMALLOC was more specific that std::basic_string was being constructed. I believe that was before SyncPoint::Process was declared inline in subsequent releases. 
The TCMALLOC top object list after this PR: <pre> Total: 104911 objects 45090 43.0% 43.0% 45090 43.0% rocksdb::Cleanable::RegisterCleanup 29995 28.6% 71.6% 29995 28.6% rocksdb::LRUCacheShard::Insert 15229 14.5% 86.1% 15229 14.5% rocksdb::UncompressBlockContentsForCompressionType (inline) 4373 4.2% 90.3% 4551 4.3% JVM_FindSignal 2881 2.7% 93.0% 2881 2.7% rocksdb::::ReadBlockFromFile (inline) 1162 1.1% 94.1% 1176 1.1% rocksdb::BlockFetcher::ReadBlockContents (inline) 1036 1.0% 95.1% 1036 1.0% std::__cxx11::basic_string::_M_mutate 869 0.8% 95.9% 869 0.8% std::vector::_M_realloc_insert (inline) 806 0.8% 96.7% 806 0.8% SnmpAgent::GetVariables (inline) </pre> Pull Request resolved: https://github.com/facebook/rocksdb/pull/9023 Reviewed By: pdillinger Differential Revision: D31610907 Pulled By: mrambacher fbshipit-source-id: 574ff51b639dd46ad253a8e664a575f06b7cc85d
2021-10-15 20:05:17 +00:00
void SyncPoint::Data::Process(const Slice& point, void* cb_arg) {
if (!enabled_) {
return;
}
SyncPoint::Process thrashes heap ... fix it (#9023) Summary: The first parameter of SyncPoint::Process is "const std::string&". The majority, maybe all, of the actual calls to this function use a "const char *". The conversion before entering the function requires a construction of a std::string object on the heap. This std::object is then typically not needed because first use of the string is a rocksdb::Slice which has a less costly conversion of char * to slice. Example: We have a load and iterate test. The test loads 10m keys and iterates most via 10 rocksdb::Iterator objects. We used TCMALLOC to gather information about allocation and space usage during iterators. - Before this PR: test took 32 min 17 sec - After this PR: test took 1 min 14 sec The TCMALLOC top object list before this PR: <pre> Total: 5105999 objects 5003717 98.0% 98.0% 5009471 98.1% rocksdb::DBIter::MergeValuesNewToOld (inline) 20260 0.4% 98.4% 20260 0.4% std::__cxx11::basic_string::_M_mutate 15214 0.3% 98.7% 15214 0.3% rocksdb::UncompressBlockContentsForCompressionType (inline) 13408 0.3% 99.0% 13408 0.3% std::_Rb_tree::_M_emplace_hint_unique [clone .constprop.416] (inline) 12957 0.3% 99.2% 12957 0.3% std::_Rb_tree::_M_emplace_hint_unique [clone .constprop.405] (inline) 9327 0.2% 99.4% 9327 0.2% std::_Rb_tree::_M_copy (inline) 7691 0.2% 99.5% 9919 0.2% JVM_FindSignal 2859 0.1% 99.6% 2859 0.1% rocksdb::Cleanable::RegisterCleanup 2844 0.1% 99.7% 2844 0.1% std::map::operator[] (inline) </pre> The "MergeValuesNewToOld (inline)" objects are the #define wrappers to SyncPoint::Process. We discovered this in a 5.18 rocksdb release. There TCMALLOC was more specific that std::basic_string was being constructed. I believe that was before SyncPoint::Process was declared inline in subsequent releases. 
The TCMALLOC top object list after this PR: <pre> Total: 104911 objects 45090 43.0% 43.0% 45090 43.0% rocksdb::Cleanable::RegisterCleanup 29995 28.6% 71.6% 29995 28.6% rocksdb::LRUCacheShard::Insert 15229 14.5% 86.1% 15229 14.5% rocksdb::UncompressBlockContentsForCompressionType (inline) 4373 4.2% 90.3% 4551 4.3% JVM_FindSignal 2881 2.7% 93.0% 2881 2.7% rocksdb::::ReadBlockFromFile (inline) 1162 1.1% 94.1% 1176 1.1% rocksdb::BlockFetcher::ReadBlockContents (inline) 1036 1.0% 95.1% 1036 1.0% std::__cxx11::basic_string::_M_mutate 869 0.8% 95.9% 869 0.8% std::vector::_M_realloc_insert (inline) 806 0.8% 96.7% 806 0.8% SnmpAgent::GetVariables (inline) </pre> Pull Request resolved: https://github.com/facebook/rocksdb/pull/9023 Reviewed By: pdillinger Differential Revision: D31610907 Pulled By: mrambacher fbshipit-source-id: 574ff51b639dd46ad253a8e664a575f06b7cc85d
2021-10-15 20:05:17 +00:00
// Use a filter to prevent mutex lock if possible.
if (!point_filter_.MayContain(point)) {
return;
}
SyncPoint::Process thrashes heap ... fix it (#9023) Summary: The first parameter of SyncPoint::Process is "const std::string&". The majority, maybe all, of the actual calls to this function use a "const char *". The conversion before entering the function requires a construction of a std::string object on the heap. This std::object is then typically not needed because first use of the string is a rocksdb::Slice which has a less costly conversion of char * to slice. Example: We have a load and iterate test. The test loads 10m keys and iterates most via 10 rocksdb::Iterator objects. We used TCMALLOC to gather information about allocation and space usage during iterators. - Before this PR: test took 32 min 17 sec - After this PR: test took 1 min 14 sec The TCMALLOC top object list before this PR: <pre> Total: 5105999 objects 5003717 98.0% 98.0% 5009471 98.1% rocksdb::DBIter::MergeValuesNewToOld (inline) 20260 0.4% 98.4% 20260 0.4% std::__cxx11::basic_string::_M_mutate 15214 0.3% 98.7% 15214 0.3% rocksdb::UncompressBlockContentsForCompressionType (inline) 13408 0.3% 99.0% 13408 0.3% std::_Rb_tree::_M_emplace_hint_unique [clone .constprop.416] (inline) 12957 0.3% 99.2% 12957 0.3% std::_Rb_tree::_M_emplace_hint_unique [clone .constprop.405] (inline) 9327 0.2% 99.4% 9327 0.2% std::_Rb_tree::_M_copy (inline) 7691 0.2% 99.5% 9919 0.2% JVM_FindSignal 2859 0.1% 99.6% 2859 0.1% rocksdb::Cleanable::RegisterCleanup 2844 0.1% 99.7% 2844 0.1% std::map::operator[] (inline) </pre> The "MergeValuesNewToOld (inline)" objects are the #define wrappers to SyncPoint::Process. We discovered this in a 5.18 rocksdb release. There TCMALLOC was more specific that std::basic_string was being constructed. I believe that was before SyncPoint::Process was declared inline in subsequent releases. 
The TCMALLOC top object list after this PR: <pre> Total: 104911 objects 45090 43.0% 43.0% 45090 43.0% rocksdb::Cleanable::RegisterCleanup 29995 28.6% 71.6% 29995 28.6% rocksdb::LRUCacheShard::Insert 15229 14.5% 86.1% 15229 14.5% rocksdb::UncompressBlockContentsForCompressionType (inline) 4373 4.2% 90.3% 4551 4.3% JVM_FindSignal 2881 2.7% 93.0% 2881 2.7% rocksdb::::ReadBlockFromFile (inline) 1162 1.1% 94.1% 1176 1.1% rocksdb::BlockFetcher::ReadBlockContents (inline) 1036 1.0% 95.1% 1036 1.0% std::__cxx11::basic_string::_M_mutate 869 0.8% 95.9% 869 0.8% std::vector::_M_realloc_insert (inline) 806 0.8% 96.7% 806 0.8% SnmpAgent::GetVariables (inline) </pre> Pull Request resolved: https://github.com/facebook/rocksdb/pull/9023 Reviewed By: pdillinger Differential Revision: D31610907 Pulled By: mrambacher fbshipit-source-id: 574ff51b639dd46ad253a8e664a575f06b7cc85d
2021-10-15 20:05:17 +00:00
// Must convert to std::string for remaining work. Take
// heap hit.
std::string point_string(point.ToString());
std::unique_lock<std::mutex> lock(mutex_);
auto thread_id = std::this_thread::get_id();
SyncPoint::Process thrashes heap ... fix it (#9023) Summary: The first parameter of SyncPoint::Process is "const std::string&". The majority, maybe all, of the actual calls to this function use a "const char *". The conversion before entering the function requires a construction of a std::string object on the heap. This std::object is then typically not needed because first use of the string is a rocksdb::Slice which has a less costly conversion of char * to slice. Example: We have a load and iterate test. The test loads 10m keys and iterates most via 10 rocksdb::Iterator objects. We used TCMALLOC to gather information about allocation and space usage during iterators. - Before this PR: test took 32 min 17 sec - After this PR: test took 1 min 14 sec The TCMALLOC top object list before this PR: <pre> Total: 5105999 objects 5003717 98.0% 98.0% 5009471 98.1% rocksdb::DBIter::MergeValuesNewToOld (inline) 20260 0.4% 98.4% 20260 0.4% std::__cxx11::basic_string::_M_mutate 15214 0.3% 98.7% 15214 0.3% rocksdb::UncompressBlockContentsForCompressionType (inline) 13408 0.3% 99.0% 13408 0.3% std::_Rb_tree::_M_emplace_hint_unique [clone .constprop.416] (inline) 12957 0.3% 99.2% 12957 0.3% std::_Rb_tree::_M_emplace_hint_unique [clone .constprop.405] (inline) 9327 0.2% 99.4% 9327 0.2% std::_Rb_tree::_M_copy (inline) 7691 0.2% 99.5% 9919 0.2% JVM_FindSignal 2859 0.1% 99.6% 2859 0.1% rocksdb::Cleanable::RegisterCleanup 2844 0.1% 99.7% 2844 0.1% std::map::operator[] (inline) </pre> The "MergeValuesNewToOld (inline)" objects are the #define wrappers to SyncPoint::Process. We discovered this in a 5.18 rocksdb release. There TCMALLOC was more specific that std::basic_string was being constructed. I believe that was before SyncPoint::Process was declared inline in subsequent releases. 
The TCMALLOC top object list after this PR: <pre> Total: 104911 objects 45090 43.0% 43.0% 45090 43.0% rocksdb::Cleanable::RegisterCleanup 29995 28.6% 71.6% 29995 28.6% rocksdb::LRUCacheShard::Insert 15229 14.5% 86.1% 15229 14.5% rocksdb::UncompressBlockContentsForCompressionType (inline) 4373 4.2% 90.3% 4551 4.3% JVM_FindSignal 2881 2.7% 93.0% 2881 2.7% rocksdb::::ReadBlockFromFile (inline) 1162 1.1% 94.1% 1176 1.1% rocksdb::BlockFetcher::ReadBlockContents (inline) 1036 1.0% 95.1% 1036 1.0% std::__cxx11::basic_string::_M_mutate 869 0.8% 95.9% 869 0.8% std::vector::_M_realloc_insert (inline) 806 0.8% 96.7% 806 0.8% SnmpAgent::GetVariables (inline) </pre> Pull Request resolved: https://github.com/facebook/rocksdb/pull/9023 Reviewed By: pdillinger Differential Revision: D31610907 Pulled By: mrambacher fbshipit-source-id: 574ff51b639dd46ad253a8e664a575f06b7cc85d
2021-10-15 20:05:17 +00:00
auto marker_iter = markers_.find(point_string);
if (marker_iter != markers_.end()) {
for (auto& marked_point : marker_iter->second) {
marked_thread_id_.emplace(marked_point, thread_id);
point_filter_.Add(marked_point);
}
}
SyncPoint::Process thrashes heap ... fix it (#9023) Summary: The first parameter of SyncPoint::Process is "const std::string&". The majority, maybe all, of the actual calls to this function use a "const char *". The conversion before entering the function requires a construction of a std::string object on the heap. This std::object is then typically not needed because first use of the string is a rocksdb::Slice which has a less costly conversion of char * to slice. Example: We have a load and iterate test. The test loads 10m keys and iterates most via 10 rocksdb::Iterator objects. We used TCMALLOC to gather information about allocation and space usage during iterators. - Before this PR: test took 32 min 17 sec - After this PR: test took 1 min 14 sec The TCMALLOC top object list before this PR: <pre> Total: 5105999 objects 5003717 98.0% 98.0% 5009471 98.1% rocksdb::DBIter::MergeValuesNewToOld (inline) 20260 0.4% 98.4% 20260 0.4% std::__cxx11::basic_string::_M_mutate 15214 0.3% 98.7% 15214 0.3% rocksdb::UncompressBlockContentsForCompressionType (inline) 13408 0.3% 99.0% 13408 0.3% std::_Rb_tree::_M_emplace_hint_unique [clone .constprop.416] (inline) 12957 0.3% 99.2% 12957 0.3% std::_Rb_tree::_M_emplace_hint_unique [clone .constprop.405] (inline) 9327 0.2% 99.4% 9327 0.2% std::_Rb_tree::_M_copy (inline) 7691 0.2% 99.5% 9919 0.2% JVM_FindSignal 2859 0.1% 99.6% 2859 0.1% rocksdb::Cleanable::RegisterCleanup 2844 0.1% 99.7% 2844 0.1% std::map::operator[] (inline) </pre> The "MergeValuesNewToOld (inline)" objects are the #define wrappers to SyncPoint::Process. We discovered this in a 5.18 rocksdb release. There TCMALLOC was more specific that std::basic_string was being constructed. I believe that was before SyncPoint::Process was declared inline in subsequent releases. 
The TCMALLOC top object list after this PR: <pre> Total: 104911 objects 45090 43.0% 43.0% 45090 43.0% rocksdb::Cleanable::RegisterCleanup 29995 28.6% 71.6% 29995 28.6% rocksdb::LRUCacheShard::Insert 15229 14.5% 86.1% 15229 14.5% rocksdb::UncompressBlockContentsForCompressionType (inline) 4373 4.2% 90.3% 4551 4.3% JVM_FindSignal 2881 2.7% 93.0% 2881 2.7% rocksdb::::ReadBlockFromFile (inline) 1162 1.1% 94.1% 1176 1.1% rocksdb::BlockFetcher::ReadBlockContents (inline) 1036 1.0% 95.1% 1036 1.0% std::__cxx11::basic_string::_M_mutate 869 0.8% 95.9% 869 0.8% std::vector::_M_realloc_insert (inline) 806 0.8% 96.7% 806 0.8% SnmpAgent::GetVariables (inline) </pre> Pull Request resolved: https://github.com/facebook/rocksdb/pull/9023 Reviewed By: pdillinger Differential Revision: D31610907 Pulled By: mrambacher fbshipit-source-id: 574ff51b639dd46ad253a8e664a575f06b7cc85d
2021-10-15 20:05:17 +00:00
if (DisabledByMarker(point_string, thread_id)) {
return;
}
SyncPoint::Process thrashes heap ... fix it (#9023) Summary: The first parameter of SyncPoint::Process is "const std::string&". The majority, maybe all, of the actual calls to this function use a "const char *". The conversion before entering the function requires a construction of a std::string object on the heap. This std::object is then typically not needed because first use of the string is a rocksdb::Slice which has a less costly conversion of char * to slice. Example: We have a load and iterate test. The test loads 10m keys and iterates most via 10 rocksdb::Iterator objects. We used TCMALLOC to gather information about allocation and space usage during iterators. - Before this PR: test took 32 min 17 sec - After this PR: test took 1 min 14 sec The TCMALLOC top object list before this PR: <pre> Total: 5105999 objects 5003717 98.0% 98.0% 5009471 98.1% rocksdb::DBIter::MergeValuesNewToOld (inline) 20260 0.4% 98.4% 20260 0.4% std::__cxx11::basic_string::_M_mutate 15214 0.3% 98.7% 15214 0.3% rocksdb::UncompressBlockContentsForCompressionType (inline) 13408 0.3% 99.0% 13408 0.3% std::_Rb_tree::_M_emplace_hint_unique [clone .constprop.416] (inline) 12957 0.3% 99.2% 12957 0.3% std::_Rb_tree::_M_emplace_hint_unique [clone .constprop.405] (inline) 9327 0.2% 99.4% 9327 0.2% std::_Rb_tree::_M_copy (inline) 7691 0.2% 99.5% 9919 0.2% JVM_FindSignal 2859 0.1% 99.6% 2859 0.1% rocksdb::Cleanable::RegisterCleanup 2844 0.1% 99.7% 2844 0.1% std::map::operator[] (inline) </pre> The "MergeValuesNewToOld (inline)" objects are the #define wrappers to SyncPoint::Process. We discovered this in a 5.18 rocksdb release. There TCMALLOC was more specific that std::basic_string was being constructed. I believe that was before SyncPoint::Process was declared inline in subsequent releases. 
The TCMALLOC top object list after this PR: <pre> Total: 104911 objects 45090 43.0% 43.0% 45090 43.0% rocksdb::Cleanable::RegisterCleanup 29995 28.6% 71.6% 29995 28.6% rocksdb::LRUCacheShard::Insert 15229 14.5% 86.1% 15229 14.5% rocksdb::UncompressBlockContentsForCompressionType (inline) 4373 4.2% 90.3% 4551 4.3% JVM_FindSignal 2881 2.7% 93.0% 2881 2.7% rocksdb::::ReadBlockFromFile (inline) 1162 1.1% 94.1% 1176 1.1% rocksdb::BlockFetcher::ReadBlockContents (inline) 1036 1.0% 95.1% 1036 1.0% std::__cxx11::basic_string::_M_mutate 869 0.8% 95.9% 869 0.8% std::vector::_M_realloc_insert (inline) 806 0.8% 96.7% 806 0.8% SnmpAgent::GetVariables (inline) </pre> Pull Request resolved: https://github.com/facebook/rocksdb/pull/9023 Reviewed By: pdillinger Differential Revision: D31610907 Pulled By: mrambacher fbshipit-source-id: 574ff51b639dd46ad253a8e664a575f06b7cc85d
2021-10-15 20:05:17 +00:00
while (!PredecessorsAllCleared(point_string)) {
cv_.wait(lock);
SyncPoint::Process thrashes heap ... fix it (#9023) Summary: The first parameter of SyncPoint::Process is "const std::string&". The majority, maybe all, of the actual calls to this function use a "const char *". The conversion before entering the function requires a construction of a std::string object on the heap. This std::object is then typically not needed because first use of the string is a rocksdb::Slice which has a less costly conversion of char * to slice. Example: We have a load and iterate test. The test loads 10m keys and iterates most via 10 rocksdb::Iterator objects. We used TCMALLOC to gather information about allocation and space usage during iterators. - Before this PR: test took 32 min 17 sec - After this PR: test took 1 min 14 sec The TCMALLOC top object list before this PR: <pre> Total: 5105999 objects 5003717 98.0% 98.0% 5009471 98.1% rocksdb::DBIter::MergeValuesNewToOld (inline) 20260 0.4% 98.4% 20260 0.4% std::__cxx11::basic_string::_M_mutate 15214 0.3% 98.7% 15214 0.3% rocksdb::UncompressBlockContentsForCompressionType (inline) 13408 0.3% 99.0% 13408 0.3% std::_Rb_tree::_M_emplace_hint_unique [clone .constprop.416] (inline) 12957 0.3% 99.2% 12957 0.3% std::_Rb_tree::_M_emplace_hint_unique [clone .constprop.405] (inline) 9327 0.2% 99.4% 9327 0.2% std::_Rb_tree::_M_copy (inline) 7691 0.2% 99.5% 9919 0.2% JVM_FindSignal 2859 0.1% 99.6% 2859 0.1% rocksdb::Cleanable::RegisterCleanup 2844 0.1% 99.7% 2844 0.1% std::map::operator[] (inline) </pre> The "MergeValuesNewToOld (inline)" objects are the #define wrappers to SyncPoint::Process. We discovered this in a 5.18 rocksdb release. There TCMALLOC was more specific that std::basic_string was being constructed. I believe that was before SyncPoint::Process was declared inline in subsequent releases. 
The TCMALLOC top object list after this PR: <pre> Total: 104911 objects 45090 43.0% 43.0% 45090 43.0% rocksdb::Cleanable::RegisterCleanup 29995 28.6% 71.6% 29995 28.6% rocksdb::LRUCacheShard::Insert 15229 14.5% 86.1% 15229 14.5% rocksdb::UncompressBlockContentsForCompressionType (inline) 4373 4.2% 90.3% 4551 4.3% JVM_FindSignal 2881 2.7% 93.0% 2881 2.7% rocksdb::::ReadBlockFromFile (inline) 1162 1.1% 94.1% 1176 1.1% rocksdb::BlockFetcher::ReadBlockContents (inline) 1036 1.0% 95.1% 1036 1.0% std::__cxx11::basic_string::_M_mutate 869 0.8% 95.9% 869 0.8% std::vector::_M_realloc_insert (inline) 806 0.8% 96.7% 806 0.8% SnmpAgent::GetVariables (inline) </pre> Pull Request resolved: https://github.com/facebook/rocksdb/pull/9023 Reviewed By: pdillinger Differential Revision: D31610907 Pulled By: mrambacher fbshipit-source-id: 574ff51b639dd46ad253a8e664a575f06b7cc85d
2021-10-15 20:05:17 +00:00
if (DisabledByMarker(point_string, thread_id)) {
return;
}
}
SyncPoint::Process thrashes heap ... fix it (#9023) Summary: The first parameter of SyncPoint::Process is "const std::string&". The majority, maybe all, of the actual calls to this function use a "const char *". The conversion before entering the function requires a construction of a std::string object on the heap. This std::object is then typically not needed because first use of the string is a rocksdb::Slice which has a less costly conversion of char * to slice. Example: We have a load and iterate test. The test loads 10m keys and iterates most via 10 rocksdb::Iterator objects. We used TCMALLOC to gather information about allocation and space usage during iterators. - Before this PR: test took 32 min 17 sec - After this PR: test took 1 min 14 sec The TCMALLOC top object list before this PR: <pre> Total: 5105999 objects 5003717 98.0% 98.0% 5009471 98.1% rocksdb::DBIter::MergeValuesNewToOld (inline) 20260 0.4% 98.4% 20260 0.4% std::__cxx11::basic_string::_M_mutate 15214 0.3% 98.7% 15214 0.3% rocksdb::UncompressBlockContentsForCompressionType (inline) 13408 0.3% 99.0% 13408 0.3% std::_Rb_tree::_M_emplace_hint_unique [clone .constprop.416] (inline) 12957 0.3% 99.2% 12957 0.3% std::_Rb_tree::_M_emplace_hint_unique [clone .constprop.405] (inline) 9327 0.2% 99.4% 9327 0.2% std::_Rb_tree::_M_copy (inline) 7691 0.2% 99.5% 9919 0.2% JVM_FindSignal 2859 0.1% 99.6% 2859 0.1% rocksdb::Cleanable::RegisterCleanup 2844 0.1% 99.7% 2844 0.1% std::map::operator[] (inline) </pre> The "MergeValuesNewToOld (inline)" objects are the #define wrappers to SyncPoint::Process. We discovered this in a 5.18 rocksdb release. There TCMALLOC was more specific that std::basic_string was being constructed. I believe that was before SyncPoint::Process was declared inline in subsequent releases. 
The TCMALLOC top object list after this PR: <pre> Total: 104911 objects 45090 43.0% 43.0% 45090 43.0% rocksdb::Cleanable::RegisterCleanup 29995 28.6% 71.6% 29995 28.6% rocksdb::LRUCacheShard::Insert 15229 14.5% 86.1% 15229 14.5% rocksdb::UncompressBlockContentsForCompressionType (inline) 4373 4.2% 90.3% 4551 4.3% JVM_FindSignal 2881 2.7% 93.0% 2881 2.7% rocksdb::::ReadBlockFromFile (inline) 1162 1.1% 94.1% 1176 1.1% rocksdb::BlockFetcher::ReadBlockContents (inline) 1036 1.0% 95.1% 1036 1.0% std::__cxx11::basic_string::_M_mutate 869 0.8% 95.9% 869 0.8% std::vector::_M_realloc_insert (inline) 806 0.8% 96.7% 806 0.8% SnmpAgent::GetVariables (inline) </pre> Pull Request resolved: https://github.com/facebook/rocksdb/pull/9023 Reviewed By: pdillinger Differential Revision: D31610907 Pulled By: mrambacher fbshipit-source-id: 574ff51b639dd46ad253a8e664a575f06b7cc85d
2021-10-15 20:05:17 +00:00
auto callback_pair = callbacks_.find(point_string);
if (callback_pair != callbacks_.end()) {
num_callbacks_running_++;
mutex_.unlock();
callback_pair->second(cb_arg);
mutex_.lock();
num_callbacks_running_--;
}
SyncPoint::Process thrashes heap ... fix it (#9023) Summary: The first parameter of SyncPoint::Process is "const std::string&". The majority, maybe all, of the actual calls to this function use a "const char *". The conversion before entering the function requires a construction of a std::string object on the heap. This std::object is then typically not needed because first use of the string is a rocksdb::Slice which has a less costly conversion of char * to slice. Example: We have a load and iterate test. The test loads 10m keys and iterates most via 10 rocksdb::Iterator objects. We used TCMALLOC to gather information about allocation and space usage during iterators. - Before this PR: test took 32 min 17 sec - After this PR: test took 1 min 14 sec The TCMALLOC top object list before this PR: <pre> Total: 5105999 objects 5003717 98.0% 98.0% 5009471 98.1% rocksdb::DBIter::MergeValuesNewToOld (inline) 20260 0.4% 98.4% 20260 0.4% std::__cxx11::basic_string::_M_mutate 15214 0.3% 98.7% 15214 0.3% rocksdb::UncompressBlockContentsForCompressionType (inline) 13408 0.3% 99.0% 13408 0.3% std::_Rb_tree::_M_emplace_hint_unique [clone .constprop.416] (inline) 12957 0.3% 99.2% 12957 0.3% std::_Rb_tree::_M_emplace_hint_unique [clone .constprop.405] (inline) 9327 0.2% 99.4% 9327 0.2% std::_Rb_tree::_M_copy (inline) 7691 0.2% 99.5% 9919 0.2% JVM_FindSignal 2859 0.1% 99.6% 2859 0.1% rocksdb::Cleanable::RegisterCleanup 2844 0.1% 99.7% 2844 0.1% std::map::operator[] (inline) </pre> The "MergeValuesNewToOld (inline)" objects are the #define wrappers to SyncPoint::Process. We discovered this in a 5.18 rocksdb release. There TCMALLOC was more specific that std::basic_string was being constructed. I believe that was before SyncPoint::Process was declared inline in subsequent releases. 
The TCMALLOC top object list after this PR: <pre> Total: 104911 objects 45090 43.0% 43.0% 45090 43.0% rocksdb::Cleanable::RegisterCleanup 29995 28.6% 71.6% 29995 28.6% rocksdb::LRUCacheShard::Insert 15229 14.5% 86.1% 15229 14.5% rocksdb::UncompressBlockContentsForCompressionType (inline) 4373 4.2% 90.3% 4551 4.3% JVM_FindSignal 2881 2.7% 93.0% 2881 2.7% rocksdb::::ReadBlockFromFile (inline) 1162 1.1% 94.1% 1176 1.1% rocksdb::BlockFetcher::ReadBlockContents (inline) 1036 1.0% 95.1% 1036 1.0% std::__cxx11::basic_string::_M_mutate 869 0.8% 95.9% 869 0.8% std::vector::_M_realloc_insert (inline) 806 0.8% 96.7% 806 0.8% SnmpAgent::GetVariables (inline) </pre> Pull Request resolved: https://github.com/facebook/rocksdb/pull/9023 Reviewed By: pdillinger Differential Revision: D31610907 Pulled By: mrambacher fbshipit-source-id: 574ff51b639dd46ad253a8e664a575f06b7cc85d
2021-10-15 20:05:17 +00:00
cleared_points_.insert(point_string);
cv_.notify_all();
}
} // namespace ROCKSDB_NAMESPACE
#endif