[FR] Add API to provide custom profilers #1807 (#1809)

This API is akin to the MemoryManager API and lets tools provide
their own profiler which is wrapped in the same way MemoryManager is
wrapped. Namely, the profiler provides Start/Stop methods that are called
at the start/end of running the benchmark in a separate pass.

Co-authored-by: dominic <510002+dmah42@users.noreply.github.com>
This commit is contained in:
xdje42 2024-07-16 01:56:40 -07:00 committed by GitHub
parent d2cd246e19
commit 7c8ed6b082
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
8 changed files with 134 additions and 16 deletions

View File

@ -42,6 +42,7 @@ Dominic Hamon <dma@stripysock.com> <dominic@google.com>
Dominik Czarnota <dominik.b.czarnota@gmail.com> Dominik Czarnota <dominik.b.czarnota@gmail.com>
Dominik Korman <kormandominik@gmail.com> Dominik Korman <kormandominik@gmail.com>
Donald Aingworth <donalds_junk_mail@yahoo.com> Donald Aingworth <donalds_junk_mail@yahoo.com>
Doug Evans <xdje42@gmail.com>
Eric Backus <eric_backus@alum.mit.edu> Eric Backus <eric_backus@alum.mit.edu>
Eric Fiselier <eric@efcs.ca> Eric Fiselier <eric@efcs.ca>
Eugene Zhuk <eugene.zhuk@gmail.com> Eugene Zhuk <eugene.zhuk@gmail.com>

View File

@ -1139,6 +1139,21 @@ a report on the number of allocations, bytes used, etc.
This data will then be reported alongside other performance data, currently This data will then be reported alongside other performance data, currently
only when using JSON output. only when using JSON output.
<a name="profiling" />
## Profiling
It's often useful to also profile benchmarks in particular ways, in addition to
CPU performance. For this reason, benchmark offers the `RegisterProfilerManager`
method that allows a custom `ProfilerManager` to be injected.
If set, the `ProfilerManager::AfterSetupStart` and
`ProfilerManager::BeforeTeardownStop` methods will be called at the start and
end of a separate benchmark run to allow user code to collect and report
user-provided profile metrics.
Output collected from this profiling run must be reported separately.
<a name="using-register-benchmark" /> <a name="using-register-benchmark" />
## Using RegisterBenchmark(name, fn, args...) ## Using RegisterBenchmark(name, fn, args...)

View File

@ -416,6 +416,26 @@ class MemoryManager {
BENCHMARK_EXPORT BENCHMARK_EXPORT
void RegisterMemoryManager(MemoryManager* memory_manager); void RegisterMemoryManager(MemoryManager* memory_manager);
// Interface for plugging an external profiler into a benchmark run.
// If a ProfilerManager is registered (via RegisterProfilerManager()), the
// benchmark will be run an additional time under the profiler to collect and
// report profile metrics for the run of the benchmark.
// Both hooks return void: nothing is handed back to the library through this
// interface, so an implementation is responsible for reporting whatever it
// collects.
class ProfilerManager {
public:
// Virtual so implementations can be destroyed through a ProfilerManager*.
virtual ~ProfilerManager() {}
// This is called after `Setup()` code and right before the benchmark is run.
// Implementations are expected to begin collecting profile data here.
virtual void AfterSetupStart() = 0;
// This is called before `Teardown()` code and right after the benchmark
// completes. Implementations are expected to stop collecting here.
virtual void BeforeTeardownStop() = 0;
};
// Register a ProfilerManager instance that will be used to collect and report
// profile measurements for benchmark runs.
BENCHMARK_EXPORT
void RegisterProfilerManager(ProfilerManager* profiler_manager);
// Add a key-value pair to output as part of the context stanza in the report. // Add a key-value pair to output as part of the context stanza in the report.
BENCHMARK_EXPORT BENCHMARK_EXPORT
void AddCustomContext(const std::string& key, const std::string& value); void AddCustomContext(const std::string& key, const std::string& value);

View File

@ -656,6 +656,10 @@ void RegisterMemoryManager(MemoryManager* manager) {
internal::memory_manager = manager; internal::memory_manager = manager;
} }
// Installs `manager` as the global profiler hook by storing the raw pointer in
// internal::profiler_manager. Only the pointer is stored: this function neither
// copies nor deletes the object. Passing nullptr clears the registration, which
// disables the extra profiling pass (the runner only profiles when the pointer
// is non-null).
void RegisterProfilerManager(ProfilerManager* manager) {
internal::profiler_manager = manager;
}
void AddCustomContext(const std::string& key, const std::string& value) { void AddCustomContext(const std::string& key, const std::string& value) {
if (internal::global_context == nullptr) { if (internal::global_context == nullptr) {
internal::global_context = new std::map<std::string, std::string>(); internal::global_context = new std::map<std::string, std::string>();

View File

@ -62,6 +62,8 @@ namespace internal {
MemoryManager* memory_manager = nullptr; MemoryManager* memory_manager = nullptr;
ProfilerManager* profiler_manager = nullptr;
namespace { namespace {
static constexpr IterationCount kMaxIterations = 1000000000000; static constexpr IterationCount kMaxIterations = 1000000000000;
@ -401,6 +403,41 @@ void BenchmarkRunner::RunWarmUp() {
} }
} }
// Runs one extra pass of the benchmark for `memory_iterations` iterations,
// bracketed by memory_manager->Start() and memory_manager->Stop(), and returns
// a pointer to the Result filled in by Stop(). The Result is appended to
// `memory_results`, so the returned pointer refers to that container's last
// element.
MemoryManager::Result* BenchmarkRunner::RunMemoryManager(
    IterationCount memory_iterations) {
  // TODO(vyng): Consider making BenchmarkReporter::Run::memory_result an
  // optional so we don't have to own the Result here.
  // Can't do it now due to cxx03.
  memory_results.push_back(MemoryManager::Result());
  MemoryManager::Result& result = memory_results.back();

  memory_manager->Start();

  // The ThreadManager is deliberately created (on the heap) after Start() and
  // released before Teardown(); keep this ordering, since everything between
  // Start() and Stop() is part of what the memory manager observes.
  std::unique_ptr<internal::ThreadManager> thread_manager(
      new internal::ThreadManager(1));
  b.Setup();
  RunInThread(&b, memory_iterations, 0, thread_manager.get(),
              perf_counters_measurement_ptr);
  thread_manager->WaitForAllThreads();
  thread_manager.reset();
  b.Teardown();

  memory_manager->Stop(result);
  return &result;
}
void BenchmarkRunner::RunProfilerManager() {
// TODO: Provide a way to specify the number of iterations.
IterationCount profile_iterations = 1;
std::unique_ptr<internal::ThreadManager> manager;
manager.reset(new internal::ThreadManager(1));
b.Setup();
profiler_manager->AfterSetupStart();
RunInThread(&b, profile_iterations, 0, manager.get(),
/*perf_counters_measurement_ptr=*/nullptr);
manager->WaitForAllThreads();
profiler_manager->BeforeTeardownStop();
manager.reset();
b.Teardown();
}
void BenchmarkRunner::DoOneRepetition() { void BenchmarkRunner::DoOneRepetition() {
assert(HasRepeatsRemaining() && "Already done all repetitions?"); assert(HasRepeatsRemaining() && "Already done all repetitions?");
@ -445,28 +482,18 @@ void BenchmarkRunner::DoOneRepetition() {
"then we should have accepted the current iteration run."); "then we should have accepted the current iteration run.");
} }
// Oh, one last thing, we need to also produce the 'memory measurements'.. // Produce memory measurements if requested.
MemoryManager::Result* memory_result = nullptr; MemoryManager::Result* memory_result = nullptr;
IterationCount memory_iterations = 0; IterationCount memory_iterations = 0;
if (memory_manager != nullptr) { if (memory_manager != nullptr) {
// TODO(vyng): Consider making BenchmarkReporter::Run::memory_result an
// optional so we don't have to own the Result here.
// Can't do it now due to cxx03.
memory_results.push_back(MemoryManager::Result());
memory_result = &memory_results.back();
// Only run a few iterations to reduce the impact of one-time // Only run a few iterations to reduce the impact of one-time
// allocations in benchmarks that are not properly managed. // allocations in benchmarks that are not properly managed.
memory_iterations = std::min<IterationCount>(16, iters); memory_iterations = std::min<IterationCount>(16, iters);
memory_manager->Start(); memory_result = RunMemoryManager(memory_iterations);
std::unique_ptr<internal::ThreadManager> manager; }
manager.reset(new internal::ThreadManager(1));
b.Setup(); if (profiler_manager != nullptr) {
RunInThread(&b, memory_iterations, 0, manager.get(), RunProfilerManager();
perf_counters_measurement_ptr);
manager->WaitForAllThreads();
manager.reset();
b.Teardown();
memory_manager->Stop(*memory_result);
} }
// Ok, now actually report. // Ok, now actually report.

View File

@ -35,6 +35,7 @@ BM_DECLARE_string(benchmark_perf_counters);
namespace internal { namespace internal {
extern MemoryManager* memory_manager; extern MemoryManager* memory_manager;
extern ProfilerManager* profiler_manager;
struct RunResults { struct RunResults {
std::vector<BenchmarkReporter::Run> non_aggregates; std::vector<BenchmarkReporter::Run> non_aggregates;
@ -113,6 +114,10 @@ class BenchmarkRunner {
}; };
IterationResults DoNIterations(); IterationResults DoNIterations();
MemoryManager::Result* RunMemoryManager(IterationCount memory_iterations);
void RunProfilerManager();
IterationCount PredictNumItersNeeded(const IterationResults& i) const; IterationCount PredictNumItersNeeded(const IterationResults& i) const;
bool ShouldReportIterationResults(const IterationResults& i) const; bool ShouldReportIterationResults(const IterationResults& i) const;

View File

@ -192,6 +192,9 @@ benchmark_add_test(NAME user_counters_thousands_test COMMAND user_counters_thous
compile_output_test(memory_manager_test) compile_output_test(memory_manager_test)
benchmark_add_test(NAME memory_manager_test COMMAND memory_manager_test --benchmark_min_time=0.01s) benchmark_add_test(NAME memory_manager_test COMMAND memory_manager_test --benchmark_min_time=0.01s)
compile_output_test(profiler_manager_test)
benchmark_add_test(NAME profiler_manager_test COMMAND profiler_manager_test --benchmark_min_time=0.01s)
# MSVC does not allow to set the language standard to C++98/03. # MSVC does not allow to set the language standard to C++98/03.
if(NOT (MSVC OR CMAKE_CXX_SIMULATE_ID STREQUAL "MSVC")) if(NOT (MSVC OR CMAKE_CXX_SIMULATE_ID STREQUAL "MSVC"))
compile_benchmark_test(cxx03_test) compile_benchmark_test(cxx03_test)

View File

@ -0,0 +1,43 @@
// FIXME: WIP
#include <memory>
#include "benchmark/benchmark.h"
#include "output_test.h"
class TestProfilerManager : public benchmark::ProfilerManager {
void AfterSetupStart() override {}
void BeforeTeardownStop() override {}
};
void BM_empty(benchmark::State& state) {
for (auto _ : state) {
auto iterations = state.iterations();
benchmark::DoNotOptimize(iterations);
}
}
BENCHMARK(BM_empty);
ADD_CASES(TC_ConsoleOut, {{"^BM_empty %console_report$"}});
ADD_CASES(TC_JSONOut, {{"\"name\": \"BM_empty\",$"},
{"\"family_index\": 0,$", MR_Next},
{"\"per_family_instance_index\": 0,$", MR_Next},
{"\"run_name\": \"BM_empty\",$", MR_Next},
{"\"run_type\": \"iteration\",$", MR_Next},
{"\"repetitions\": 1,$", MR_Next},
{"\"repetition_index\": 0,$", MR_Next},
{"\"threads\": 1,$", MR_Next},
{"\"iterations\": %int,$", MR_Next},
{"\"real_time\": %float,$", MR_Next},
{"\"cpu_time\": %float,$", MR_Next},
{"\"time_unit\": \"ns\"$", MR_Next},
{"}", MR_Next}});
ADD_CASES(TC_CSVOut, {{"^\"BM_empty\",%csv_report$"}});
int main(int argc, char* argv[]) {
std::unique_ptr<benchmark::ProfilerManager> pm(new TestProfilerManager());
benchmark::RegisterProfilerManager(pm.get());
RunOutputTests(argc, argv);
benchmark::RegisterProfilerManager(nullptr);
}