From 7c8ed6b082aa3c7a3402f18e50da4480421d08fd Mon Sep 17 00:00:00 2001
From: xdje42
Date: Tue, 16 Jul 2024 01:56:40 -0700
Subject: [PATCH] [FR] Add API to provide custom profilers #1807 (#1809)

This API is akin to the MemoryManager API and lets tools provide their own
profiler, which is hooked up in the same way MemoryManager is. Namely, the
profiler provides start/stop hooks (AfterSetupStart and BeforeTeardownStop)
that are called at the start and end of running the benchmark in a separate
pass.

Co-authored-by: dominic <510002+dmah42@users.noreply.github.com>
---
 CONTRIBUTORS                  |  1 +
 docs/user_guide.md            | 15 +++++++++
 include/benchmark/benchmark.h | 20 ++++++++++++
 src/benchmark.cc              |  4 +++
 src/benchmark_runner.cc       | 59 +++++++++++++++++++++++++----------
 src/benchmark_runner.h        |  5 +++
 test/CMakeLists.txt           |  3 ++
 test/profiler_manager_test.cc | 43 +++++++++++++++++++++++++
 8 files changed, 134 insertions(+), 16 deletions(-)
 create mode 100644 test/profiler_manager_test.cc

diff --git a/CONTRIBUTORS b/CONTRIBUTORS
index 9ca2caa3..54aba7b5 100644
--- a/CONTRIBUTORS
+++ b/CONTRIBUTORS
@@ -42,6 +42,7 @@ Dominic Hamon
 Dominik Czarnota
 Dominik Korman
 Donald Aingworth
+Doug Evans
 Eric Backus
 Eric Fiselier
 Eugene Zhuk
diff --git a/docs/user_guide.md b/docs/user_guide.md
index d87ccc40..e3826209 100644
--- a/docs/user_guide.md
+++ b/docs/user_guide.md
@@ -1139,6 +1139,21 @@ a report on the number of allocations, bytes used, etc. This data will then be
 reported alongside other performance data, currently only when using JSON
 output.
 
+<a name="profiling" />
+
+## Profiling
+
+It's often useful to also profile benchmarks in particular ways, in addition to
+CPU performance. For this reason, benchmark offers the `RegisterProfilerManager`
+function that allows a custom `ProfilerManager` to be injected.
+
+If set, the `ProfilerManager::AfterSetupStart` and
+`ProfilerManager::BeforeTeardownStop` methods will be called at the start and
+end of a separate benchmark run to allow user code to collect and report
+user-provided profile metrics.
+
+Output collected from this profiling run must be reported separately.
+
 
 ## Using RegisterBenchmark(name, fn, args...)
diff --git a/include/benchmark/benchmark.h b/include/benchmark/benchmark.h
index 08cfe29d..7dd72e27 100644
--- a/include/benchmark/benchmark.h
+++ b/include/benchmark/benchmark.h
@@ -416,6 +416,26 @@ class MemoryManager {
 BENCHMARK_EXPORT
 void RegisterMemoryManager(MemoryManager* memory_manager);
 
+// If a ProfilerManager is registered (via RegisterProfilerManager()), the
+// benchmark will be run an additional time under the profiler to collect and
+// report profile metrics for the run of the benchmark.
+class ProfilerManager {
+ public:
+  virtual ~ProfilerManager() {}
+
+  // This is called after `Setup()` code and right before the benchmark is run.
+  virtual void AfterSetupStart() = 0;
+
+  // This is called before `Teardown()` code and right after the benchmark
+  // completes.
+  virtual void BeforeTeardownStop() = 0;
+};
+
+// Register a ProfilerManager instance that will be used to collect and report
+// profile measurements for benchmark runs.
+BENCHMARK_EXPORT
+void RegisterProfilerManager(ProfilerManager* profiler_manager);
+
 // Add a key-value pair to output as part of the context stanza in the report.
 BENCHMARK_EXPORT
 void AddCustomContext(const std::string& key, const std::string& value);
diff --git a/src/benchmark.cc b/src/benchmark.cc
index 337bb3fa..374c5141 100644
--- a/src/benchmark.cc
+++ b/src/benchmark.cc
@@ -656,6 +656,10 @@ void RegisterMemoryManager(MemoryManager* manager) {
   internal::memory_manager = manager;
 }
 
+void RegisterProfilerManager(ProfilerManager* manager) {
+  internal::profiler_manager = manager;
+}
+
 void AddCustomContext(const std::string& key, const std::string& value) {
   if (internal::global_context == nullptr) {
     internal::global_context = new std::map<std::string, std::string>();
diff --git a/src/benchmark_runner.cc b/src/benchmark_runner.cc
index a74bdadd..3a8c3076 100644
--- a/src/benchmark_runner.cc
+++ b/src/benchmark_runner.cc
@@ -62,6 +62,8 @@ namespace internal {
 
 MemoryManager* memory_manager = nullptr;
 
+ProfilerManager* profiler_manager = nullptr;
+
 namespace {
 
 static constexpr IterationCount kMaxIterations = 1000000000000;
@@ -401,6 +403,41 @@ void BenchmarkRunner::RunWarmUp() {
   }
 }
 
+MemoryManager::Result* BenchmarkRunner::RunMemoryManager(
+    IterationCount memory_iterations) {
+  // TODO(vyng): Consider making BenchmarkReporter::Run::memory_result an
+  // optional so we don't have to own the Result here.
+  // Can't do it now due to cxx03.
+  memory_results.push_back(MemoryManager::Result());
+  MemoryManager::Result* memory_result = &memory_results.back();
+  memory_manager->Start();
+  std::unique_ptr<internal::ThreadManager> manager;
+  manager.reset(new internal::ThreadManager(1));
+  b.Setup();
+  RunInThread(&b, memory_iterations, 0, manager.get(),
+              perf_counters_measurement_ptr);
+  manager->WaitForAllThreads();
+  manager.reset();
+  b.Teardown();
+  memory_manager->Stop(*memory_result);
+  return memory_result;
+}
+
+void BenchmarkRunner::RunProfilerManager() {
+  // TODO: Provide a way to specify the number of iterations.
+  IterationCount profile_iterations = 1;
+  std::unique_ptr<internal::ThreadManager> manager;
+  manager.reset(new internal::ThreadManager(1));
+  b.Setup();
+  profiler_manager->AfterSetupStart();
+  RunInThread(&b, profile_iterations, 0, manager.get(),
+              /*perf_counters_measurement_ptr=*/nullptr);
+  manager->WaitForAllThreads();
+  profiler_manager->BeforeTeardownStop();
+  manager.reset();
+  b.Teardown();
+}
+
 void BenchmarkRunner::DoOneRepetition() {
   assert(HasRepeatsRemaining() && "Already done all repetitions?");
 
@@ -445,28 +482,18 @@ void BenchmarkRunner::DoOneRepetition() {
            "then we should have accepted the current iteration run.");
   }
 
-  // Oh, one last thing, we need to also produce the 'memory measurements'..
+  // Produce memory measurements if requested.
   MemoryManager::Result* memory_result = nullptr;
   IterationCount memory_iterations = 0;
   if (memory_manager != nullptr) {
-    // TODO(vyng): Consider making BenchmarkReporter::Run::memory_result an
-    // optional so we don't have to own the Result here.
-    // Can't do it now due to cxx03.
-    memory_results.push_back(MemoryManager::Result());
-    memory_result = &memory_results.back();
     // Only run a few iterations to reduce the impact of one-time
     // allocations in benchmarks that are not properly managed.
     memory_iterations = std::min<IterationCount>(16, iters);
-    memory_manager->Start();
-    std::unique_ptr<internal::ThreadManager> manager;
-    manager.reset(new internal::ThreadManager(1));
-    b.Setup();
-    RunInThread(&b, memory_iterations, 0, manager.get(),
-                perf_counters_measurement_ptr);
-    manager->WaitForAllThreads();
-    manager.reset();
-    b.Teardown();
-    memory_manager->Stop(*memory_result);
+    memory_result = RunMemoryManager(memory_iterations);
+  }
+
+  if (profiler_manager != nullptr) {
+    RunProfilerManager();
   }
 
   // Ok, now actually report.
diff --git a/src/benchmark_runner.h b/src/benchmark_runner.h
index db2fa043..cd34d2d5 100644
--- a/src/benchmark_runner.h
+++ b/src/benchmark_runner.h
@@ -35,6 +35,7 @@ BM_DECLARE_string(benchmark_perf_counters);
 namespace internal {
 
 extern MemoryManager* memory_manager;
+extern ProfilerManager* profiler_manager;
 
 struct RunResults {
   std::vector<BenchmarkReporter::Run> non_aggregates;
@@ -113,6 +114,10 @@ class BenchmarkRunner {
   };
   IterationResults DoNIterations();
+  MemoryManager::Result* RunMemoryManager(IterationCount memory_iterations);
+
+  void RunProfilerManager();
+
   IterationCount PredictNumItersNeeded(const IterationResults& i) const;
 
   bool ShouldReportIterationResults(const IterationResults& i) const;
 
diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt
index 1de175f9..815b5818 100644
--- a/test/CMakeLists.txt
+++ b/test/CMakeLists.txt
@@ -192,6 +192,9 @@ benchmark_add_test(NAME user_counters_thousands_test COMMAND user_counters_thous
 compile_output_test(memory_manager_test)
 benchmark_add_test(NAME memory_manager_test COMMAND memory_manager_test --benchmark_min_time=0.01s)
 
+compile_output_test(profiler_manager_test)
+benchmark_add_test(NAME profiler_manager_test COMMAND profiler_manager_test --benchmark_min_time=0.01s)
+
 # MSVC does not allow to set the language standard to C++98/03.
 if(NOT (MSVC OR CMAKE_CXX_SIMULATE_ID STREQUAL "MSVC"))
   compile_benchmark_test(cxx03_test)
diff --git a/test/profiler_manager_test.cc b/test/profiler_manager_test.cc
new file mode 100644
index 00000000..1b3e36c3
--- /dev/null
+++ b/test/profiler_manager_test.cc
@@ -0,0 +1,43 @@
+// FIXME: WIP
+
+#include <memory>
+
+#include "benchmark/benchmark.h"
+#include "output_test.h"
+
+class TestProfilerManager : public benchmark::ProfilerManager {
+  void AfterSetupStart() override {}
+  void BeforeTeardownStop() override {}
+};
+
+void BM_empty(benchmark::State& state) {
+  for (auto _ : state) {
+    auto iterations = state.iterations();
+    benchmark::DoNotOptimize(iterations);
+  }
+}
+BENCHMARK(BM_empty);
+
+ADD_CASES(TC_ConsoleOut, {{"^BM_empty %console_report$"}});
+ADD_CASES(TC_JSONOut, {{"\"name\": \"BM_empty\",$"},
+                       {"\"family_index\": 0,$", MR_Next},
+                       {"\"per_family_instance_index\": 0,$", MR_Next},
+                       {"\"run_name\": \"BM_empty\",$", MR_Next},
+                       {"\"run_type\": \"iteration\",$", MR_Next},
+                       {"\"repetitions\": 1,$", MR_Next},
+                       {"\"repetition_index\": 0,$", MR_Next},
+                       {"\"threads\": 1,$", MR_Next},
+                       {"\"iterations\": %int,$", MR_Next},
+                       {"\"real_time\": %float,$", MR_Next},
+                       {"\"cpu_time\": %float,$", MR_Next},
+                       {"\"time_unit\": \"ns\"$", MR_Next},
+                       {"}", MR_Next}});
+ADD_CASES(TC_CSVOut, {{"^\"BM_empty\",%csv_report$"}});
+
+int main(int argc, char* argv[]) {
+  std::unique_ptr<TestProfilerManager> pm(new TestProfilerManager());
+
+  benchmark::RegisterProfilerManager(pm.get());
+  RunOutputTests(argc, argv);
+  benchmark::RegisterProfilerManager(nullptr);
+}
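For reference, here is a minimal sketch of how a tool might hook into the API this patch adds, built against a library that includes the patch. The `ElapsedProfiler` and `BM_demo` names are illustrative only, and the wall-clock timer stands in for a real profiling backend's start/stop calls.

```c++
#include <chrono>
#include <cstdio>

#include "benchmark/benchmark.h"

// Stand-in "profiler": times the extra profiling pass with a wall clock.
// A real tool would start and stop its own profiler in these two hooks.
class ElapsedProfiler : public benchmark::ProfilerManager {
 public:
  // Called after Setup(), right before the profiling pass runs.
  void AfterSetupStart() override { start_ = std::chrono::steady_clock::now(); }

  // Called right after the profiling pass finishes, before Teardown().
  void BeforeTeardownStop() override {
    std::chrono::duration<double> elapsed =
        std::chrono::steady_clock::now() - start_;
    // Per the user_guide text above, whatever the profiler collects is
    // reported separately from the library's own console/JSON/CSV output.
    std::printf("profiled pass took %f s\n", elapsed.count());
  }

 private:
  std::chrono::steady_clock::time_point start_;
};

static void BM_demo(benchmark::State& state) {
  for (auto _ : state) {
    auto iterations = state.iterations();
    benchmark::DoNotOptimize(iterations);
  }
}
BENCHMARK(BM_demo);

int main(int argc, char** argv) {
  ElapsedProfiler profiler;
  // Register before running benchmarks; the library stores only the raw
  // pointer, so the object must outlive RunSpecifiedBenchmarks().
  benchmark::RegisterProfilerManager(&profiler);
  benchmark::Initialize(&argc, argv);
  benchmark::RunSpecifiedBenchmarks();
  benchmark::RegisterProfilerManager(nullptr);
  benchmark::Shutdown();
  return 0;
}
```

With a profiler registered, each repetition gets one extra single-iteration pass bracketed by `AfterSetupStart()` and `BeforeTeardownStop()`, mirroring how the MemoryManager pass is driven from `BenchmarkRunner::DoOneRepetition()` in the patch above.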