[FR] Add API to provide custom profilers #1807 (#1809)

This API is akin to the MemoryManager API and lets tools provide
their own profiler which is wrapped in the same way MemoryManager is
wrapped. Namely, the profiler provides Start/Stop methods that are called
at the start/end of running the benchmark in a separate pass.

Co-authored-by: dominic <510002+dmah42@users.noreply.github.com>
This commit is contained in:
xdje42 2024-07-16 01:56:40 -07:00 committed by GitHub
parent d2cd246e19
commit 7c8ed6b082
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
8 changed files with 134 additions and 16 deletions

View File

@ -42,6 +42,7 @@ Dominic Hamon <dma@stripysock.com> <dominic@google.com>
Dominik Czarnota <dominik.b.czarnota@gmail.com>
Dominik Korman <kormandominik@gmail.com>
Donald Aingworth <donalds_junk_mail@yahoo.com>
Doug Evans <xdje42@gmail.com>
Eric Backus <eric_backus@alum.mit.edu>
Eric Fiselier <eric@efcs.ca>
Eugene Zhuk <eugene.zhuk@gmail.com>

View File

@ -1139,6 +1139,21 @@ a report on the number of allocations, bytes used, etc.
This data will then be reported alongside other performance data, currently
only when using JSON output.
<a name="profiling" />
## Profiling
It's often useful to also profile benchmarks in particular ways, in addition to
CPU performance. For this reason, benchmark offers the `RegisterProfilerManager`
method that allows a custom `ProfilerManager` to be injected.
If set, the `ProfilerManager::AfterSetupStart` and
`ProfilerManager::BeforeTeardownStop` methods will be called at the start and
end of a separate benchmark run to allow user code to collect and report
user-provided profile metrics.
Output collected from this profiling run must be reported separately.
<a name="using-register-benchmark" />
## Using RegisterBenchmark(name, fn, args...)

View File

@ -416,6 +416,26 @@ class MemoryManager {
BENCHMARK_EXPORT
void RegisterMemoryManager(MemoryManager* memory_manager);
// If a ProfilerManager is registered (via RegisterProfilerManager()), the
// benchmark will be run an additional time under the profiler to collect and
// report profile metrics for the run of the benchmark.
class ProfilerManager {
public:
virtual ~ProfilerManager() {}
// This is called after `Setup()` code and right before the benchmark is run.
virtual void AfterSetupStart() = 0;
// This is called before `Teardown()` code and right after the benchmark
// completes.
virtual void BeforeTeardownStop() = 0;
};
// Register a ProfilerManager instance that will be used to collect and report
// profile measurements for benchmark runs.
BENCHMARK_EXPORT
void RegisterProfilerManager(ProfilerManager* profiler_manager);
// Add a key-value pair to output as part of the context stanza in the report.
BENCHMARK_EXPORT
void AddCustomContext(const std::string& key, const std::string& value);

View File

@ -656,6 +656,10 @@ void RegisterMemoryManager(MemoryManager* manager) {
internal::memory_manager = manager;
}
void RegisterProfilerManager(ProfilerManager* manager) {
internal::profiler_manager = manager;
}
void AddCustomContext(const std::string& key, const std::string& value) {
if (internal::global_context == nullptr) {
internal::global_context = new std::map<std::string, std::string>();

View File

@ -62,6 +62,8 @@ namespace internal {
MemoryManager* memory_manager = nullptr;
ProfilerManager* profiler_manager = nullptr;
namespace {
static constexpr IterationCount kMaxIterations = 1000000000000;
@ -401,6 +403,41 @@ void BenchmarkRunner::RunWarmUp() {
}
}
MemoryManager::Result* BenchmarkRunner::RunMemoryManager(
IterationCount memory_iterations) {
// TODO(vyng): Consider making BenchmarkReporter::Run::memory_result an
// optional so we don't have to own the Result here.
// Can't do it now due to cxx03.
memory_results.push_back(MemoryManager::Result());
MemoryManager::Result* memory_result = &memory_results.back();
memory_manager->Start();
std::unique_ptr<internal::ThreadManager> manager;
manager.reset(new internal::ThreadManager(1));
b.Setup();
RunInThread(&b, memory_iterations, 0, manager.get(),
perf_counters_measurement_ptr);
manager->WaitForAllThreads();
manager.reset();
b.Teardown();
memory_manager->Stop(*memory_result);
return memory_result;
}
void BenchmarkRunner::RunProfilerManager() {
// TODO: Provide a way to specify the number of iterations.
IterationCount profile_iterations = 1;
std::unique_ptr<internal::ThreadManager> manager;
manager.reset(new internal::ThreadManager(1));
b.Setup();
profiler_manager->AfterSetupStart();
RunInThread(&b, profile_iterations, 0, manager.get(),
/*perf_counters_measurement_ptr=*/nullptr);
manager->WaitForAllThreads();
profiler_manager->BeforeTeardownStop();
manager.reset();
b.Teardown();
}
void BenchmarkRunner::DoOneRepetition() {
assert(HasRepeatsRemaining() && "Already done all repetitions?");
@ -445,28 +482,18 @@ void BenchmarkRunner::DoOneRepetition() {
"then we should have accepted the current iteration run.");
}
// Oh, one last thing, we need to also produce the 'memory measurements'..
// Produce memory measurements if requested.
MemoryManager::Result* memory_result = nullptr;
IterationCount memory_iterations = 0;
if (memory_manager != nullptr) {
// TODO(vyng): Consider making BenchmarkReporter::Run::memory_result an
// optional so we don't have to own the Result here.
// Can't do it now due to cxx03.
memory_results.push_back(MemoryManager::Result());
memory_result = &memory_results.back();
// Only run a few iterations to reduce the impact of one-time
// allocations in benchmarks that are not properly managed.
memory_iterations = std::min<IterationCount>(16, iters);
memory_manager->Start();
std::unique_ptr<internal::ThreadManager> manager;
manager.reset(new internal::ThreadManager(1));
b.Setup();
RunInThread(&b, memory_iterations, 0, manager.get(),
perf_counters_measurement_ptr);
manager->WaitForAllThreads();
manager.reset();
b.Teardown();
memory_manager->Stop(*memory_result);
memory_result = RunMemoryManager(memory_iterations);
}
if (profiler_manager != nullptr) {
RunProfilerManager();
}
// Ok, now actually report.

View File

@ -35,6 +35,7 @@ BM_DECLARE_string(benchmark_perf_counters);
namespace internal {
extern MemoryManager* memory_manager;
extern ProfilerManager* profiler_manager;
struct RunResults {
std::vector<BenchmarkReporter::Run> non_aggregates;
@ -113,6 +114,10 @@ class BenchmarkRunner {
};
IterationResults DoNIterations();
MemoryManager::Result* RunMemoryManager(IterationCount memory_iterations);
void RunProfilerManager();
IterationCount PredictNumItersNeeded(const IterationResults& i) const;
bool ShouldReportIterationResults(const IterationResults& i) const;

View File

@ -192,6 +192,9 @@ benchmark_add_test(NAME user_counters_thousands_test COMMAND user_counters_thous
compile_output_test(memory_manager_test)
benchmark_add_test(NAME memory_manager_test COMMAND memory_manager_test --benchmark_min_time=0.01s)
compile_output_test(profiler_manager_test)
benchmark_add_test(NAME profiler_manager_test COMMAND profiler_manager_test --benchmark_min_time=0.01s)
# MSVC does not allow to set the language standard to C++98/03.
if(NOT (MSVC OR CMAKE_CXX_SIMULATE_ID STREQUAL "MSVC"))
compile_benchmark_test(cxx03_test)

View File

@ -0,0 +1,43 @@
// FIXME: WIP
#include <memory>
#include "benchmark/benchmark.h"
#include "output_test.h"
class TestProfilerManager : public benchmark::ProfilerManager {
void AfterSetupStart() override {}
void BeforeTeardownStop() override {}
};
void BM_empty(benchmark::State& state) {
for (auto _ : state) {
auto iterations = state.iterations();
benchmark::DoNotOptimize(iterations);
}
}
BENCHMARK(BM_empty);
ADD_CASES(TC_ConsoleOut, {{"^BM_empty %console_report$"}});
ADD_CASES(TC_JSONOut, {{"\"name\": \"BM_empty\",$"},
{"\"family_index\": 0,$", MR_Next},
{"\"per_family_instance_index\": 0,$", MR_Next},
{"\"run_name\": \"BM_empty\",$", MR_Next},
{"\"run_type\": \"iteration\",$", MR_Next},
{"\"repetitions\": 1,$", MR_Next},
{"\"repetition_index\": 0,$", MR_Next},
{"\"threads\": 1,$", MR_Next},
{"\"iterations\": %int,$", MR_Next},
{"\"real_time\": %float,$", MR_Next},
{"\"cpu_time\": %float,$", MR_Next},
{"\"time_unit\": \"ns\"$", MR_Next},
{"}", MR_Next}});
ADD_CASES(TC_CSVOut, {{"^\"BM_empty\",%csv_report$"}});
int main(int argc, char* argv[]) {
std::unique_ptr<benchmark::ProfilerManager> pm(new TestProfilerManager());
benchmark::RegisterProfilerManager(pm.get());
RunOutputTests(argc, argv);
benchmark::RegisterProfilerManager(nullptr);
}