mirror of https://github.com/google/benchmark.git
Support optional, user-directed collection of performance counters (#1114)
* Support optional, user-directed collection of performance counters

  The patch allows an engineer to drill into the root causes of a regression, for example. Currently, only single-threaded runs are supported. The feature is a build-time opt-in, and then a runtime opt-in: the engineer may run the benchmark executable, passing a list of performance counter names (using libpfm's naming scheme) on the command line. The counter values are then collected and reported back as UserCounters.

  This is different from #240 in that it is a benchmark user opt-in, and the counter collection is transparent to the benchmark. Currently, this is only supported on platforms where libpfm is supported.

  libpfm: http://perfmon2.sourceforge.net/

* 'Use' values param in Snapshot when BENCHMARK_OS_WINDOWS

  This avoids an unused-parameter warning-as-error.

* Added missing include for <vector> in perf_counters.cc

* Moved doc to docs

* Added license blurbs
This commit is contained in:
parent 835951aa44
commit 376ebc2635
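For illustration, the end-to-end flow enabled by this change (the binary name `mybenchmark` and the source path are placeholders, not part of the patch):

    # Build-time opt-in; requires libpfm (e.g. libpfm4-dev) at build time.
    cmake -DBENCHMARK_ENABLE_LIBPFM=ON -DCMAKE_BUILD_TYPE=Release <path-to-benchmark-source>
    cmake --build .

    # Runtime opt-in; counter names use libpfm's naming scheme.
    ./mybenchmark --benchmark_perf_counters=CYCLES,INSTRUCTIONS

The collected values are reported alongside any existing UserCounters, in every output format.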
@@ -0,0 +1,44 @@
name: build-and-test-perfcounters

on:
  push:
    branches: [ master ]
  pull_request:
    branches: [ master ]

jobs:
  job:
    # TODO(dominic): Extend this to include compiler and set through env: CC/CXX.
    name: ${{ matrix.os }}.${{ matrix.build_type }}
    runs-on: ${{ matrix.os }}
    strategy:
      fail-fast: false
      matrix:
        os: [ubuntu-latest, ubuntu-16.04, ubuntu-20.04]
        build_type: ['Release', 'Debug']
    steps:
    - uses: actions/checkout@v2

    - name: install libpfm
      run: sudo apt install libpfm4-dev

    - name: create build environment
      run: cmake -E make_directory ${{ runner.workspace }}/_build

    - name: configure cmake
      shell: bash
      working-directory: ${{ runner.workspace }}/_build
      run: cmake -DBENCHMARK_ENABLE_LIBPFM=1 -DBENCHMARK_DOWNLOAD_DEPENDENCIES=ON $GITHUB_WORKSPACE -DCMAKE_BUILD_TYPE=${{ matrix.build_type }}

    - name: build
      shell: bash
      working-directory: ${{ runner.workspace }}/_build
      run: cmake --build . --config ${{ matrix.build_type }}

    # Skip testing, for now. It seems perf_event_open does not succeed on the
    # hosting machine, very likely a permissions issue.
    # TODO(mtrofin): Enable test.
    # - name: test
    #   shell: bash
    #   working-directory: ${{ runner.workspace }}/_build
    #   run: sudo ctest -C ${{ matrix.build_type }} --rerun-failed --output-on-failure
@@ -270,6 +270,10 @@ cxx_feature_check(STEADY_CLOCK)
set(THREADS_PREFER_PTHREAD_FLAG ON)
find_package(Threads REQUIRED)

+if (BENCHMARK_ENABLE_LIBPFM)
+  find_package(PFM)
+endif()
+
# Set up directories
include_directories(${PROJECT_SOURCE_DIR}/include)
@@ -297,6 +297,8 @@ too (`-lkstat`).

[Setting the Time Unit](#setting-the-time-unit)

+[User-Requested Performance Counters](docs/perf_counters.md)
+
[Preventing Optimization](#preventing-optimization)

[Reporting Statistics](#reporting-statistics)
@@ -0,0 +1,19 @@
# If successful, the following variables will be defined:
# HAVE_LIBPFM.
# Set BENCHMARK_ENABLE_LIBPFM to 0 to disable, regardless of libpfm presence.
include(CheckIncludeFile)
include(CheckLibraryExists)
enable_language(C)

check_library_exists(libpfm.a pfm_initialize "" HAVE_LIBPFM_INITIALIZE)
if(HAVE_LIBPFM_INITIALIZE)
  check_include_file(perfmon/perf_event.h HAVE_PERFMON_PERF_EVENT_H)
  check_include_file(perfmon/pfmlib.h HAVE_PERFMON_PFMLIB_H)
  check_include_file(perfmon/pfmlib_perf_event.h HAVE_PERFMON_PFMLIB_PERF_EVENT_H)
  if(HAVE_PERFMON_PERF_EVENT_H AND HAVE_PERFMON_PFMLIB_H AND HAVE_PERFMON_PFMLIB_PERF_EVENT_H)
    message("Using Perf Counters.")
    set(HAVE_LIBPFM 1)
  endif()
else()
  message("Perf Counters support requested, but was unable to find libpfm.")
endif()
@@ -0,0 +1,35 @@
<a name="perf-counters" />

# User-Requested Performance Counters

When running benchmarks, the user may choose to request collection of
performance counters. This may be useful in investigation scenarios - narrowing
down the cause of a regression; or verifying that the underlying cause of a
performance improvement matches expectations.

This feature is available if:

* The benchmark is run on an architecture featuring a Performance Monitoring
  Unit (PMU),
* The benchmark is compiled with support for collecting counters. Currently,
  this requires [libpfm](http://perfmon2.sourceforge.net/) be available at build
  time, and
* Currently, there is a limitation that the benchmark be run on one thread.

The feature does not require modifying benchmark code. Counter collection is
handled at the boundaries where timer collection is also handled.

To opt-in:

* Install `libpfm4-dev`, e.g. `apt-get install libpfm4-dev`.
* Enable the cmake flag BENCHMARK_ENABLE_LIBPFM.

To use, pass a comma-separated list of counter names through the
`--benchmark_perf_counters` flag. The names are decoded through libpfm - meaning,
they are platform specific, but some (e.g. `CYCLES` or `INSTRUCTIONS`) are
mapped by libpfm to platform-specifics - see libpfm
[documentation](http://perfmon2.sourceforge.net/docs.html) for more details.

The counter values are reported back through the [User Counters](../README.md#custom-counters)
mechanism, meaning, they are available in all the formats (e.g. JSON) supported
by User Counters.
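For example, an entirely ordinary benchmark - written exactly as it would be without this feature - picks up the requested counters. This sketch mirrors the BM_Simple benchmark from the test added in this commit (BENCHMARK_MAIN() is the library's standard entry-point macro, not part of the patch):

    #include "benchmark/benchmark.h"

    static void BM_Simple(benchmark::State& state) {
      for (auto _ : state) {
        benchmark::DoNotOptimize(state.iterations());
      }
    }
    BENCHMARK(BM_Simple);
    BENCHMARK_MAIN();

Run with `--benchmark_perf_counters=CYCLES,BRANCHES`, the reports gain `CYCLES` and `BRANCHES` user counters; the implementation registers them with `Counter::kAvgIterations`, so the displayed values are per-iteration averages.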
@@ -448,6 +448,7 @@ struct Statistics {
struct BenchmarkInstance;
class ThreadTimer;
class ThreadManager;
+class PerfCountersMeasurement;

enum AggregationReportMode
#if defined(BENCHMARK_HAS_CXX11)

@@ -687,15 +688,17 @@ class State {
 private:
  State(IterationCount max_iters, const std::vector<int64_t>& ranges,
        int thread_i, int n_threads, internal::ThreadTimer* timer,
-       internal::ThreadManager* manager);
+       internal::ThreadManager* manager,
+       internal::PerfCountersMeasurement* perf_counters_measurement);

  void StartKeepRunning();
  // Implementation of KeepRunning() and KeepRunningBatch().
  // is_batch must be true unless n is 1.
  bool KeepRunningInternal(IterationCount n, bool is_batch);
  void FinishKeepRunning();
-  internal::ThreadTimer* timer_;
-  internal::ThreadManager* manager_;
+  internal::ThreadTimer* const timer_;
+  internal::ThreadManager* const manager_;
+  internal::PerfCountersMeasurement* const perf_counters_measurement_;

  friend struct internal::BenchmarkInstance;
};
@@ -28,6 +28,12 @@ target_include_directories(benchmark PUBLIC
  $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/../include>
  )

+# libpfm, if available
+if (HAVE_LIBPFM)
+  target_link_libraries(benchmark libpfm.a)
+  add_definitions(-DHAVE_LIBPFM)
+endif()
+
# Link threads.
target_link_libraries(benchmark ${BENCHMARK_CXX_LIBRARIES} ${CMAKE_THREAD_LIBS_INIT})
find_library(LIBRT rt)
@@ -45,6 +45,7 @@
#include "internal_macros.h"
#include "log.h"
#include "mutex.h"
+#include "perf_counters.h"
#include "re.h"
#include "statistics.h"
#include "string_util.h"

@@ -106,6 +107,10 @@ DEFINE_bool(benchmark_counters_tabular, false);
// The level of verbose logging to output
DEFINE_int32(v, 0);

+// List of additional perf counters to collect, in libpfm format. For more
+// information about libpfm: https://man7.org/linux/man-pages/man3/libpfm.3.html
+DEFINE_string(benchmark_perf_counters, "");
+
namespace benchmark {

namespace internal {

@@ -117,7 +122,8 @@ void UseCharPointer(char const volatile*) {}

State::State(IterationCount max_iters, const std::vector<int64_t>& ranges,
             int thread_i, int n_threads, internal::ThreadTimer* timer,
-             internal::ThreadManager* manager)
+             internal::ThreadManager* manager,
+             internal::PerfCountersMeasurement* perf_counters_measurement)
    : total_iterations_(0),
      batch_leftover_(0),
      max_iterations(max_iters),

@@ -130,7 +136,8 @@ State::State(IterationCount max_iters, const std::vector<int64_t>& ranges,
      thread_index(thread_i),
      threads(n_threads),
      timer_(timer),
-      manager_(manager) {
+      manager_(manager),
+      perf_counters_measurement_(perf_counters_measurement) {
  CHECK(max_iterations != 0) << "At least one iteration must be run";
  CHECK_LT(thread_index, threads) << "thread_index must be less than threads";

@@ -163,11 +170,23 @@ void State::PauseTiming() {
  // Add in time accumulated so far
  CHECK(started_ && !finished_ && !error_occurred_);
  timer_->StopTimer();
+  if (perf_counters_measurement_) {
+    auto measurements = perf_counters_measurement_->StopAndGetMeasurements();
+    for (const auto& name_and_measurement : measurements) {
+      auto name = name_and_measurement.first;
+      auto measurement = name_and_measurement.second;
+      CHECK_EQ(counters[name], 0.0);
+      counters[name] = Counter(measurement, Counter::kAvgIterations);
+    }
+  }
}

void State::ResumeTiming() {
  CHECK(started_ && !finished_ && !error_occurred_);
  timer_->StartTimer();
+  if (perf_counters_measurement_) {
+    perf_counters_measurement_->Start();
+  }
}

void State::SkipWithError(const char* msg) {

@@ -457,7 +476,9 @@ void ParseCommandLineFlags(int* argc, char** argv) {
        ParseStringFlag(argv[i], "color_print", &FLAGS_benchmark_color) ||
        ParseBoolFlag(argv[i], "benchmark_counters_tabular",
                      &FLAGS_benchmark_counters_tabular) ||
-        ParseInt32Flag(argv[i], "v", &FLAGS_v)) {
+        ParseInt32Flag(argv[i], "v", &FLAGS_v) ||
+        ParseStringFlag(argv[i], "benchmark_perf_counters",
+                        &FLAGS_benchmark_perf_counters)) {
      for (int j = i; j != *argc - 1; ++j) argv[j] = argv[j + 1];

      --(*argc);
@@ -3,10 +3,12 @@
namespace benchmark {
namespace internal {

-State BenchmarkInstance::Run(IterationCount iters, int thread_id,
-                             internal::ThreadTimer* timer,
-                             internal::ThreadManager* manager) const {
-  State st(iters, arg, thread_id, threads, timer, manager);
+State BenchmarkInstance::Run(
+    IterationCount iters, int thread_id, internal::ThreadTimer* timer,
+    internal::ThreadManager* manager,
+    internal::PerfCountersMeasurement* perf_counters_measurement) const {
+  State st(iters, arg, thread_id, threads, timer, manager,
+           perf_counters_measurement);
  benchmark->Run(st);
  return st;
}

@@ -36,7 +36,8 @@ struct BenchmarkInstance {
  int threads;  // Number of concurrent threads to us

  State Run(IterationCount iters, int thread_id, internal::ThreadTimer* timer,
-            internal::ThreadManager* manager) const;
+            internal::ThreadManager* manager,
+            internal::PerfCountersMeasurement* perf_counters_measurement) const;
};

bool FindBenchmarksInternal(const std::string& re,
@@ -45,6 +45,7 @@
#include "internal_macros.h"
#include "log.h"
#include "mutex.h"
+#include "perf_counters.h"
#include "re.h"
#include "statistics.h"
#include "string_util.h"

@@ -111,12 +112,14 @@ BenchmarkReporter::Run CreateRunReport(
// Execute one thread of benchmark b for the specified number of iterations.
// Adds the stats collected for the thread into manager->results.
void RunInThread(const BenchmarkInstance* b, IterationCount iters,
-                 int thread_id, ThreadManager* manager) {
+                 int thread_id, ThreadManager* manager,
+                 PerfCountersMeasurement* perf_counters_measurement) {
  internal::ThreadTimer timer(
      b->measure_process_cpu_time
          ? internal::ThreadTimer::CreateProcessCpuTime()
          : internal::ThreadTimer::Create());
-  State st = b->Run(iters, thread_id, &timer, manager);
+  State st =
+      b->Run(iters, thread_id, &timer, manager, perf_counters_measurement);
  CHECK(st.error_occurred() || st.iterations() >= st.max_iterations)
      << "Benchmark returned before State::KeepRunning() returned false!";
  {

@@ -143,7 +146,12 @@ class BenchmarkRunner {
            : FLAGS_benchmark_repetitions),
        has_explicit_iteration_count(b.iterations != 0),
        pool(b.threads - 1),
-        iters(has_explicit_iteration_count ? b.iterations : 1) {
+        iters(has_explicit_iteration_count ? b.iterations : 1),
+        perf_counters_measurement(
+            PerfCounters::Create(StrSplit(FLAGS_benchmark_perf_counters, ','))),
+        perf_counters_measurement_ptr(perf_counters_measurement.IsValid()
+                                          ? &perf_counters_measurement
+                                          : nullptr) {
    run_results.display_report_aggregates_only =
        (FLAGS_benchmark_report_aggregates_only ||
         FLAGS_benchmark_display_aggregates_only);

@@ -155,6 +163,11 @@ class BenchmarkRunner {
           internal::ARM_DisplayReportAggregatesOnly);
      run_results.file_report_aggregates_only =
          (b.aggregation_report_mode & internal::ARM_FileReportAggregatesOnly);
+      CHECK(b.threads == 1 || !perf_counters_measurement.IsValid())
+          << "Perf counters are not supported in multi-threaded cases.\n";
+      CHECK(FLAGS_benchmark_perf_counters.empty() ||
+            perf_counters_measurement.IsValid())
+          << "Perf counters were requested but could not be set up.";
    }

    for (int repetition_num = 0; repetition_num < repeats; repetition_num++) {

@@ -192,6 +205,9 @@ class BenchmarkRunner {
  // So only the first repetition has to find/calculate it,
  // the other repetitions will just use that precomputed iteration count.

+  PerfCountersMeasurement perf_counters_measurement;
+  PerfCountersMeasurement* const perf_counters_measurement_ptr;
+
  struct IterationResults {
    internal::ThreadManager::Result results;
    IterationCount iters;

@@ -206,12 +222,12 @@ class BenchmarkRunner {
    // Run all but one thread in separate threads
    for (std::size_t ti = 0; ti < pool.size(); ++ti) {
      pool[ti] = std::thread(&RunInThread, &b, iters, static_cast<int>(ti + 1),
-                             manager.get());
+                             manager.get(), perf_counters_measurement_ptr);
    }
    // And run one thread here directly.
    // (If we were asked to run just one thread, we don't create new threads.)
    // Yes, we need to do this here *after* we start the separate threads.
-    RunInThread(&b, iters, 0, manager.get());
+    RunInThread(&b, iters, 0, manager.get(), perf_counters_measurement_ptr);

    // The main thread has finished. Now let's wait for the other threads.
    manager->WaitForAllThreads();

@@ -331,7 +347,8 @@ class BenchmarkRunner {
      memory_manager->Start();
      std::unique_ptr<internal::ThreadManager> manager;
      manager.reset(new internal::ThreadManager(1));
-      RunInThread(&b, memory_iterations, 0, manager.get());
+      RunInThread(&b, memory_iterations, 0, manager.get(),
+                  perf_counters_measurement_ptr);
      manager->WaitForAllThreads();
      manager.reset();
@@ -26,6 +26,8 @@ DECLARE_bool(benchmark_report_aggregates_only);

DECLARE_bool(benchmark_display_aggregates_only);

+DECLARE_string(benchmark_perf_counters);
+
namespace benchmark {

namespace internal {
@@ -0,0 +1,128 @@
// Copyright 2021 Google Inc. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "perf_counters.h"

#include <cstring>
#include <vector>

#if defined HAVE_LIBPFM
#include "perfmon/pfmlib.h"
#include "perfmon/pfmlib_perf_event.h"
#endif

namespace benchmark {
namespace internal {

constexpr size_t PerfCounterValues::kMaxCounters;

#if defined HAVE_LIBPFM
const bool PerfCounters::kSupported = true;

bool PerfCounters::Initialize() { return pfm_initialize() == PFM_SUCCESS; }

PerfCounters PerfCounters::Create(
    const std::vector<std::string>& counter_names) {
  if (counter_names.empty()) {
    return NoCounters();
  }
  if (counter_names.size() > PerfCounterValues::kMaxCounters) {
    GetErrorLogInstance()
        << counter_names.size()
        << " counters were requested. The minimum is 1, the maximum is "
        << PerfCounterValues::kMaxCounters << "\n";
    return NoCounters();
  }
  std::vector<int> counter_ids(counter_names.size());

  const int mode = PFM_PLM3;  // user mode only
  for (size_t i = 0; i < counter_names.size(); ++i) {
    const bool is_first = i == 0;
    struct perf_event_attr attr{};
    attr.size = sizeof(attr);
    const int group_id = !is_first ? counter_ids[0] : -1;
    const auto& name = counter_names[i];
    if (name.empty()) {
      GetErrorLogInstance() << "A counter name was the empty string\n";
      return NoCounters();
    }
    pfm_perf_encode_arg_t arg{};
    arg.attr = &attr;

    const int pfm_get =
        pfm_get_os_event_encoding(name.c_str(), mode, PFM_OS_PERF_EVENT, &arg);
    if (pfm_get != PFM_SUCCESS) {
      GetErrorLogInstance() << "Unknown counter name: " << name << "\n";
      return NoCounters();
    }
    attr.disabled = is_first;
    attr.pinned = is_first;
    attr.exclude_kernel = true;
    attr.exclude_user = false;
    attr.exclude_hv = true;
    // Read all counters in one read.
    attr.read_format = PERF_FORMAT_GROUP;

    int id = -1;
    static constexpr size_t kNrOfSyscallRetries = 5;
    // Retry syscall as it was interrupted often (b/64774091).
    for (size_t num_retries = 0; num_retries < kNrOfSyscallRetries;
         ++num_retries) {
      id = perf_event_open(&attr, 0, -1, group_id, 0);
      if (id >= 0 || errno != EINTR) {
        break;
      }
    }
    if (id < 0) {
      GetErrorLogInstance()
          << "Failed to get a file descriptor for " << name << "\n";
      return NoCounters();
    }

    counter_ids[i] = id;
  }
  if (ioctl(counter_ids[0], PERF_EVENT_IOC_ENABLE) != 0) {
    GetErrorLogInstance() << "Failed to start counters\n";
    return NoCounters();
  }

  return PerfCounters(counter_names, std::move(counter_ids));
}

PerfCounters::~PerfCounters() {
  if (counter_ids_.empty()) {
    return;
  }
  ioctl(counter_ids_[0], PERF_EVENT_IOC_DISABLE);
  for (int fd : counter_ids_) {
    close(fd);
  }
}
#else  // defined HAVE_LIBPFM
const bool PerfCounters::kSupported = false;

bool PerfCounters::Initialize() { return false; }

PerfCounters PerfCounters::Create(
    const std::vector<std::string>& counter_names) {
  if (!counter_names.empty()) {
    GetErrorLogInstance() << "Performance counters not supported.";
  }
  return NoCounters();
}

PerfCounters::~PerfCounters() = default;
#endif  // defined HAVE_LIBPFM
}  // namespace internal
}  // namespace benchmark
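A note on the layout that the read in Snapshot() (declared in the header below) relies on: with attr.read_format set to PERF_FORMAT_GROUP and no other read-format flags, the perf_event_open(2) ABI returns, for one read() on the group leader, a count followed by the raw values. That leading count is what motivates the one-slot padding in PerfCounterValues. Sketched as comments (not code from this patch):

    // values_[0]            == nr, the number of counters in the group
    // values_[kPadding + i] == raw value of the i-th counter, in group order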
@@ -0,0 +1,172 @@
// Copyright 2021 Google Inc. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#ifndef BENCHMARK_PERF_COUNTERS_H
#define BENCHMARK_PERF_COUNTERS_H

#include <array>
#include <cstdint>
#include <vector>

#include "benchmark/benchmark.h"
#include "check.h"
#include "log.h"

#ifndef BENCHMARK_OS_WINDOWS
#include <unistd.h>
#endif

namespace benchmark {
namespace internal {

// Typically, we can only read a small number of counters. There is also a
// padding preceding counter values, when reading multiple counters with one
// syscall (which is desirable). PerfCounterValues abstracts these details.
// The implementation ensures the storage is inlined, and allows 0-based
// indexing into the counter values.
// The object is used in conjunction with a PerfCounters object, by passing it
// to Snapshot(). The values are populated such that
// perfCounters->names()[i]'s value is obtained at position i (as given by
// operator[]) of this object.
class PerfCounterValues {
 public:
  explicit PerfCounterValues(size_t nr_counters) : nr_counters_(nr_counters) {
    CHECK_LE(nr_counters_, kMaxCounters);
  }

  uint64_t operator[](size_t pos) const { return values_[kPadding + pos]; }

  static constexpr size_t kMaxCounters = 3;

 private:
  friend class PerfCounters;
  // Get the byte buffer in which perf counters can be captured.
  // This is used by PerfCounters::Read
  std::pair<char*, size_t> get_data_buffer() {
    return {reinterpret_cast<char*>(values_.data()),
            sizeof(uint64_t) * (kPadding + nr_counters_)};
  }

  static constexpr size_t kPadding = 1;
  std::array<uint64_t, kPadding + kMaxCounters> values_;
  const size_t nr_counters_;
};

// Collect PMU counters. The object, once constructed, is ready to be used by
// calling read(). PMU counter collection is enabled from the time create() is
// called, to obtain the object, until the object's destructor is called.
class PerfCounters final {
 public:
  // True iff this platform supports performance counters.
  static const bool kSupported;

  bool IsValid() const { return is_valid_; }
  static PerfCounters NoCounters() { return PerfCounters(); }

  ~PerfCounters();
  PerfCounters(PerfCounters&&) = default;
  PerfCounters(const PerfCounters&) = delete;

  // Platform-specific implementations may choose to do some library
  // initialization here.
  static bool Initialize();

  // Return a PerfCounters object ready to read the counters with the names
  // specified. The values are user-mode only. The counter name format is
  // implementation and OS specific.
  // TODO: once we move to C++-17, this should be a std::optional, and then the
  // IsValid() boolean can be dropped.
  static PerfCounters Create(const std::vector<std::string>& counter_names);

  // Take a snapshot of the current value of the counters into the provided
  // valid PerfCounterValues storage. The values are populated such that:
  // names()[i]'s value is (*values)[i]
  BENCHMARK_ALWAYS_INLINE bool Snapshot(PerfCounterValues* values) {
#ifndef BENCHMARK_OS_WINDOWS
    assert(values != nullptr);
    assert(IsValid());
    auto buffer = values->get_data_buffer();
    auto read_bytes = ::read(counter_ids_[0], buffer.first, buffer.second);
    return static_cast<size_t>(read_bytes) == buffer.second;
#else
    (void)values;
    return false;
#endif
  }

  const std::vector<std::string>& names() const { return counter_names_; }
  size_t num_counters() const { return counter_names_.size(); }

 private:
  PerfCounters(const std::vector<std::string>& counter_names,
               std::vector<int>&& counter_ids)
      : counter_ids_(std::move(counter_ids)),
        counter_names_(counter_names),
        is_valid_(true) {}
  PerfCounters() : is_valid_(false) {}

  std::vector<int> counter_ids_;
  const std::vector<std::string> counter_names_;
  const bool is_valid_;
};

// Typical usage of the above primitives.
class PerfCountersMeasurement final {
 public:
  PerfCountersMeasurement(PerfCounters&& c)
      : counters_(std::move(c)),
        start_values_(counters_.IsValid() ? counters_.names().size() : 0),
        end_values_(counters_.IsValid() ? counters_.names().size() : 0) {}

  bool IsValid() const { return counters_.IsValid(); }

  BENCHMARK_ALWAYS_INLINE void Start() {
    assert(IsValid());
    // Tell the compiler to not move instructions above/below where we take
    // the snapshot.
    ClobberMemory();
    counters_.Snapshot(&start_values_);
    ClobberMemory();
  }

  BENCHMARK_ALWAYS_INLINE std::vector<std::pair<std::string, double>>
  StopAndGetMeasurements() {
    assert(IsValid());
    // Tell the compiler to not move instructions above/below where we take
    // the snapshot.
    ClobberMemory();
    counters_.Snapshot(&end_values_);
    ClobberMemory();

    std::vector<std::pair<std::string, double>> ret;
    for (size_t i = 0; i < counters_.names().size(); ++i) {
      double measurement = static_cast<double>(end_values_[i]) -
                           static_cast<double>(start_values_[i]);
      ret.push_back({counters_.names()[i], measurement});
    }
    return ret;
  }

 private:
  PerfCounters counters_;
  PerfCounterValues start_values_;
  PerfCounterValues end_values_;
};

BENCHMARK_UNUSED static bool perf_init_anchor = PerfCounters::Initialize();

}  // namespace internal
}  // namespace benchmark

#endif  // BENCHMARK_PERF_COUNTERS_H
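A minimal usage sketch of these primitives, mirroring how BenchmarkRunner and State wire them together in this patch (assumes libpfm support was compiled in and the counter names are known to libpfm on the host; SketchOfUsage is illustrative only, not part of the patch):

    #include <string>
    #include <utility>
    #include <vector>

    #include "perf_counters.h"

    void SketchOfUsage() {
      using benchmark::internal::PerfCounters;
      using benchmark::internal::PerfCountersMeasurement;

      PerfCountersMeasurement measurement(
          PerfCounters::Create({"CYCLES", "INSTRUCTIONS"}));
      if (!measurement.IsValid()) return;  // e.g. no libpfm, bad names, too many counters

      measurement.Start();
      // ... region under measurement (for the library: the timed iterations) ...
      std::vector<std::pair<std::string, double>> deltas =
          measurement.StopAndGetMeasurements();
      // deltas[i].first is the counter name, deltas[i].second the observed delta.
    }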
@@ -163,6 +163,18 @@ std::string StrFormat(const char* format, ...) {
  return tmp;
}

+std::vector<std::string> StrSplit(const std::string& str, char delim) {
+  std::vector<std::string> ret;
+  size_t first = 0;
+  size_t next = str.find(delim);
+  for (; next != std::string::npos;
+       first = next + 1, next = str.find(delim, first)) {
+    ret.push_back(str.substr(first, next - first));
+  }
+  ret.push_back(str.substr(first));
+  return ret;
+}
+
#ifdef BENCHMARK_STL_ANDROID_GNUSTL
/*
 * GNU STL in Android NDK lacks support for some C++11 functions, including
@@ -37,6 +37,8 @@ inline std::string StrCat(Args&&... args) {
  return ss.str();
}

+std::vector<std::string> StrSplit(const std::string& str, char delim);
+
#ifdef BENCHMARK_STL_ANDROID_GNUSTL
/*
 * GNU STL in Android NDK lacks support for some C++11 functions, including
@@ -128,6 +128,9 @@ add_test(NAME templated_fixture_test COMMAND templated_fixture_test --benchmark_
compile_output_test(user_counters_test)
add_test(NAME user_counters_test COMMAND user_counters_test --benchmark_min_time=0.01)

+compile_output_test(perf_counters_test)
+add_test(NAME perf_counters_test COMMAND perf_counters_test --benchmark_min_time=0.01 --benchmark_perf_counters=CYCLES,BRANCHES)
+
compile_output_test(internal_threading_test)
add_test(NAME internal_threading_test COMMAND internal_threading_test --benchmark_min_time=0.01)

@@ -196,6 +199,7 @@ if (BENCHMARK_ENABLE_GTEST_TESTS)
  add_gtest(commandlineflags_gtest)
  add_gtest(statistics_gtest)
  add_gtest(string_util_gtest)
+  add_gtest(perf_counters_gtest)
endif(BENCHMARK_ENABLE_GTEST_TESTS)

###############################################################################
@@ -0,0 +1,95 @@
#include "../src/perf_counters.h"

#include "gtest/gtest.h"

#ifndef GTEST_SKIP
struct MsgHandler {
  void operator=(std::ostream&) {}
};
#define GTEST_SKIP() return MsgHandler() = std::cout
#endif

using benchmark::internal::PerfCounters;
using benchmark::internal::PerfCounterValues;

namespace {
const char kGenericPerfEvent1[] = "CYCLES";
const char kGenericPerfEvent2[] = "BRANCHES";
const char kGenericPerfEvent3[] = "INSTRUCTIONS";

TEST(PerfCountersTest, Init) {
  EXPECT_EQ(PerfCounters::Initialize(), PerfCounters::kSupported);
}

TEST(PerfCountersTest, OneCounter) {
  if (!PerfCounters::kSupported) {
    GTEST_SKIP() << "Performance counters not supported.\n";
  }
  EXPECT_TRUE(PerfCounters::Initialize());
  EXPECT_TRUE(PerfCounters::Create({kGenericPerfEvent1}).IsValid());
}

TEST(PerfCountersTest, NegativeTest) {
  if (!PerfCounters::kSupported) {
    EXPECT_FALSE(PerfCounters::Initialize());
    return;
  }
  EXPECT_TRUE(PerfCounters::Initialize());
  EXPECT_FALSE(PerfCounters::Create({}).IsValid());
  EXPECT_FALSE(PerfCounters::Create({""}).IsValid());
  EXPECT_FALSE(PerfCounters::Create({"not a counter name"}).IsValid());
  {
    EXPECT_TRUE(PerfCounters::Create({kGenericPerfEvent1, kGenericPerfEvent2,
                                      kGenericPerfEvent3})
                    .IsValid());
  }
  EXPECT_FALSE(
      PerfCounters::Create({kGenericPerfEvent2, "", kGenericPerfEvent1})
          .IsValid());
  EXPECT_FALSE(PerfCounters::Create({kGenericPerfEvent3, "not a counter name",
                                     kGenericPerfEvent1})
                   .IsValid());
  {
    EXPECT_TRUE(PerfCounters::Create({kGenericPerfEvent1, kGenericPerfEvent2,
                                      kGenericPerfEvent3})
                    .IsValid());
  }
  EXPECT_FALSE(
      PerfCounters::Create({kGenericPerfEvent1, kGenericPerfEvent2,
                            kGenericPerfEvent3, "MISPREDICTED_BRANCH_RETIRED"})
          .IsValid());
}

TEST(PerfCountersTest, Read1Counter) {
  if (!PerfCounters::kSupported) {
    GTEST_SKIP() << "Test skipped because libpfm is not supported.\n";
  }
  EXPECT_TRUE(PerfCounters::Initialize());
  auto counters = PerfCounters::Create({kGenericPerfEvent1});
  EXPECT_TRUE(counters.IsValid());
  PerfCounterValues values1(1);
  EXPECT_TRUE(counters.Snapshot(&values1));
  EXPECT_GT(values1[0], 0);
  PerfCounterValues values2(1);
  EXPECT_TRUE(counters.Snapshot(&values2));
  EXPECT_GT(values2[0], 0);
  EXPECT_GT(values2[0], values1[0]);
}

TEST(PerfCountersTest, Read2Counters) {
  if (!PerfCounters::kSupported) {
    GTEST_SKIP() << "Test skipped because libpfm is not supported.\n";
  }
  EXPECT_TRUE(PerfCounters::Initialize());
  auto counters =
      PerfCounters::Create({kGenericPerfEvent1, kGenericPerfEvent2});
  EXPECT_TRUE(counters.IsValid());
  PerfCounterValues values1(2);
  EXPECT_TRUE(counters.Snapshot(&values1));
  EXPECT_GT(values1[0], 0);
  EXPECT_GT(values1[1], 0);
  PerfCounterValues values2(2);
  EXPECT_TRUE(counters.Snapshot(&values2));
  EXPECT_GT(values2[0], 0);
  EXPECT_GT(values2[1], 0);
}
}  // namespace
@@ -0,0 +1,27 @@
#undef NDEBUG

#include "../src/perf_counters.h"

#include "benchmark/benchmark.h"
#include "output_test.h"

void BM_Simple(benchmark::State& state) {
  for (auto _ : state) {
    benchmark::DoNotOptimize(state.iterations());
  }
}
BENCHMARK(BM_Simple);
ADD_CASES(TC_JSONOut, {{"\"name\": \"BM_Simple\",$"}});

void CheckSimple(Results const& e) {
  CHECK_COUNTER_VALUE(e, double, "CYCLES", GT, 0);
  CHECK_COUNTER_VALUE(e, double, "BRANCHES", GT, 0.0);
}
CHECK_BENCHMARK_RESULTS("BM_Simple", &CheckSimple);

int main(int argc, char* argv[]) {
  if (!benchmark::internal::PerfCounters::kSupported) {
    return 0;
  }
  RunOutputTests(argc, argv);
}
@@ -150,4 +150,12 @@ TEST(StringUtilTest, stod) {
#endif
}

+TEST(StringUtilTest, StrSplit) {
+  EXPECT_EQ(benchmark::StrSplit("", ','), std::vector<std::string>{""});
+  EXPECT_EQ(benchmark::StrSplit("hello", ','),
+            std::vector<std::string>({"hello"}));
+  EXPECT_EQ(benchmark::StrSplit("hello,there", ','),
+            std::vector<std::string>({"hello", "there"}));
+}
+
}  // end namespace