diff --git a/.github/workflows/build-and-test-perfcounters.yml b/.github/workflows/build-and-test-perfcounters.yml
new file mode 100644
index 00000000..dfb88cbc
--- /dev/null
+++ b/.github/workflows/build-and-test-perfcounters.yml
@@ -0,0 +1,44 @@
+name: build-and-test-perfcounters
+
+on:
+ push:
+ branches: [ master ]
+ pull_request:
+ branches: [ master ]
+
+jobs:
+ job:
+ # TODO(dominic): Extend this to include compiler and set through env: CC/CXX.
+ name: ${{ matrix.os }}.${{ matrix.build_type }}
+ runs-on: ${{ matrix.os }}
+ strategy:
+ fail-fast: false
+ matrix:
+ os: [ubuntu-latest, ubuntu-16.04, ubuntu-20.04]
+ build_type: ['Release', 'Debug']
+ steps:
+ - uses: actions/checkout@v2
+
+ - name: install libpfm
+ run: sudo apt install libpfm4-dev
+
+ - name: create build environment
+ run: cmake -E make_directory ${{ runner.workspace }}/_build
+
+ - name: configure cmake
+ shell: bash
+ working-directory: ${{ runner.workspace }}/_build
+ run: cmake -DBENCHMARK_ENABLE_LIBPFM=1 -DBENCHMARK_DOWNLOAD_DEPENDENCIES=ON $GITHUB_WORKSPACE -DCMAKE_BUILD_TYPE=${{ matrix.build_type }}
+
+ - name: build
+ shell: bash
+ working-directory: ${{ runner.workspace }}/_build
+ run: cmake --build . --config ${{ matrix.build_type }}
+
+ # Skip testing, for now. It seems perf_event_open does not succeed on the
+ # hosting machine, very likely a permissions issue.
+ # TODO(mtrofin): Enable test.
+ # - name: test
+ # shell: bash
+ # working-directory: ${{ runner.workspace }}/_build
+ # run: sudo ctest -C ${{ matrix.build_type }} --rerun-failed --output-on-failure
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 10072545..7e0f251f 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -270,6 +270,10 @@ cxx_feature_check(STEADY_CLOCK)
set(THREADS_PREFER_PTHREAD_FLAG ON)
find_package(Threads REQUIRED)
+if (BENCHMARK_ENABLE_LIBPFM)
+ find_package(PFM)
+endif()
+
# Set up directories
include_directories(${PROJECT_SOURCE_DIR}/include)
diff --git a/README.md b/README.md
index ceedb334..e8c65f8d 100644
--- a/README.md
+++ b/README.md
@@ -297,6 +297,8 @@ too (`-lkstat`).
[Setting the Time Unit](#setting-the-time-unit)
+[User-Requested Performance Counters](docs/perf_counters.md)
+
[Preventing Optimization](#preventing-optimization)
[Reporting Statistics](#reporting-statistics)
diff --git a/cmake/Modules/FindPFM.cmake b/cmake/Modules/FindPFM.cmake
new file mode 100644
index 00000000..553d458c
--- /dev/null
+++ b/cmake/Modules/FindPFM.cmake
@@ -0,0 +1,19 @@
+# If successful, the following variables will be defined:
+# HAVE_LIBPFM.
+# Set BENCHMARK_ENABLE_LIBPFM to 0 to disable, regardless of libpfm presence.
+include(CheckIncludeFile)
+include(CheckLibraryExists)
+enable_language(C)
+
+check_library_exists(libpfm.a pfm_initialize "" HAVE_LIBPFM_INITIALIZE)
+if(HAVE_LIBPFM_INITIALIZE)
+ check_include_file(perfmon/perf_event.h HAVE_PERFMON_PERF_EVENT_H)
+ check_include_file(perfmon/pfmlib.h HAVE_PERFMON_PFMLIB_H)
+ check_include_file(perfmon/pfmlib_perf_event.h HAVE_PERFMON_PFMLIB_PERF_EVENT_H)
+ if(HAVE_PERFMON_PERF_EVENT_H AND HAVE_PERFMON_PFMLIB_H AND HAVE_PERFMON_PFMLIB_PERF_EVENT_H)
+ message("Using Perf Counters.")
+ set(HAVE_LIBPFM 1)
+ endif()
+else()
+ message("Perf Counters support requested, but was unable to find libpfm.")
+endif()
diff --git a/docs/perf_counters.md b/docs/perf_counters.md
new file mode 100644
index 00000000..43ff4517
--- /dev/null
+++ b/docs/perf_counters.md
@@ -0,0 +1,62 @@
+<a name="perf-counters" />
+
+# User-Requested Performance Counters
+
+When running benchmarks, the user may choose to request the collection of
+performance counters. This is useful when investigating performance: for
+example, to narrow down the cause of a regression, or to verify that the
+underlying cause of a performance improvement matches expectations.
+
+This feature is available if:
+
+* The benchmark is run on an architecture featuring a Performance Monitoring
+ Unit (PMU),
+* The benchmark is compiled with support for collecting counters. Currently,
+  this requires that [libpfm](http://perfmon2.sourceforge.net/) be available
+  at build time, and
+* The benchmark is run on a single thread (a current limitation).
+
+The feature does not require modifying benchmark code. Counter collection is
+handled at the boundaries where timer collection is also handled.
+
+To opt-in:
+
+* Install `libpfm4-dev`, e.g. `apt-get install libpfm4-dev`.
+* Enable the CMake flag `BENCHMARK_ENABLE_LIBPFM` (see the example below).
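+
+For example, a typical out-of-tree configure and build (the exact flags and
+paths are illustrative):
+
+```bash
+cmake -S . -B build -DBENCHMARK_ENABLE_LIBPFM=ON -DCMAKE_BUILD_TYPE=Release
+cmake --build build
+```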
+
+To use, pass a comma-separated list of counter names through the
+`--benchmark_perf_counters` flag. The names are decoded by libpfm, so they
+are platform-specific; however, libpfm maps some generic names (e.g. `CYCLES`
+or `INSTRUCTIONS`) to platform-specific events - see the libpfm
+[documentation](http://perfmon2.sourceforge.net/docs.html) for more details.
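+
+For example, assuming a benchmark binary named `mybench` (a hypothetical
+name) built with libpfm support:
+
+```bash
+./mybench --benchmark_perf_counters=CYCLES,INSTRUCTIONS
+```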
+
+The counter values are reported back through the [User Counters](../README.md#custom-counters)
+mechanism, so they are available in all the formats (e.g. JSON) supported
+by User Counters.
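+
+For instance, in the JSON output the counters appear as additional fields on
+each benchmark entry, averaged per iteration (the values are illustrative):
+
+```json
+{
+  "name": "BM_Simple",
+  "iterations": 1000,
+  "CYCLES": 5.3e+03,
+  "INSTRUCTIONS": 1.2e+04
+}
+```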
diff --git a/include/benchmark/benchmark.h b/include/benchmark/benchmark.h
index 881ce9e5..664422b3 100644
--- a/include/benchmark/benchmark.h
+++ b/include/benchmark/benchmark.h
@@ -448,6 +448,7 @@ struct Statistics {
struct BenchmarkInstance;
class ThreadTimer;
class ThreadManager;
+class PerfCountersMeasurement;
enum AggregationReportMode
#if defined(BENCHMARK_HAS_CXX11)
@@ -687,15 +688,17 @@ class State {
private:
State(IterationCount max_iters, const std::vector<int64_t>& ranges,
int thread_i, int n_threads, internal::ThreadTimer* timer,
- internal::ThreadManager* manager);
+ internal::ThreadManager* manager,
+ internal::PerfCountersMeasurement* perf_counters_measurement);
void StartKeepRunning();
// Implementation of KeepRunning() and KeepRunningBatch().
// is_batch must be true unless n is 1.
bool KeepRunningInternal(IterationCount n, bool is_batch);
void FinishKeepRunning();
- internal::ThreadTimer* timer_;
- internal::ThreadManager* manager_;
+ internal::ThreadTimer* const timer_;
+ internal::ThreadManager* const manager_;
+ internal::PerfCountersMeasurement* const perf_counters_measurement_;
friend struct internal::BenchmarkInstance;
};
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index 35d559ee..a6c8e9a7 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -28,6 +28,12 @@ target_include_directories(benchmark PUBLIC
$<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/../include>
)
+# libpfm, if available
+if (HAVE_LIBPFM)
+ target_link_libraries(benchmark libpfm.a)
+ add_definitions(-DHAVE_LIBPFM)
+endif()
+
# Link threads.
target_link_libraries(benchmark ${BENCHMARK_CXX_LIBRARIES} ${CMAKE_THREAD_LIBS_INIT})
find_library(LIBRT rt)
diff --git a/src/benchmark.cc b/src/benchmark.cc
index ffe4bf45..1fea654c 100644
--- a/src/benchmark.cc
+++ b/src/benchmark.cc
@@ -45,6 +45,7 @@
#include "internal_macros.h"
#include "log.h"
#include "mutex.h"
+#include "perf_counters.h"
#include "re.h"
#include "statistics.h"
#include "string_util.h"
@@ -106,6 +107,10 @@ DEFINE_bool(benchmark_counters_tabular, false);
// The level of verbose logging to output
DEFINE_int32(v, 0);
+// List of additional perf counters to collect, in libpfm format. For more
+// information about libpfm: https://man7.org/linux/man-pages/man3/libpfm.3.html
+DEFINE_string(benchmark_perf_counters, "");
+
namespace benchmark {
namespace internal {
@@ -117,7 +122,8 @@ void UseCharPointer(char const volatile*) {}
State::State(IterationCount max_iters, const std::vector<int64_t>& ranges,
int thread_i, int n_threads, internal::ThreadTimer* timer,
- internal::ThreadManager* manager)
+ internal::ThreadManager* manager,
+ internal::PerfCountersMeasurement* perf_counters_measurement)
: total_iterations_(0),
batch_leftover_(0),
max_iterations(max_iters),
@@ -130,7 +136,8 @@ State::State(IterationCount max_iters, const std::vector<int64_t>& ranges,
thread_index(thread_i),
threads(n_threads),
timer_(timer),
- manager_(manager) {
+ manager_(manager),
+ perf_counters_measurement_(perf_counters_measurement) {
CHECK(max_iterations != 0) << "At least one iteration must be run";
CHECK_LT(thread_index, threads) << "thread_index must be less than threads";
@@ -163,11 +170,25 @@ void State::PauseTiming() {
// Add in time accumulated so far
CHECK(started_ && !finished_ && !error_occurred_);
timer_->StopTimer();
+ if (perf_counters_measurement_) {
+ auto measurements = perf_counters_measurement_->StopAndGetMeasurements();
+ for (const auto& name_and_measurement : measurements) {
+ auto name = name_and_measurement.first;
+ auto measurement = name_and_measurement.second;
+ CHECK_EQ(counters[name], 0.0);
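+      // Each counter is reported as an average per iteration: kAvgIterations
+      // divides the accumulated value by the iteration count at report time.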
+ counters[name] = Counter(measurement, Counter::kAvgIterations);
+ }
+ }
}
void State::ResumeTiming() {
CHECK(started_ && !finished_ && !error_occurred_);
timer_->StartTimer();
+ if (perf_counters_measurement_) {
+ perf_counters_measurement_->Start();
+ }
}
void State::SkipWithError(const char* msg) {
@@ -457,7 +476,9 @@ void ParseCommandLineFlags(int* argc, char** argv) {
ParseStringFlag(argv[i], "color_print", &FLAGS_benchmark_color) ||
ParseBoolFlag(argv[i], "benchmark_counters_tabular",
&FLAGS_benchmark_counters_tabular) ||
- ParseInt32Flag(argv[i], "v", &FLAGS_v)) {
+ ParseInt32Flag(argv[i], "v", &FLAGS_v) ||
+ ParseStringFlag(argv[i], "benchmark_perf_counters",
+ &FLAGS_benchmark_perf_counters)) {
for (int j = i; j != *argc - 1; ++j) argv[j] = argv[j + 1];
--(*argc);
diff --git a/src/benchmark_api_internal.cc b/src/benchmark_api_internal.cc
index d468a257..804ef894 100644
--- a/src/benchmark_api_internal.cc
+++ b/src/benchmark_api_internal.cc
@@ -3,10 +3,12 @@
namespace benchmark {
namespace internal {
-State BenchmarkInstance::Run(IterationCount iters, int thread_id,
- internal::ThreadTimer* timer,
- internal::ThreadManager* manager) const {
- State st(iters, arg, thread_id, threads, timer, manager);
+State BenchmarkInstance::Run(
+ IterationCount iters, int thread_id, internal::ThreadTimer* timer,
+ internal::ThreadManager* manager,
+ internal::PerfCountersMeasurement* perf_counters_measurement) const {
+ State st(iters, arg, thread_id, threads, timer, manager,
+ perf_counters_measurement);
benchmark->Run(st);
return st;
}
diff --git a/src/benchmark_api_internal.h b/src/benchmark_api_internal.h
index 264eff95..b740bce1 100644
--- a/src/benchmark_api_internal.h
+++ b/src/benchmark_api_internal.h
@@ -36,7 +36,8 @@ struct BenchmarkInstance {
int threads; // Number of concurrent threads to use
State Run(IterationCount iters, int thread_id, internal::ThreadTimer* timer,
- internal::ThreadManager* manager) const;
+ internal::ThreadManager* manager,
+ internal::PerfCountersMeasurement* perf_counters_measurement) const;
};
bool FindBenchmarksInternal(const std::string& re,
diff --git a/src/benchmark_runner.cc b/src/benchmark_runner.cc
index d081aa86..083d1849 100644
--- a/src/benchmark_runner.cc
+++ b/src/benchmark_runner.cc
@@ -45,6 +45,7 @@
#include "internal_macros.h"
#include "log.h"
#include "mutex.h"
+#include "perf_counters.h"
#include "re.h"
#include "statistics.h"
#include "string_util.h"
@@ -111,12 +112,14 @@ BenchmarkReporter::Run CreateRunReport(
// Execute one thread of benchmark b for the specified number of iterations.
// Adds the stats collected for the thread into manager->results.
void RunInThread(const BenchmarkInstance* b, IterationCount iters,
- int thread_id, ThreadManager* manager) {
+ int thread_id, ThreadManager* manager,
+ PerfCountersMeasurement* perf_counters_measurement) {
internal::ThreadTimer timer(
b->measure_process_cpu_time
? internal::ThreadTimer::CreateProcessCpuTime()
: internal::ThreadTimer::Create());
- State st = b->Run(iters, thread_id, &timer, manager);
+ State st =
+ b->Run(iters, thread_id, &timer, manager, perf_counters_measurement);
CHECK(st.error_occurred() || st.iterations() >= st.max_iterations)
<< "Benchmark returned before State::KeepRunning() returned false!";
{
@@ -143,7 +146,12 @@ class BenchmarkRunner {
: FLAGS_benchmark_repetitions),
has_explicit_iteration_count(b.iterations != 0),
pool(b.threads - 1),
- iters(has_explicit_iteration_count ? b.iterations : 1) {
+ iters(has_explicit_iteration_count ? b.iterations : 1),
+ perf_counters_measurement(
+ PerfCounters::Create(StrSplit(FLAGS_benchmark_perf_counters, ','))),
+ perf_counters_measurement_ptr(perf_counters_measurement.IsValid()
+ ? &perf_counters_measurement
+ : nullptr) {
run_results.display_report_aggregates_only =
(FLAGS_benchmark_report_aggregates_only ||
FLAGS_benchmark_display_aggregates_only);
@@ -155,6 +163,11 @@ class BenchmarkRunner {
internal::ARM_DisplayReportAggregatesOnly);
run_results.file_report_aggregates_only =
(b.aggregation_report_mode & internal::ARM_FileReportAggregatesOnly);
+ CHECK(b.threads == 1 || !perf_counters_measurement.IsValid())
+ << "Perf counters are not supported in multi-threaded cases.\n";
+ CHECK(FLAGS_benchmark_perf_counters.empty() ||
+ perf_counters_measurement.IsValid())
+ << "Perf counters were requested but could not be set up.";
}
for (int repetition_num = 0; repetition_num < repeats; repetition_num++) {
@@ -192,6 +205,9 @@ class BenchmarkRunner {
// So only the first repetition has to find/calculate it,
// the other repetitions will just use that precomputed iteration count.
+ PerfCountersMeasurement perf_counters_measurement;
+ PerfCountersMeasurement* const perf_counters_measurement_ptr;
+
struct IterationResults {
internal::ThreadManager::Result results;
IterationCount iters;
@@ -206,12 +222,12 @@ class BenchmarkRunner {
// Run all but one thread in separate threads
for (std::size_t ti = 0; ti < pool.size(); ++ti) {
pool[ti] = std::thread(&RunInThread, &b, iters, static_cast<int>(ti + 1),
- manager.get());
+ manager.get(), perf_counters_measurement_ptr);
}
// And run one thread here directly.
// (If we were asked to run just one thread, we don't create new threads.)
// Yes, we need to do this here *after* we start the separate threads.
- RunInThread(&b, iters, 0, manager.get());
+ RunInThread(&b, iters, 0, manager.get(), perf_counters_measurement_ptr);
// The main thread has finished. Now let's wait for the other threads.
manager->WaitForAllThreads();
@@ -331,7 +347,8 @@ class BenchmarkRunner {
memory_manager->Start();
std::unique_ptr<internal::ThreadManager> manager;
manager.reset(new internal::ThreadManager(1));
- RunInThread(&b, memory_iterations, 0, manager.get());
+ RunInThread(&b, memory_iterations, 0, manager.get(),
+ perf_counters_measurement_ptr);
manager->WaitForAllThreads();
manager.reset();
diff --git a/src/benchmark_runner.h b/src/benchmark_runner.h
index 96e8282a..9b0cf2a6 100644
--- a/src/benchmark_runner.h
+++ b/src/benchmark_runner.h
@@ -26,6 +26,8 @@ DECLARE_bool(benchmark_report_aggregates_only);
DECLARE_bool(benchmark_display_aggregates_only);
+DECLARE_string(benchmark_perf_counters);
+
namespace benchmark {
namespace internal {
diff --git a/src/perf_counters.cc b/src/perf_counters.cc
new file mode 100644
index 00000000..eb28cd99
--- /dev/null
+++ b/src/perf_counters.cc
@@ -0,0 +1,131 @@
+// Copyright 2021 Google Inc. All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "perf_counters.h"
+
+#include <cstring>
+#include <vector>
+
+#if defined HAVE_LIBPFM
+#include "perfmon/pfmlib.h"
+#include "perfmon/pfmlib_perf_event.h"
+#endif
+
+namespace benchmark {
+namespace internal {
+
+constexpr size_t PerfCounterValues::kMaxCounters;
+
+#if defined HAVE_LIBPFM
+const bool PerfCounters::kSupported = true;
+
+bool PerfCounters::Initialize() { return pfm_initialize() == PFM_SUCCESS; }
+
+PerfCounters PerfCounters::Create(
+    const std::vector<std::string>& counter_names) {
+ if (counter_names.empty()) {
+ return NoCounters();
+ }
+ if (counter_names.size() > PerfCounterValues::kMaxCounters) {
+ GetErrorLogInstance()
+ << counter_names.size()
+ << " counters were requested. The minimum is 1, the maximum is "
+ << PerfCounterValues::kMaxCounters << "\n";
+ return NoCounters();
+ }
+  std::vector<int> counter_ids(counter_names.size());
+
+ const int mode = PFM_PLM3; // user mode only
+ for (size_t i = 0; i < counter_names.size(); ++i) {
+ const bool is_first = i == 0;
+ struct perf_event_attr attr{};
+ attr.size = sizeof(attr);
+ const int group_id = !is_first ? counter_ids[0] : -1;
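+    // The first counter becomes the group leader (group_fd == -1 in the
+    // perf_event_open call below); later counters join its group, so the
+    // kernel schedules them onto the PMU together.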
+ const auto& name = counter_names[i];
+ if (name.empty()) {
+ GetErrorLogInstance() << "A counter name was the empty string\n";
+ return NoCounters();
+ }
+ pfm_perf_encode_arg_t arg{};
+ arg.attr = &attr;
+
+ const int pfm_get =
+ pfm_get_os_event_encoding(name.c_str(), mode, PFM_OS_PERF_EVENT, &arg);
+ if (pfm_get != PFM_SUCCESS) {
+ GetErrorLogInstance() << "Unknown counter name: " << name << "\n";
+ return NoCounters();
+ }
+ attr.disabled = is_first;
+ attr.pinned = is_first;
+ attr.exclude_kernel = true;
+ attr.exclude_user = false;
+ attr.exclude_hv = true;
+ // Read all counters in one read.
+ attr.read_format = PERF_FORMAT_GROUP;
+
+ int id = -1;
+ static constexpr size_t kNrOfSyscallRetries = 5;
+ // Retry syscall as it was interrupted often (b/64774091).
+ for (size_t num_retries = 0; num_retries < kNrOfSyscallRetries;
+ ++num_retries) {
+ id = perf_event_open(&attr, 0, -1, group_id, 0);
+ if (id >= 0 || errno != EINTR) {
+ break;
+ }
+ }
+ if (id < 0) {
+ GetErrorLogInstance()
+ << "Failed to get a file descriptor for " << name << "\n";
+ return NoCounters();
+ }
+
+ counter_ids[i] = id;
+ }
+ if (ioctl(counter_ids[0], PERF_EVENT_IOC_ENABLE) != 0) {
+ GetErrorLogInstance() << "Failed to start counters\n";
+ return NoCounters();
+ }
+
+ return PerfCounters(counter_names, std::move(counter_ids));
+}
+
+PerfCounters::~PerfCounters() {
+ if (counter_ids_.empty()) {
+ return;
+ }
+ ioctl(counter_ids_[0], PERF_EVENT_IOC_DISABLE);
+ for (int fd : counter_ids_) {
+ close(fd);
+ }
+}
+#else // defined HAVE_LIBPFM
+const bool PerfCounters::kSupported = false;
+
+bool PerfCounters::Initialize() { return false; }
+
+PerfCounters PerfCounters::Create(
+    const std::vector<std::string>& counter_names) {
+ if (!counter_names.empty()) {
+ GetErrorLogInstance() << "Performance counters not supported.";
+ }
+ return NoCounters();
+}
+
+PerfCounters::~PerfCounters() = default;
+#endif // defined HAVE_LIBPFM
+} // namespace internal
+} // namespace benchmark
diff --git a/src/perf_counters.h b/src/perf_counters.h
new file mode 100644
index 00000000..0c2c4616
--- /dev/null
+++ b/src/perf_counters.h
@@ -0,0 +1,177 @@
+// Copyright 2021 Google Inc. All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef BENCHMARK_PERF_COUNTERS_H
+#define BENCHMARK_PERF_COUNTERS_H
+
+#include <array>
+#include <cstdint>
+#include <vector>
+
+#include "benchmark/benchmark.h"
+#include "check.h"
+#include "log.h"
+
+#ifndef BENCHMARK_OS_WINDOWS
+#include <unistd.h>
+#endif
+
+namespace benchmark {
+namespace internal {
+
+// Typically, we can only read a small number of counters. There is also a
+// padding preceding counter values, when reading multiple counters with one
+// syscall (which is desirable). PerfCounterValues abstracts these details.
+// The implementation ensures the storage is inlined, and allows 0-based
+// indexing into the counter values.
+// The object is used in conjunction with a PerfCounters object, by passing it
+// to Snapshot(). The values are populated such that
+// perfCounters->names()[i]'s value is obtained at position i (as given by
+// operator[]) of this object.
+class PerfCounterValues {
+ public:
+ explicit PerfCounterValues(size_t nr_counters) : nr_counters_(nr_counters) {
+ CHECK_LE(nr_counters_, kMaxCounters);
+ }
+
+ uint64_t operator[](size_t pos) const { return values_[kPadding + pos]; }
+
+ static constexpr size_t kMaxCounters = 3;
+
+ private:
+ friend class PerfCounters;
+ // Get the byte buffer in which perf counters can be captured.
+ // This is used by PerfCounters::Read
+  std::pair<char*, size_t> get_data_buffer() {
+    return {reinterpret_cast<char*>(values_.data()),
+ sizeof(uint64_t) * (kPadding + nr_counters_)};
+ }
+
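+  // With PERF_FORMAT_GROUP (and no time fields requested), the kernel
+  // prefixes the values with a 64-bit count of the entries read; kPadding
+  // reserves that leading slot.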
+ static constexpr size_t kPadding = 1;
+  std::array<uint64_t, kPadding + kMaxCounters> values_;
+ const size_t nr_counters_;
+};
+
+// Collect PMU counters. The object, once constructed, is ready to be used by
+// calling Snapshot(). PMU counter collection is enabled from the time Create()
+// is called, to obtain the object, until the object's destructor is called.
+class PerfCounters final {
+ public:
+ // True iff this platform supports performance counters.
+ static const bool kSupported;
+
+ bool IsValid() const { return is_valid_; }
+ static PerfCounters NoCounters() { return PerfCounters(); }
+
+ ~PerfCounters();
+ PerfCounters(PerfCounters&&) = default;
+ PerfCounters(const PerfCounters&) = delete;
+
+ // Platform-specific implementations may choose to do some library
+ // initialization here.
+ static bool Initialize();
+
+ // Return a PerfCounters object ready to read the counters with the names
+ // specified. The values are user-mode only. The counter name format is
+ // implementation and OS specific.
+ // TODO: once we move to C++-17, this should be a std::optional, and then the
+ // IsValid() boolean can be dropped.
+  static PerfCounters Create(const std::vector<std::string>& counter_names);
+
+ // Take a snapshot of the current value of the counters into the provided
+ // valid PerfCounterValues storage. The values are populated such that:
+ // names()[i]'s value is (*values)[i]
+ BENCHMARK_ALWAYS_INLINE bool Snapshot(PerfCounterValues* values) {
+#ifndef BENCHMARK_OS_WINDOWS
+ assert(values != nullptr);
+ assert(IsValid());
+ auto buffer = values->get_data_buffer();
+ auto read_bytes = ::read(counter_ids_[0], buffer.first, buffer.second);
+    return static_cast<size_t>(read_bytes) == buffer.second;
+#else
+ (void)values;
+ return false;
+#endif
+ }
+
+  const std::vector<std::string>& names() const { return counter_names_; }
+ size_t num_counters() const { return counter_names_.size(); }
+
+ private:
+  PerfCounters(const std::vector<std::string>& counter_names,
+               std::vector<int>&& counter_ids)
+ : counter_ids_(std::move(counter_ids)),
+ counter_names_(counter_names),
+ is_valid_(true) {}
+ PerfCounters() : is_valid_(false) {}
+
+  std::vector<int> counter_ids_;
+  const std::vector<std::string> counter_names_;
+ const bool is_valid_;
+};
+
+// Typical usage of the above primitives.
+class PerfCountersMeasurement final {
+ public:
+ PerfCountersMeasurement(PerfCounters&& c)
+ : counters_(std::move(c)),
+ start_values_(counters_.IsValid() ? counters_.names().size() : 0),
+ end_values_(counters_.IsValid() ? counters_.names().size() : 0) {}
+
+ bool IsValid() const { return counters_.IsValid(); }
+
+ BENCHMARK_ALWAYS_INLINE void Start() {
+ assert(IsValid());
+ // Tell the compiler to not move instructions above/below where we take
+ // the snapshot.
+ ClobberMemory();
+ counters_.Snapshot(&start_values_);
+ ClobberMemory();
+ }
+
+  BENCHMARK_ALWAYS_INLINE std::vector<std::pair<std::string, double>>
+ StopAndGetMeasurements() {
+ assert(IsValid());
+ // Tell the compiler to not move instructions above/below where we take
+ // the snapshot.
+ ClobberMemory();
+ counters_.Snapshot(&end_values_);
+ ClobberMemory();
+
+    std::vector<std::pair<std::string, double>> ret;
+ for (size_t i = 0; i < counters_.names().size(); ++i) {
+ double measurement = static_cast(end_values_[i]) -
+ static_cast(start_values_[i]);
+ ret.push_back({counters_.names()[i], measurement});
+ }
+ return ret;
+ }
+
+ private:
+ PerfCounters counters_;
+ PerfCounterValues start_values_;
+ PerfCounterValues end_values_;
+};
+
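+// Static initialization anchor: triggers platform library initialization
+// (e.g. pfm_initialize under libpfm) before any benchmark runs.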
+BENCHMARK_UNUSED static bool perf_init_anchor = PerfCounters::Initialize();
+
+} // namespace internal
+} // namespace benchmark
+
+#endif // BENCHMARK_PERF_COUNTERS_H
diff --git a/src/string_util.cc b/src/string_util.cc
index ac60b558..53b1532b 100644
--- a/src/string_util.cc
+++ b/src/string_util.cc
@@ -163,6 +163,19 @@ std::string StrFormat(const char* format, ...) {
return tmp;
}
+std::vector<std::string> StrSplit(const std::string& str, char delim) {
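+  // Splitting the empty string yields a single empty token.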
+  std::vector<std::string> ret;
+ size_t first = 0;
+ size_t next = str.find(delim);
+  for (; next != std::string::npos;
+       first = next + 1, next = str.find(delim, first)) {
+ ret.push_back(str.substr(first, next - first));
+ }
+ ret.push_back(str.substr(first));
+ return ret;
+}
+
#ifdef BENCHMARK_STL_ANDROID_GNUSTL
/*
* GNU STL in Android NDK lacks support for some C++11 functions, including
diff --git a/src/string_util.h b/src/string_util.h
index 09d7b4bd..6bc28b69 100644
--- a/src/string_util.h
+++ b/src/string_util.h
@@ -37,6 +37,8 @@ inline std::string StrCat(Args&&... args) {
return ss.str();
}
+std::vector<std::string> StrSplit(const std::string& str, char delim);
+
#ifdef BENCHMARK_STL_ANDROID_GNUSTL
/*
* GNU STL in Android NDK lacks support for some C++11 functions, including
diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt
index c1a3a3fc..1e7b6829 100644
--- a/test/CMakeLists.txt
+++ b/test/CMakeLists.txt
@@ -128,6 +128,9 @@ add_test(NAME templated_fixture_test COMMAND templated_fixture_test --benchmark_
compile_output_test(user_counters_test)
add_test(NAME user_counters_test COMMAND user_counters_test --benchmark_min_time=0.01)
+compile_output_test(perf_counters_test)
+add_test(NAME perf_counters_test COMMAND perf_counters_test --benchmark_min_time=0.01 --benchmark_perf_counters=CYCLES,BRANCHES)
+
compile_output_test(internal_threading_test)
add_test(NAME internal_threading_test COMMAND internal_threading_test --benchmark_min_time=0.01)
@@ -196,6 +199,7 @@ if (BENCHMARK_ENABLE_GTEST_TESTS)
add_gtest(commandlineflags_gtest)
add_gtest(statistics_gtest)
add_gtest(string_util_gtest)
+ add_gtest(perf_counters_gtest)
endif(BENCHMARK_ENABLE_GTEST_TESTS)
###############################################################################
diff --git a/test/perf_counters_gtest.cc b/test/perf_counters_gtest.cc
new file mode 100644
index 00000000..47894af4
--- /dev/null
+++ b/test/perf_counters_gtest.cc
@@ -0,0 +1,95 @@
+#include "../src/perf_counters.h"
+#include "gtest/gtest.h"
+
+#ifndef GTEST_SKIP
+struct MsgHandler {
+  void operator=(std::ostream&) {}
+};
+#define GTEST_SKIP() return MsgHandler() = std::cout
+#endif
+
+using benchmark::internal::PerfCounters;
+using benchmark::internal::PerfCounterValues;
+
+namespace {
+const char kGenericPerfEvent1[] = "CYCLES";
+const char kGenericPerfEvent2[] = "BRANCHES";
+const char kGenericPerfEvent3[] = "INSTRUCTIONS";
+
+TEST(PerfCountersTest, Init) {
+ EXPECT_EQ(PerfCounters::Initialize(), PerfCounters::kSupported);
+}
+
+TEST(PerfCountersTest, OneCounter) {
+ if (!PerfCounters::kSupported) {
+ GTEST_SKIP() << "Performance counters not supported.\n";
+ }
+ EXPECT_TRUE(PerfCounters::Initialize());
+ EXPECT_TRUE(PerfCounters::Create({kGenericPerfEvent1}).IsValid());
+}
+
+TEST(PerfCountersTest, NegativeTest) {
+ if (!PerfCounters::kSupported) {
+ EXPECT_FALSE(PerfCounters::Initialize());
+ return;
+ }
+ EXPECT_TRUE(PerfCounters::Initialize());
+ EXPECT_FALSE(PerfCounters::Create({}).IsValid());
+ EXPECT_FALSE(PerfCounters::Create({""}).IsValid());
+ EXPECT_FALSE(PerfCounters::Create({"not a counter name"}).IsValid());
+ {
+ EXPECT_TRUE(PerfCounters::Create({kGenericPerfEvent1, kGenericPerfEvent2,
+ kGenericPerfEvent3})
+ .IsValid());
+ }
+ EXPECT_FALSE(
+ PerfCounters::Create({kGenericPerfEvent2, "", kGenericPerfEvent1})
+ .IsValid());
+ EXPECT_FALSE(PerfCounters::Create({kGenericPerfEvent3, "not a counter name",
+ kGenericPerfEvent1})
+ .IsValid());
+ {
+ EXPECT_TRUE(PerfCounters::Create({kGenericPerfEvent1, kGenericPerfEvent2,
+ kGenericPerfEvent3})
+ .IsValid());
+ }
+ EXPECT_FALSE(
+ PerfCounters::Create({kGenericPerfEvent1, kGenericPerfEvent2,
+ kGenericPerfEvent3, "MISPREDICTED_BRANCH_RETIRED"})
+ .IsValid());
+}
+
+TEST(PerfCountersTest, Read1Counter) {
+ if (!PerfCounters::kSupported) {
+ GTEST_SKIP() << "Test skipped because libpfm is not supported.\n";
+ }
+ EXPECT_TRUE(PerfCounters::Initialize());
+ auto counters = PerfCounters::Create({kGenericPerfEvent1});
+ EXPECT_TRUE(counters.IsValid());
+ PerfCounterValues values1(1);
+ EXPECT_TRUE(counters.Snapshot(&values1));
+ EXPECT_GT(values1[0], 0);
+ PerfCounterValues values2(1);
+ EXPECT_TRUE(counters.Snapshot(&values2));
+ EXPECT_GT(values2[0], 0);
+ EXPECT_GT(values2[0], values1[0]);
+}
+
+TEST(PerfCountersTest, Read2Counters) {
+ if (!PerfCounters::kSupported) {
+ GTEST_SKIP() << "Test skipped because libpfm is not supported.\n";
+ }
+ EXPECT_TRUE(PerfCounters::Initialize());
+ auto counters =
+ PerfCounters::Create({kGenericPerfEvent1, kGenericPerfEvent2});
+ EXPECT_TRUE(counters.IsValid());
+ PerfCounterValues values1(2);
+ EXPECT_TRUE(counters.Snapshot(&values1));
+ EXPECT_GT(values1[0], 0);
+ EXPECT_GT(values1[1], 0);
+ PerfCounterValues values2(2);
+ EXPECT_TRUE(counters.Snapshot(&values2));
+ EXPECT_GT(values2[0], 0);
+ EXPECT_GT(values2[1], 0);
+}
+} // namespace
diff --git a/test/perf_counters_test.cc b/test/perf_counters_test.cc
new file mode 100644
index 00000000..d6e0284d
--- /dev/null
+++ b/test/perf_counters_test.cc
@@ -0,0 +1,27 @@
+#undef NDEBUG
+
+#include "../src/perf_counters.h"
+
+#include "benchmark/benchmark.h"
+#include "output_test.h"
+
+void BM_Simple(benchmark::State& state) {
+ for (auto _ : state) {
+ benchmark::DoNotOptimize(state.iterations());
+ }
+}
+BENCHMARK(BM_Simple);
+ADD_CASES(TC_JSONOut, {{"\"name\": \"BM_Simple\",$"}});
+
+void CheckSimple(Results const& e) {
+  CHECK_COUNTER_VALUE(e, double, "CYCLES", GT, 0.0);
+  CHECK_COUNTER_VALUE(e, double, "BRANCHES", GT, 0.0);
+}
+CHECK_BENCHMARK_RESULTS("BM_Simple", &CheckSimple);
+
+int main(int argc, char* argv[]) {
+ if (!benchmark::internal::PerfCounters::kSupported) {
+ return 0;
+ }
+ RunOutputTests(argc, argv);
+}
diff --git a/test/string_util_gtest.cc b/test/string_util_gtest.cc
index 01bf155d..77a719a6 100644
--- a/test/string_util_gtest.cc
+++ b/test/string_util_gtest.cc
@@ -150,4 +150,12 @@ TEST(StringUtilTest, stod) {
#endif
}
+TEST(StringUtilTest, StrSplit) {
+  EXPECT_EQ(benchmark::StrSplit("", ','), std::vector<std::string>{""});
+  EXPECT_EQ(benchmark::StrSplit("hello", ','),
+            std::vector<std::string>({"hello"}));
+  EXPECT_EQ(benchmark::StrSplit("hello,there", ','),
+            std::vector<std::string>({"hello", "there"}));
+}
+
} // end namespace