Cache PerfCounters instance in PerfCountersMeasurement (#1308)

This patch fixes #1306, by reducing the pinned instances of
PerfCounters.

The issue is caused by creating multiple pinned events in the
same thread, doing so results in the Snapshot(PerfCounterValues* values)
failing, and that's now discoverable.
Creating multile pinned events is an unsupported behavior currently.
The error would be detected at read() time, not
perf_event_open() / iotcl() time.

The unsupported benavior above is confirmed by Stephane Eranian @seranian,
and he also pointed the dectection method.

Finished this patch under the guidance of Mircea Trofin @mtrofin.
This commit is contained in:
Liqiang TAO 2022-01-25 19:14:20 +09:00 committed by GitHub
parent 57b2bfa33b
commit d0fbf8ac23
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 123 additions and 25 deletions

View File

@ -184,7 +184,10 @@ void State::PauseTiming() {
BM_CHECK(started_ && !finished_ && !error_occurred_);
timer_->StopTimer();
if (perf_counters_measurement_) {
auto measurements = perf_counters_measurement_->StopAndGetMeasurements();
std::vector<std::pair<std::string, double>> measurements;
if (!perf_counters_measurement_->Stop(measurements)) {
BM_CHECK(false) << "Perf counters read the value failed.";
}
for (const auto& name_and_measurement : measurements) {
auto name = name_and_measurement.first;
auto measurement = name_and_measurement.second;

View File

@ -152,8 +152,7 @@ BenchmarkRunner::BenchmarkRunner(
has_explicit_iteration_count(b.iterations() != 0),
pool(b.threads() - 1),
iters(has_explicit_iteration_count ? b.iterations() : 1),
perf_counters_measurement(
PerfCounters::Create(StrSplit(FLAGS_benchmark_perf_counters, ','))),
perf_counters_measurement(StrSplit(FLAGS_benchmark_perf_counters, ',')),
perf_counters_measurement_ptr(perf_counters_measurement.IsValid()
? &perf_counters_measurement
: nullptr) {

View File

@ -15,6 +15,7 @@
#include "perf_counters.h"
#include <cstring>
#include <memory>
#include <vector>
#if defined HAVE_LIBPFM
@ -104,7 +105,7 @@ PerfCounters PerfCounters::Create(
return PerfCounters(counter_names, std::move(counter_ids));
}
PerfCounters::~PerfCounters() {
void PerfCounters::CloseCounters() const {
if (counter_ids_.empty()) {
return;
}
@ -126,7 +127,44 @@ PerfCounters PerfCounters::Create(
return NoCounters();
}
PerfCounters::~PerfCounters() = default;
void PerfCounters::CloseCounters() const {}
#endif // defined HAVE_LIBPFM
Mutex PerfCountersMeasurement::mutex_;
int PerfCountersMeasurement::ref_count_ = 0;
PerfCounters PerfCountersMeasurement::counters_ = PerfCounters::NoCounters();
PerfCountersMeasurement::PerfCountersMeasurement(
const std::vector<std::string>& counter_names)
: start_values_(counter_names.size()), end_values_(counter_names.size()) {
MutexLock l(mutex_);
if (ref_count_ == 0) {
counters_ = PerfCounters::Create(counter_names);
}
// We chose to increment it even if `counters_` ends up invalid,
// so that we don't keep trying to create, and also since the dtor
// will decrement regardless of `counters_`'s validity
++ref_count_;
BM_CHECK(!counters_.IsValid() || counters_.names() == counter_names);
}
PerfCountersMeasurement::~PerfCountersMeasurement() {
MutexLock l(mutex_);
--ref_count_;
if (ref_count_ == 0) {
counters_ = PerfCounters::NoCounters();
}
}
PerfCounters& PerfCounters::operator=(PerfCounters&& other) noexcept {
if (this != &other) {
CloseCounters();
counter_ids_ = std::move(other.counter_ids_);
counter_names_ = std::move(other.counter_names_);
}
return *this;
}
} // namespace internal
} // namespace benchmark

View File

@ -17,11 +17,13 @@
#include <array>
#include <cstdint>
#include <memory>
#include <vector>
#include "benchmark/benchmark.h"
#include "check.h"
#include "log.h"
#include "mutex.h"
#ifndef BENCHMARK_OS_WINDOWS
#include <unistd.h>
@ -71,12 +73,14 @@ class PerfCounters final {
// True iff this platform supports performance counters.
static const bool kSupported;
bool IsValid() const { return is_valid_; }
bool IsValid() const { return !counter_names_.empty(); }
static PerfCounters NoCounters() { return PerfCounters(); }
~PerfCounters();
~PerfCounters() { CloseCounters(); }
PerfCounters(PerfCounters&&) = default;
PerfCounters(const PerfCounters&) = delete;
PerfCounters& operator=(PerfCounters&&) noexcept;
PerfCounters& operator=(const PerfCounters&) = delete;
// Platform-specific implementations may choose to do some library
// initialization here.
@ -111,24 +115,27 @@ class PerfCounters final {
private:
PerfCounters(const std::vector<std::string>& counter_names,
std::vector<int>&& counter_ids)
: counter_ids_(std::move(counter_ids)),
counter_names_(counter_names),
is_valid_(true) {}
PerfCounters() : is_valid_(false) {}
: counter_ids_(std::move(counter_ids)), counter_names_(counter_names) {}
PerfCounters() = default;
void CloseCounters() const;
std::vector<int> counter_ids_;
const std::vector<std::string> counter_names_;
const bool is_valid_;
std::vector<std::string> counter_names_;
};
// Typical usage of the above primitives.
class PerfCountersMeasurement final {
public:
PerfCountersMeasurement(PerfCounters&& c)
: counters_(std::move(c)),
start_values_(counters_.IsValid() ? counters_.names().size() : 0),
end_values_(counters_.IsValid() ? counters_.names().size() : 0) {}
PerfCountersMeasurement(const std::vector<std::string>& counter_names);
~PerfCountersMeasurement();
// The only way to get to `counters_` is after ctor-ing a
// `PerfCountersMeasurement`, which means that `counters_`'s state is, here,
// decided (either invalid or valid) and won't change again even if a ctor is
// concurrently running with this. This is preferring efficiency to
// maintainability, because the address of the static can be known at compile
// time.
bool IsValid() const { return counters_.IsValid(); }
BENCHMARK_ALWAYS_INLINE void Start() {
@ -136,30 +143,33 @@ class PerfCountersMeasurement final {
// Tell the compiler to not move instructions above/below where we take
// the snapshot.
ClobberMemory();
counters_.Snapshot(&start_values_);
valid_read_ &= counters_.Snapshot(&start_values_);
ClobberMemory();
}
BENCHMARK_ALWAYS_INLINE std::vector<std::pair<std::string, double>>
StopAndGetMeasurements() {
BENCHMARK_ALWAYS_INLINE bool Stop(
std::vector<std::pair<std::string, double>>& measurements) {
assert(IsValid());
// Tell the compiler to not move instructions above/below where we take
// the snapshot.
ClobberMemory();
counters_.Snapshot(&end_values_);
valid_read_ &= counters_.Snapshot(&end_values_);
ClobberMemory();
std::vector<std::pair<std::string, double>> ret;
for (size_t i = 0; i < counters_.names().size(); ++i) {
double measurement = static_cast<double>(end_values_[i]) -
static_cast<double>(start_values_[i]);
ret.push_back({counters_.names()[i], measurement});
measurements.push_back({counters_.names()[i], measurement});
}
return ret;
return valid_read_;
}
private:
PerfCounters counters_;
static Mutex mutex_;
GUARDED_BY(mutex_) static int ref_count_;
GUARDED_BY(mutex_) static PerfCounters counters_;
bool valid_read_ = true;
PerfCounterValues start_values_;
PerfCounterValues end_values_;
};

View File

@ -11,6 +11,7 @@ struct MsgHandler {
#endif
using benchmark::internal::PerfCounters;
using benchmark::internal::PerfCountersMeasurement;
using benchmark::internal::PerfCounterValues;
namespace {
@ -95,6 +96,53 @@ TEST(PerfCountersTest, Read2Counters) {
EXPECT_GT(values2[1], 0);
}
TEST(PerfCountersTest, ReopenExistingCounters) {
// The test works (i.e. causes read to fail) for the assumptions
// about hardware capabilities (i.e. small number (3-4) hardware
// counters) at this date.
if (!PerfCounters::kSupported) {
GTEST_SKIP() << "Test skipped because libpfm is not supported.\n";
}
EXPECT_TRUE(PerfCounters::Initialize());
std::vector<PerfCounters> counters;
counters.reserve(6);
for (int i = 0; i < 6; i++)
counters.push_back(PerfCounters::Create({kGenericPerfEvent1}));
PerfCounterValues values(1);
EXPECT_TRUE(counters[0].Snapshot(&values));
EXPECT_FALSE(counters[4].Snapshot(&values));
EXPECT_FALSE(counters[5].Snapshot(&values));
}
TEST(PerfCountersTest, CreateExistingMeasurements) {
// The test works (i.e. causes read to fail) for the assumptions
// about hardware capabilities (i.e. small number (3-4) hardware
// counters) at this date,
// the same as previous test ReopenExistingCounters.
if (!PerfCounters::kSupported) {
GTEST_SKIP() << "Test skipped because libpfm is not supported.\n";
}
EXPECT_TRUE(PerfCounters::Initialize());
std::vector<PerfCountersMeasurement> perf_counter_measurements;
std::vector<std::pair<std::string, double>> measurements;
perf_counter_measurements.reserve(10);
for (int i = 0; i < 10; i++)
perf_counter_measurements.emplace_back(
std::vector<std::string>{kGenericPerfEvent1});
perf_counter_measurements[0].Start();
EXPECT_TRUE(perf_counter_measurements[0].Stop(measurements));
measurements.clear();
perf_counter_measurements[8].Start();
EXPECT_FALSE(perf_counter_measurements[8].Stop(measurements));
measurements.clear();
perf_counter_measurements[9].Start();
EXPECT_FALSE(perf_counter_measurements[9].Stop(measurements));
}
size_t do_work() {
size_t res = 0;
for (size_t i = 0; i < 100000000; ++i) res += i * i;