mirror of https://github.com/google/benchmark.git
Cache PerfCounters instance in PerfCountersMeasurement (#1308)
This patch fixes #1306, by reducing the pinned instances of PerfCounters. The issue is caused by creating multiple pinned events in the same thread, doing so results in the Snapshot(PerfCounterValues* values) failing, and that's now discoverable. Creating multile pinned events is an unsupported behavior currently. The error would be detected at read() time, not perf_event_open() / iotcl() time. The unsupported benavior above is confirmed by Stephane Eranian @seranian, and he also pointed the dectection method. Finished this patch under the guidance of Mircea Trofin @mtrofin.
This commit is contained in:
parent
57b2bfa33b
commit
d0fbf8ac23
|
@ -184,7 +184,10 @@ void State::PauseTiming() {
|
|||
BM_CHECK(started_ && !finished_ && !error_occurred_);
|
||||
timer_->StopTimer();
|
||||
if (perf_counters_measurement_) {
|
||||
auto measurements = perf_counters_measurement_->StopAndGetMeasurements();
|
||||
std::vector<std::pair<std::string, double>> measurements;
|
||||
if (!perf_counters_measurement_->Stop(measurements)) {
|
||||
BM_CHECK(false) << "Perf counters read the value failed.";
|
||||
}
|
||||
for (const auto& name_and_measurement : measurements) {
|
||||
auto name = name_and_measurement.first;
|
||||
auto measurement = name_and_measurement.second;
|
||||
|
|
|
@ -152,8 +152,7 @@ BenchmarkRunner::BenchmarkRunner(
|
|||
has_explicit_iteration_count(b.iterations() != 0),
|
||||
pool(b.threads() - 1),
|
||||
iters(has_explicit_iteration_count ? b.iterations() : 1),
|
||||
perf_counters_measurement(
|
||||
PerfCounters::Create(StrSplit(FLAGS_benchmark_perf_counters, ','))),
|
||||
perf_counters_measurement(StrSplit(FLAGS_benchmark_perf_counters, ',')),
|
||||
perf_counters_measurement_ptr(perf_counters_measurement.IsValid()
|
||||
? &perf_counters_measurement
|
||||
: nullptr) {
|
||||
|
|
|
@ -15,6 +15,7 @@
|
|||
#include "perf_counters.h"
|
||||
|
||||
#include <cstring>
|
||||
#include <memory>
|
||||
#include <vector>
|
||||
|
||||
#if defined HAVE_LIBPFM
|
||||
|
@ -104,7 +105,7 @@ PerfCounters PerfCounters::Create(
|
|||
return PerfCounters(counter_names, std::move(counter_ids));
|
||||
}
|
||||
|
||||
PerfCounters::~PerfCounters() {
|
||||
void PerfCounters::CloseCounters() const {
|
||||
if (counter_ids_.empty()) {
|
||||
return;
|
||||
}
|
||||
|
@ -126,7 +127,44 @@ PerfCounters PerfCounters::Create(
|
|||
return NoCounters();
|
||||
}
|
||||
|
||||
PerfCounters::~PerfCounters() = default;
|
||||
void PerfCounters::CloseCounters() const {}
|
||||
#endif // defined HAVE_LIBPFM
|
||||
|
||||
Mutex PerfCountersMeasurement::mutex_;
|
||||
int PerfCountersMeasurement::ref_count_ = 0;
|
||||
PerfCounters PerfCountersMeasurement::counters_ = PerfCounters::NoCounters();
|
||||
|
||||
PerfCountersMeasurement::PerfCountersMeasurement(
|
||||
const std::vector<std::string>& counter_names)
|
||||
: start_values_(counter_names.size()), end_values_(counter_names.size()) {
|
||||
MutexLock l(mutex_);
|
||||
if (ref_count_ == 0) {
|
||||
counters_ = PerfCounters::Create(counter_names);
|
||||
}
|
||||
// We chose to increment it even if `counters_` ends up invalid,
|
||||
// so that we don't keep trying to create, and also since the dtor
|
||||
// will decrement regardless of `counters_`'s validity
|
||||
++ref_count_;
|
||||
|
||||
BM_CHECK(!counters_.IsValid() || counters_.names() == counter_names);
|
||||
}
|
||||
|
||||
PerfCountersMeasurement::~PerfCountersMeasurement() {
|
||||
MutexLock l(mutex_);
|
||||
--ref_count_;
|
||||
if (ref_count_ == 0) {
|
||||
counters_ = PerfCounters::NoCounters();
|
||||
}
|
||||
}
|
||||
|
||||
PerfCounters& PerfCounters::operator=(PerfCounters&& other) noexcept {
|
||||
if (this != &other) {
|
||||
CloseCounters();
|
||||
|
||||
counter_ids_ = std::move(other.counter_ids_);
|
||||
counter_names_ = std::move(other.counter_names_);
|
||||
}
|
||||
return *this;
|
||||
}
|
||||
} // namespace internal
|
||||
} // namespace benchmark
|
||||
|
|
|
@ -17,11 +17,13 @@
|
|||
|
||||
#include <array>
|
||||
#include <cstdint>
|
||||
#include <memory>
|
||||
#include <vector>
|
||||
|
||||
#include "benchmark/benchmark.h"
|
||||
#include "check.h"
|
||||
#include "log.h"
|
||||
#include "mutex.h"
|
||||
|
||||
#ifndef BENCHMARK_OS_WINDOWS
|
||||
#include <unistd.h>
|
||||
|
@ -71,12 +73,14 @@ class PerfCounters final {
|
|||
// True iff this platform supports performance counters.
|
||||
static const bool kSupported;
|
||||
|
||||
bool IsValid() const { return is_valid_; }
|
||||
bool IsValid() const { return !counter_names_.empty(); }
|
||||
static PerfCounters NoCounters() { return PerfCounters(); }
|
||||
|
||||
~PerfCounters();
|
||||
~PerfCounters() { CloseCounters(); }
|
||||
PerfCounters(PerfCounters&&) = default;
|
||||
PerfCounters(const PerfCounters&) = delete;
|
||||
PerfCounters& operator=(PerfCounters&&) noexcept;
|
||||
PerfCounters& operator=(const PerfCounters&) = delete;
|
||||
|
||||
// Platform-specific implementations may choose to do some library
|
||||
// initialization here.
|
||||
|
@ -111,24 +115,27 @@ class PerfCounters final {
|
|||
private:
|
||||
PerfCounters(const std::vector<std::string>& counter_names,
|
||||
std::vector<int>&& counter_ids)
|
||||
: counter_ids_(std::move(counter_ids)),
|
||||
counter_names_(counter_names),
|
||||
is_valid_(true) {}
|
||||
PerfCounters() : is_valid_(false) {}
|
||||
: counter_ids_(std::move(counter_ids)), counter_names_(counter_names) {}
|
||||
PerfCounters() = default;
|
||||
|
||||
void CloseCounters() const;
|
||||
|
||||
std::vector<int> counter_ids_;
|
||||
const std::vector<std::string> counter_names_;
|
||||
const bool is_valid_;
|
||||
std::vector<std::string> counter_names_;
|
||||
};
|
||||
|
||||
// Typical usage of the above primitives.
|
||||
class PerfCountersMeasurement final {
|
||||
public:
|
||||
PerfCountersMeasurement(PerfCounters&& c)
|
||||
: counters_(std::move(c)),
|
||||
start_values_(counters_.IsValid() ? counters_.names().size() : 0),
|
||||
end_values_(counters_.IsValid() ? counters_.names().size() : 0) {}
|
||||
PerfCountersMeasurement(const std::vector<std::string>& counter_names);
|
||||
~PerfCountersMeasurement();
|
||||
|
||||
// The only way to get to `counters_` is after ctor-ing a
|
||||
// `PerfCountersMeasurement`, which means that `counters_`'s state is, here,
|
||||
// decided (either invalid or valid) and won't change again even if a ctor is
|
||||
// concurrently running with this. This is preferring efficiency to
|
||||
// maintainability, because the address of the static can be known at compile
|
||||
// time.
|
||||
bool IsValid() const { return counters_.IsValid(); }
|
||||
|
||||
BENCHMARK_ALWAYS_INLINE void Start() {
|
||||
|
@ -136,30 +143,33 @@ class PerfCountersMeasurement final {
|
|||
// Tell the compiler to not move instructions above/below where we take
|
||||
// the snapshot.
|
||||
ClobberMemory();
|
||||
counters_.Snapshot(&start_values_);
|
||||
valid_read_ &= counters_.Snapshot(&start_values_);
|
||||
ClobberMemory();
|
||||
}
|
||||
|
||||
BENCHMARK_ALWAYS_INLINE std::vector<std::pair<std::string, double>>
|
||||
StopAndGetMeasurements() {
|
||||
BENCHMARK_ALWAYS_INLINE bool Stop(
|
||||
std::vector<std::pair<std::string, double>>& measurements) {
|
||||
assert(IsValid());
|
||||
// Tell the compiler to not move instructions above/below where we take
|
||||
// the snapshot.
|
||||
ClobberMemory();
|
||||
counters_.Snapshot(&end_values_);
|
||||
valid_read_ &= counters_.Snapshot(&end_values_);
|
||||
ClobberMemory();
|
||||
|
||||
std::vector<std::pair<std::string, double>> ret;
|
||||
for (size_t i = 0; i < counters_.names().size(); ++i) {
|
||||
double measurement = static_cast<double>(end_values_[i]) -
|
||||
static_cast<double>(start_values_[i]);
|
||||
ret.push_back({counters_.names()[i], measurement});
|
||||
measurements.push_back({counters_.names()[i], measurement});
|
||||
}
|
||||
return ret;
|
||||
|
||||
return valid_read_;
|
||||
}
|
||||
|
||||
private:
|
||||
PerfCounters counters_;
|
||||
static Mutex mutex_;
|
||||
GUARDED_BY(mutex_) static int ref_count_;
|
||||
GUARDED_BY(mutex_) static PerfCounters counters_;
|
||||
bool valid_read_ = true;
|
||||
PerfCounterValues start_values_;
|
||||
PerfCounterValues end_values_;
|
||||
};
|
||||
|
|
|
@ -11,6 +11,7 @@ struct MsgHandler {
|
|||
#endif
|
||||
|
||||
using benchmark::internal::PerfCounters;
|
||||
using benchmark::internal::PerfCountersMeasurement;
|
||||
using benchmark::internal::PerfCounterValues;
|
||||
|
||||
namespace {
|
||||
|
@ -95,6 +96,53 @@ TEST(PerfCountersTest, Read2Counters) {
|
|||
EXPECT_GT(values2[1], 0);
|
||||
}
|
||||
|
||||
TEST(PerfCountersTest, ReopenExistingCounters) {
|
||||
// The test works (i.e. causes read to fail) for the assumptions
|
||||
// about hardware capabilities (i.e. small number (3-4) hardware
|
||||
// counters) at this date.
|
||||
if (!PerfCounters::kSupported) {
|
||||
GTEST_SKIP() << "Test skipped because libpfm is not supported.\n";
|
||||
}
|
||||
EXPECT_TRUE(PerfCounters::Initialize());
|
||||
std::vector<PerfCounters> counters;
|
||||
counters.reserve(6);
|
||||
for (int i = 0; i < 6; i++)
|
||||
counters.push_back(PerfCounters::Create({kGenericPerfEvent1}));
|
||||
PerfCounterValues values(1);
|
||||
EXPECT_TRUE(counters[0].Snapshot(&values));
|
||||
EXPECT_FALSE(counters[4].Snapshot(&values));
|
||||
EXPECT_FALSE(counters[5].Snapshot(&values));
|
||||
}
|
||||
|
||||
TEST(PerfCountersTest, CreateExistingMeasurements) {
|
||||
// The test works (i.e. causes read to fail) for the assumptions
|
||||
// about hardware capabilities (i.e. small number (3-4) hardware
|
||||
// counters) at this date,
|
||||
// the same as previous test ReopenExistingCounters.
|
||||
if (!PerfCounters::kSupported) {
|
||||
GTEST_SKIP() << "Test skipped because libpfm is not supported.\n";
|
||||
}
|
||||
EXPECT_TRUE(PerfCounters::Initialize());
|
||||
std::vector<PerfCountersMeasurement> perf_counter_measurements;
|
||||
std::vector<std::pair<std::string, double>> measurements;
|
||||
|
||||
perf_counter_measurements.reserve(10);
|
||||
for (int i = 0; i < 10; i++)
|
||||
perf_counter_measurements.emplace_back(
|
||||
std::vector<std::string>{kGenericPerfEvent1});
|
||||
|
||||
perf_counter_measurements[0].Start();
|
||||
EXPECT_TRUE(perf_counter_measurements[0].Stop(measurements));
|
||||
|
||||
measurements.clear();
|
||||
perf_counter_measurements[8].Start();
|
||||
EXPECT_FALSE(perf_counter_measurements[8].Stop(measurements));
|
||||
|
||||
measurements.clear();
|
||||
perf_counter_measurements[9].Start();
|
||||
EXPECT_FALSE(perf_counter_measurements[9].Stop(measurements));
|
||||
}
|
||||
|
||||
size_t do_work() {
|
||||
size_t res = 0;
|
||||
for (size_t i = 0; i < 100000000; ++i) res += i * i;
|
||||
|
|
Loading…
Reference in New Issue