mirror of
https://github.com/google/benchmark.git
synced 2024-11-28 15:34:33 +00:00
9c65aebb26
* perf_counters: Initialize once only when needed This works around some performance problems running Android under QEMU. Calling `pfm_initialize` was very slow, and was called during dynamic initialization (before `main` or when loaded as a shared library). This happened whenever benchmark was linked, even if no benchmarks were run. Instead, call `pfm_initialize` at most once, and only when one of: 1. `PerfCounters::Initialize` is called 2. `PerfCounters::Create` is called with a non-empty counter list 3. `PerfCounters::IsCounterSupported` is called The return value of the first `pfm_initialize()` is saved and returned from all subsequent `PerfCounters::Initialize` calls. * perf_counters: Make success var const * InitLibPfmOnce: Inline function
201 lines
6.7 KiB
C++
201 lines
6.7 KiB
C++
// Copyright 2021 Google Inc. All rights reserved.
|
|
//
|
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
|
// you may not use this file except in compliance with the License.
|
|
// You may obtain a copy of the License at
|
|
//
|
|
// http://www.apache.org/licenses/LICENSE-2.0
|
|
//
|
|
// Unless required by applicable law or agreed to in writing, software
|
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
// See the License for the specific language governing permissions and
|
|
// limitations under the License.
|
|
|
|
#ifndef BENCHMARK_PERF_COUNTERS_H
|
|
#define BENCHMARK_PERF_COUNTERS_H
|
|
|
|
#include <array>
|
|
#include <cstdint>
|
|
#include <cstring>
|
|
#include <memory>
|
|
#include <vector>
|
|
|
|
#include "benchmark/benchmark.h"
|
|
#include "check.h"
|
|
#include "log.h"
|
|
#include "mutex.h"
|
|
|
|
#ifndef BENCHMARK_OS_WINDOWS
|
|
#include <unistd.h>
|
|
#endif
|
|
|
|
#if defined(_MSC_VER)
|
|
#pragma warning(push)
|
|
// C4251: <symbol> needs to have dll-interface to be used by clients of class
|
|
#pragma warning(disable : 4251)
|
|
#endif
|
|
|
|
namespace benchmark {
|
|
namespace internal {
|
|
|
|
// Typically, we can only read a small number of counters. There is also a
|
|
// padding preceding counter values, when reading multiple counters with one
|
|
// syscall (which is desirable). PerfCounterValues abstracts these details.
|
|
// The implementation ensures the storage is inlined, and allows 0-based
|
|
// indexing into the counter values.
|
|
// The object is used in conjunction with a PerfCounters object, by passing it
|
|
// to Snapshot(). The Read() method relocates individual reads, discarding
|
|
// the initial padding from each group leader in the values buffer such that
|
|
// all user accesses through the [] operator are correct.
|
|
class BENCHMARK_EXPORT PerfCounterValues {
|
|
public:
|
|
explicit PerfCounterValues(size_t nr_counters) : nr_counters_(nr_counters) {
|
|
BM_CHECK_LE(nr_counters_, kMaxCounters);
|
|
}
|
|
|
|
// We are reading correctly now so the values don't need to skip padding
|
|
uint64_t operator[](size_t pos) const { return values_[pos]; }
|
|
|
|
// Increased the maximum to 32 only since the buffer
|
|
// is std::array<> backed
|
|
static constexpr size_t kMaxCounters = 32;
|
|
|
|
private:
|
|
friend class PerfCounters;
|
|
// Get the byte buffer in which perf counters can be captured.
|
|
// This is used by PerfCounters::Read
|
|
std::pair<char*, size_t> get_data_buffer() {
|
|
return {reinterpret_cast<char*>(values_.data()),
|
|
sizeof(uint64_t) * (kPadding + nr_counters_)};
|
|
}
|
|
|
|
// This reading is complex and as the goal of this class is to
|
|
// abstract away the intrincacies of the reading process, this is
|
|
// a better place for it
|
|
size_t Read(const std::vector<int>& leaders);
|
|
|
|
// Move the padding to 2 due to the reading algorithm (1st padding plus a
|
|
// current read padding)
|
|
static constexpr size_t kPadding = 2;
|
|
std::array<uint64_t, kPadding + kMaxCounters> values_;
|
|
const size_t nr_counters_;
|
|
};
|
|
|
|
// Collect PMU counters. The object, once constructed, is ready to be used by
|
|
// calling read(). PMU counter collection is enabled from the time create() is
|
|
// called, to obtain the object, until the object's destructor is called.
|
|
class BENCHMARK_EXPORT PerfCounters final {
|
|
public:
|
|
// True iff this platform supports performance counters.
|
|
static const bool kSupported;
|
|
|
|
// Returns an empty object
|
|
static PerfCounters NoCounters() { return PerfCounters(); }
|
|
|
|
~PerfCounters() { CloseCounters(); }
|
|
PerfCounters() = default;
|
|
PerfCounters(PerfCounters&&) = default;
|
|
PerfCounters(const PerfCounters&) = delete;
|
|
PerfCounters& operator=(PerfCounters&&) noexcept;
|
|
PerfCounters& operator=(const PerfCounters&) = delete;
|
|
|
|
// Platform-specific implementations may choose to do some library
|
|
// initialization here.
|
|
static bool Initialize();
|
|
|
|
// Check if the given counter is supported, if the app wants to
|
|
// check before passing
|
|
static bool IsCounterSupported(const std::string& name);
|
|
|
|
// Return a PerfCounters object ready to read the counters with the names
|
|
// specified. The values are user-mode only. The counter name format is
|
|
// implementation and OS specific.
|
|
// In case of failure, this method will in the worst case return an
|
|
// empty object whose state will still be valid.
|
|
static PerfCounters Create(const std::vector<std::string>& counter_names);
|
|
|
|
// Take a snapshot of the current value of the counters into the provided
|
|
// valid PerfCounterValues storage. The values are populated such that:
|
|
// names()[i]'s value is (*values)[i]
|
|
BENCHMARK_ALWAYS_INLINE bool Snapshot(PerfCounterValues* values) const {
|
|
#ifndef BENCHMARK_OS_WINDOWS
|
|
assert(values != nullptr);
|
|
return values->Read(leader_ids_) == counter_ids_.size();
|
|
#else
|
|
(void)values;
|
|
return false;
|
|
#endif
|
|
}
|
|
|
|
const std::vector<std::string>& names() const { return counter_names_; }
|
|
size_t num_counters() const { return counter_names_.size(); }
|
|
|
|
private:
|
|
PerfCounters(const std::vector<std::string>& counter_names,
|
|
std::vector<int>&& counter_ids, std::vector<int>&& leader_ids)
|
|
: counter_ids_(std::move(counter_ids)),
|
|
leader_ids_(std::move(leader_ids)),
|
|
counter_names_(counter_names) {}
|
|
|
|
void CloseCounters() const;
|
|
|
|
std::vector<int> counter_ids_;
|
|
std::vector<int> leader_ids_;
|
|
std::vector<std::string> counter_names_;
|
|
};
|
|
|
|
// Typical usage of the above primitives.
|
|
class BENCHMARK_EXPORT PerfCountersMeasurement final {
|
|
public:
|
|
PerfCountersMeasurement(const std::vector<std::string>& counter_names);
|
|
|
|
size_t num_counters() const { return counters_.num_counters(); }
|
|
|
|
std::vector<std::string> names() const { return counters_.names(); }
|
|
|
|
BENCHMARK_ALWAYS_INLINE bool Start() {
|
|
if (num_counters() == 0) return true;
|
|
// Tell the compiler to not move instructions above/below where we take
|
|
// the snapshot.
|
|
ClobberMemory();
|
|
valid_read_ &= counters_.Snapshot(&start_values_);
|
|
ClobberMemory();
|
|
|
|
return valid_read_;
|
|
}
|
|
|
|
BENCHMARK_ALWAYS_INLINE bool Stop(
|
|
std::vector<std::pair<std::string, double>>& measurements) {
|
|
if (num_counters() == 0) return true;
|
|
// Tell the compiler to not move instructions above/below where we take
|
|
// the snapshot.
|
|
ClobberMemory();
|
|
valid_read_ &= counters_.Snapshot(&end_values_);
|
|
ClobberMemory();
|
|
|
|
for (size_t i = 0; i < counters_.names().size(); ++i) {
|
|
double measurement = static_cast<double>(end_values_[i]) -
|
|
static_cast<double>(start_values_[i]);
|
|
measurements.push_back({counters_.names()[i], measurement});
|
|
}
|
|
|
|
return valid_read_;
|
|
}
|
|
|
|
private:
|
|
PerfCounters counters_;
|
|
bool valid_read_ = true;
|
|
PerfCounterValues start_values_;
|
|
PerfCounterValues end_values_;
|
|
};
|
|
|
|
} // namespace internal
|
|
} // namespace benchmark
|
|
|
|
#if defined(_MSC_VER)
|
|
#pragma warning(pop)
|
|
#endif
|
|
|
|
#endif // BENCHMARK_PERF_COUNTERS_H
|