mirror of https://github.com/google/benchmark.git
Refactoring of PerfCounters infrastructure (#1559)

* Refactoring of PerfCounters infrastructure

The main feature in this pull request is the removal of the static sharing of PerfCounters. Instead, they are created in the top-level `RunBenchmarks()` function, where all benchmark runners are created. A single PerfCountersMeasurement object is created and then shared with all the new BenchmarkRunner objects, one per existing benchmark.

Other features conflated in this PR are:
- Added BENCHMARK_DONT_OPTIMIZE macro in global scope
- Removal of the `IsValid()` query, replaced by checking the number of remaining counters after validity tests
- Refactoring of all GTests to reflect the changes and new semantics
- Extra comments throughout the new code to clarify intent

It was extremely hard to separate these features into different PRs as requested since they are so interdependent, so I'm pushing them all together and asking for forgiveness. This PR replaces PRs 1555 and 1558, which have been closed.

* Fixed whitespace issue with clang-format

My clang-format insists on deleting this single white space on line 601, while GitHub's clang-format breaks when it is added. I had to disable format-on-save to check in this revert change. I'm using clang 14.0.6.
This commit is contained in:
parent 9885aefb96
commit fbc6efa9b5
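For orientation, a minimal sketch of the new ownership model described above (names taken from the diff below; simplified, not the actual implementation):

  // One measurement object is created up front in RunBenchmarks() and shared,
  // by pointer, with every BenchmarkRunner, instead of each runner relying on
  // statically shared PerfCounters state.
  PerfCountersMeasurement perfcounters(
      StrSplit(FLAGS_benchmark_perf_counters, ','));
  std::vector<internal::BenchmarkRunner> runners;
  for (const BenchmarkInstance& benchmark : benchmarks) {
    runners.emplace_back(benchmark, &perfcounters, reports_for_family);
  }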
@@ -218,6 +218,18 @@ BENCHMARK(BM_test)->Unit(benchmark::kMillisecond);
 #define BENCHMARK_UNUSED
 #endif
 
+// Used to annotate functions, methods and classes so they
+// are not optimized by the compiler. Useful for tests
+// where you expect loops to stay in place churning cycles
+#if defined(__clang__)
+#define BENCHMARK_DONT_OPTIMIZE __attribute__((optnone))
+#elif defined(__GNUC__) || defined(__GNUG__)
+#define BENCHMARK_DONT_OPTIMIZE __attribute__((optimize(0)))
+#else
+// MSVC & Intel do not have a no-optimize attribute, only line pragmas
+#define BENCHMARK_DONT_OPTIMIZE
+#endif
+
 #if defined(__GNUC__) || defined(__clang__)
 #define BENCHMARK_ALWAYS_INLINE __attribute__((always_inline))
 #elif defined(_MSC_VER) && !defined(__clang__)
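As an illustration of the new macro (hypothetical function, not part of this commit), annotating a helper keeps the compiler from folding its loop away:

  // Hypothetical example: without the attribute, an optimizer may reduce this
  // loop to a closed-form expression and no cycles would be churned.
  BENCHMARK_DONT_OPTIMIZE size_t ChurnCycles() {
    size_t sum = 0;
    for (size_t i = 0; i < 1000000; ++i) sum += i * i;
    return sum;
  }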
@@ -348,14 +348,26 @@ void RunBenchmarks(const std::vector<BenchmarkInstance>& benchmarks,
 
   size_t num_repetitions_total = 0;
 
+  // This perfcounters object needs to be created before the runners vector
+  // below so it outlasts their lifetime.
+  PerfCountersMeasurement perfcounters(
+      StrSplit(FLAGS_benchmark_perf_counters, ','));
+
+  // Vector of benchmarks to run
   std::vector<internal::BenchmarkRunner> runners;
   runners.reserve(benchmarks.size());
 
+  // Count the number of benchmarks with threads to warn the user in case
+  // performance counters are used.
+  int benchmarks_with_threads = 0;
+
+  // Loop through all benchmarks
   for (const BenchmarkInstance& benchmark : benchmarks) {
     BenchmarkReporter::PerFamilyRunReports* reports_for_family = nullptr;
     if (benchmark.complexity() != oNone)
       reports_for_family = &per_family_reports[benchmark.family_index()];
-    runners.emplace_back(benchmark, reports_for_family);
+    benchmarks_with_threads += (benchmark.threads() > 0);
+    runners.emplace_back(benchmark, &perfcounters, reports_for_family);
     int num_repeats_of_this_instance = runners.back().GetNumRepeats();
     num_repetitions_total += num_repeats_of_this_instance;
     if (reports_for_family)
@@ -363,6 +375,17 @@ void RunBenchmarks(const std::vector<BenchmarkInstance>& benchmarks,
   }
   assert(runners.size() == benchmarks.size() && "Unexpected runner count.");
 
+  // The use of performance counters with threads would be unintuitive for
+  // the average user so we need to warn them about this case
+  if ((benchmarks_with_threads > 0) && (perfcounters.num_counters() > 0)) {
+    GetErrorLogInstance()
+        << "***WARNING*** There are " << benchmarks_with_threads
+        << " benchmarks with threads and " << perfcounters.num_counters()
+        << " performance counters were requested. Beware counters will "
+           "reflect the combined usage across all "
+           "threads.\n";
+  }
+
   std::vector<size_t> repetition_indices;
   repetition_indices.reserve(num_repetitions_total);
   for (size_t runner_index = 0, num_runners = runners.size();
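A hypothetical setup that would trigger this new warning (assuming a build with perf counter support and a run with --benchmark_perf_counters=CYCLES; counter name is platform specific):

  #include <benchmark/benchmark.h>

  // Counters are read process-wide, so with Threads(2) the reported CYCLES
  // value combines the usage of both worker threads.
  static void BM_Spin(benchmark::State& state) {
    for (auto _ : state) {
      benchmark::DoNotOptimize(state.iterations());
    }
  }
  BENCHMARK(BM_Spin)->Threads(2);
  BENCHMARK_MAIN();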
@@ -221,6 +221,7 @@ BenchTimeType ParseBenchMinTime(const std::string& value) {
 
 BenchmarkRunner::BenchmarkRunner(
     const benchmark::internal::BenchmarkInstance& b_,
+    PerfCountersMeasurement* pcm_,
     BenchmarkReporter::PerFamilyRunReports* reports_for_family_)
     : b(b_),
       reports_for_family(reports_for_family_),
@@ -239,10 +240,7 @@ BenchmarkRunner::BenchmarkRunner(
       iters(has_explicit_iteration_count
                 ? ComputeIters(b_, parsed_benchtime_flag)
                 : 1),
-      perf_counters_measurement(StrSplit(FLAGS_benchmark_perf_counters, ',')),
-      perf_counters_measurement_ptr(perf_counters_measurement.IsValid()
-                                        ? &perf_counters_measurement
-                                        : nullptr) {
+      perf_counters_measurement_ptr(pcm_) {
   run_results.display_report_aggregates_only =
       (FLAGS_benchmark_report_aggregates_only ||
        FLAGS_benchmark_display_aggregates_only);
@@ -255,7 +253,7 @@ BenchmarkRunner::BenchmarkRunner(
   run_results.file_report_aggregates_only =
       (b.aggregation_report_mode() & internal::ARM_FileReportAggregatesOnly);
   BM_CHECK(FLAGS_benchmark_perf_counters.empty() ||
-           perf_counters_measurement.IsValid())
+           (perf_counters_measurement_ptr->num_counters() == 0))
       << "Perf counters were requested but could not be set up.";
 }
 }
@@ -58,6 +58,7 @@ BenchTimeType ParseBenchMinTime(const std::string& value);
 class BenchmarkRunner {
  public:
   BenchmarkRunner(const benchmark::internal::BenchmarkInstance& b_,
+                  benchmark::internal::PerfCountersMeasurement* pmc_,
                   BenchmarkReporter::PerFamilyRunReports* reports_for_family);
 
   int GetNumRepeats() const { return repeats; }
@@ -103,8 +104,7 @@ class BenchmarkRunner {
   // So only the first repetition has to find/calculate it,
   // the other repetitions will just use that precomputed iteration count.
 
-  PerfCountersMeasurement perf_counters_measurement;
-  PerfCountersMeasurement* const perf_counters_measurement_ptr;
+  PerfCountersMeasurement* const perf_counters_measurement_ptr = nullptr;
 
   struct IterationResults {
     internal::ThreadManager::Result results;
@@ -71,80 +71,78 @@ bool PerfCounters::IsCounterSupported(const std::string& name) {
   return (ret == PFM_SUCCESS);
 }
 
-// Validates all counter names passed, returning only the valid ones
-static std::vector<std::string> validateCounters(
-    const std::vector<std::string>& counter_names) {
-  // All valid names to be returned
-  std::vector<std::string> valid_names;
-
-  // Loop through all the given names
-  int invalid_counter = 0;
-  for (const std::string& name : counter_names) {
-    // Check trivial empty
-    if (name.empty()) {
-      GetErrorLogInstance() << "A counter name was the empty string\n";
-      invalid_counter++;
-      continue;
-    }
-    if (PerfCounters::IsCounterSupported(name)) {
-      // we are about to push into the valid names vector
-      // check if we did not reach the maximum
-      if (valid_names.size() == PerfCounterValues::kMaxCounters) {
-        GetErrorLogInstance()
-            << counter_names.size()
-            << " counters were requested. The maximum is "
-            << PerfCounterValues::kMaxCounters << " and "
-            << counter_names.size() - invalid_counter - valid_names.size()
-            << " will be ignored\n";
-        // stop the loop and return what we have already
-        break;
-      }
-      valid_names.push_back(name);
-    } else {
-      GetErrorLogInstance() << "Performance counter " << name
-                            << " incorrect or not supported on this platform\n";
-      invalid_counter++;
-    }
-  }
-  // RVO should take care of this
-  return valid_names;
-}
-
 PerfCounters PerfCounters::Create(
     const std::vector<std::string>& counter_names) {
-  std::vector<std::string> valid_names = validateCounters(counter_names);
-  if (valid_names.empty()) {
-    return NoCounters();
-  }
-  std::vector<int> counter_ids(valid_names.size());
+  // Valid counters will populate these arrays but we start empty
+  std::vector<std::string> valid_names;
+  std::vector<int> counter_ids;
   std::vector<int> leader_ids;
 
-  const int mode = PFM_PLM3;  // user mode only
+  // Resize to the maximum possible
+  valid_names.reserve(counter_names.size());
+  counter_ids.reserve(counter_names.size());
+
+  const int kCounterMode = PFM_PLM3;  // user mode only
+
+  // Group leads will be assigned on demand. The idea is that once we cannot
+  // create a counter descriptor, the reason is that this group has maxed out
+  // so we set the group_id again to -1 and retry - giving the algorithm a
+  // chance to create a new group leader to hold the next set of counters.
   int group_id = -1;
-  for (size_t i = 0; i < valid_names.size(); ++i) {
+
+  // Loop through all performance counters
+  for (size_t i = 0; i < counter_names.size(); ++i) {
+    // we are about to push into the valid names vector
+    // check if we did not reach the maximum
+    if (valid_names.size() == PerfCounterValues::kMaxCounters) {
+      // Log a message if we maxed out and stop adding
+      GetErrorLogInstance()
+          << counter_names.size() << " counters were requested. The maximum is "
+          << PerfCounterValues::kMaxCounters << " and " << valid_names.size()
+          << " were already added. All remaining counters will be ignored\n";
+      // stop the loop and return what we have already
+      break;
+    }
+
+    // Check if this name is empty
+    const auto& name = counter_names[i];
+    if (name.empty()) {
+      GetErrorLogInstance()
+          << "A performance counter name was the empty string\n";
+      continue;
+    }
+
+    // Here first means first in group, ie the group leader
     const bool is_first = (group_id < 0);
+
+    // This struct will be populated by libpfm from the counter string
+    // and then fed into the syscall perf_event_open
     struct perf_event_attr attr {};
     attr.size = sizeof(attr);
-    const auto& name = valid_names[i];
+
+    // This is the input struct to libpfm.
     pfm_perf_encode_arg_t arg{};
     arg.attr = &attr;
-    const int pfm_get =
-        pfm_get_os_event_encoding(name.c_str(), mode, PFM_OS_PERF_EVENT, &arg);
+    const int pfm_get = pfm_get_os_event_encoding(name.c_str(), kCounterMode,
+                                                  PFM_OS_PERF_EVENT, &arg);
     if (pfm_get != PFM_SUCCESS) {
-      GetErrorLogInstance() << "Unknown counter name: " << name << "\n";
-      return NoCounters();
+      GetErrorLogInstance()
+          << "Unknown performance counter name: " << name << "\n";
+      continue;
     }
-    attr.disabled = is_first;
+
+    // We then proceed to populate the remaining fields in our attribute struct
     // Note: the man page for perf_event_create suggests inherit = true and
    // read_format = PERF_FORMAT_GROUP don't work together, but that's not the
     // case.
+    attr.disabled = is_first;
     attr.inherit = true;
     attr.pinned = is_first;
     attr.exclude_kernel = true;
     attr.exclude_user = false;
     attr.exclude_hv = true;
-    // Read all counters in one read.
+
+    // Read all counters in a group in one read.
     attr.read_format = PERF_FORMAT_GROUP;
 
     int id = -1;
@@ -159,36 +157,64 @@ PerfCounters PerfCounters::Create(
        }
      }
      if (id < 0) {
-        // We reached a limit perhaps?
+        // If the file descriptor is negative we might have reached a limit
+        // in the current group. Set the group_id to -1 and retry
        if (group_id >= 0) {
          // Create a new group
          group_id = -1;
        } else {
-          // Give up, there is nothing else to try
+          // At this point we have already retried to set a new group id and
+          // failed. We then give up.
          break;
        }
      }
    }
+
+    // We failed to get a new file descriptor. We might have reached a hard
+    // hardware limit that cannot be resolved even with group multiplexing
    if (id < 0) {
-      GetErrorLogInstance()
-          << "Failed to get a file descriptor for " << name << "\n";
-      return NoCounters();
+      GetErrorLogInstance() << "***WARNING*** Failed to get a file descriptor "
+                               "for performance counter "
+                            << name << ". Ignoring\n";
+
+      // We give up on this counter but try to keep going
+      // as the others would be fine
+      continue;
    }
    if (group_id < 0) {
-      // This is a leader, store and assign it
+      // This is a leader, store and assign it to the current file descriptor
      leader_ids.push_back(id);
      group_id = id;
    }
-    counter_ids[i] = id;
+    // This is a valid counter, add it to our descriptor's list
+    counter_ids.push_back(id);
+    valid_names.push_back(name);
  }
 
+  // Loop through all group leaders activating them
+  // There is another option of starting ALL counters in a process but
+  // that would be far too intrusive. If the user is using PMCs
+  // by themselves then this would have a side effect on them. It is
+  // friendlier to loop through all groups individually.
  for (int lead : leader_ids) {
    if (ioctl(lead, PERF_EVENT_IOC_ENABLE) != 0) {
-      GetErrorLogInstance() << "Failed to start counters\n";
+      // This should never happen but if it does, we give up on the
+      // entire batch as recovery would be a mess.
+      GetErrorLogInstance() << "***WARNING*** Failed to start counters. "
+                               "Clearing out all counters.\n";
+
+      // Close all performance counters
+      for (int id : counter_ids) {
+        ::close(id);
+      }
+
+      // Return an empty object so our internal state is still good and
+      // the process can continue normally without impact
      return NoCounters();
    }
  }
 
-  return PerfCounters(valid_names, std::move(counter_ids),
+  return PerfCounters(std::move(valid_names), std::move(counter_ids),
                       std::move(leader_ids));
 }
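For context on the attr.read_format = PERF_FORMAT_GROUP choice above: a single read(2) on a group leader returns every counter in that group. A minimal sketch of consuming that layout (an assumption-laden illustration, not this patch's PerfCounterValues::Read; it assumes no extra PERF_FORMAT_* flags are set, matching the code above):

  #include <cstdint>
  #include <unistd.h>

  // With only PERF_FORMAT_GROUP set, the kernel fills the buffer with
  // { u64 nr; u64 values[nr]; } for the whole group in one syscall.
  static bool ReadGroup(int leader_fd, uint64_t* out, uint64_t max_counters) {
    uint64_t buf[1 + 16];  // room for nr plus up to 16 counter values
    const ssize_t got = ::read(leader_fd, buf, sizeof(buf));
    if (got < static_cast<ssize_t>(sizeof(uint64_t))) return false;
    const uint64_t nr = buf[0] < max_counters ? buf[0] : max_counters;
    for (uint64_t i = 0; i < nr; ++i) out[i] = buf[1 + i];
    return true;
  }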
@@ -223,34 +249,10 @@ PerfCounters PerfCounters::Create(
 void PerfCounters::CloseCounters() const {}
 #endif  // defined HAVE_LIBPFM
 
-Mutex PerfCountersMeasurement::mutex_;
-int PerfCountersMeasurement::ref_count_ = 0;
-PerfCounters PerfCountersMeasurement::counters_ = PerfCounters::NoCounters();
-
-// The validation in PerfCounter::Create will create less counters than passed
-// so it should be okay to initialize start_values_ and end_values_ with the
-// upper bound as passed
 PerfCountersMeasurement::PerfCountersMeasurement(
     const std::vector<std::string>& counter_names)
     : start_values_(counter_names.size()), end_values_(counter_names.size()) {
-  MutexLock l(mutex_);
-  if (ref_count_ == 0) {
-    counters_ = PerfCounters::Create(counter_names);
-  }
-  // We chose to increment it even if `counters_` ends up invalid,
-  // so that we don't keep trying to create, and also since the dtor
-  // will decrement regardless of `counters_`'s validity
-  ++ref_count_;
-
-  BM_CHECK(!counters_.IsValid() || counters_.names() == counter_names);
-}
-
-PerfCountersMeasurement::~PerfCountersMeasurement() {
-  MutexLock l(mutex_);
-  --ref_count_;
-  if (ref_count_ == 0) {
-    counters_ = PerfCounters::NoCounters();
-  }
+  counters_ = PerfCounters::Create(counter_names);
 }
 
 PerfCounters& PerfCounters::operator=(PerfCounters&& other) noexcept {
@@ -90,10 +90,11 @@ class BENCHMARK_EXPORT PerfCounters final {
   // True iff this platform supports performance counters.
   static const bool kSupported;
 
-  bool IsValid() const { return !counter_names_.empty(); }
+  // Returns an empty object
   static PerfCounters NoCounters() { return PerfCounters(); }
 
   ~PerfCounters() { CloseCounters(); }
+  PerfCounters() = default;
   PerfCounters(PerfCounters&&) = default;
   PerfCounters(const PerfCounters&) = delete;
   PerfCounters& operator=(PerfCounters&&) noexcept;
|
@ -110,8 +111,8 @@ class BENCHMARK_EXPORT PerfCounters final {
|
||||||
// Return a PerfCounters object ready to read the counters with the names
|
// Return a PerfCounters object ready to read the counters with the names
|
||||||
// specified. The values are user-mode only. The counter name format is
|
// specified. The values are user-mode only. The counter name format is
|
||||||
// implementation and OS specific.
|
// implementation and OS specific.
|
||||||
// TODO: once we move to C++-17, this should be a std::optional, and then the
|
// In case of failure, this method will in the worst case return an
|
||||||
// IsValid() boolean can be dropped.
|
// empty object whose state will still be valid.
|
||||||
static PerfCounters Create(const std::vector<std::string>& counter_names);
|
static PerfCounters Create(const std::vector<std::string>& counter_names);
|
||||||
|
|
||||||
// Take a snapshot of the current value of the counters into the provided
|
// Take a snapshot of the current value of the counters into the provided
|
||||||
|
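A short usage sketch of the new contract (hedged illustration; "CYCLES" is only an example name, and the include path mirrors the test file's):

  #include "../src/perf_counters.h"

  // Create() now always returns a usable object; failure is observable as
  // num_counters() == 0 rather than via the removed IsValid().
  void UseCounters() {
    auto pc = benchmark::internal::PerfCounters::Create({"CYCLES", "bogus"});
    if (pc.num_counters() == 0) {
      return;  // nothing was set up; proceed as if no counters were requested
    }
    // ... pc.Snapshot(...) as before ...
  }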
@@ -120,7 +121,6 @@ class BENCHMARK_EXPORT PerfCounters final {
   BENCHMARK_ALWAYS_INLINE bool Snapshot(PerfCounterValues* values) const {
 #ifndef BENCHMARK_OS_WINDOWS
     assert(values != nullptr);
-    assert(IsValid());
     return values->Read(leader_ids_) == counter_ids_.size();
 #else
     (void)values;
@@ -137,7 +137,6 @@ class BENCHMARK_EXPORT PerfCounters final {
       : counter_ids_(std::move(counter_ids)),
         leader_ids_(std::move(leader_ids)),
         counter_names_(counter_names) {}
-  PerfCounters() = default;
 
   void CloseCounters() const;
 
@ -150,33 +149,25 @@ class BENCHMARK_EXPORT PerfCounters final {
|
||||||
class BENCHMARK_EXPORT PerfCountersMeasurement final {
|
class BENCHMARK_EXPORT PerfCountersMeasurement final {
|
||||||
public:
|
public:
|
||||||
PerfCountersMeasurement(const std::vector<std::string>& counter_names);
|
PerfCountersMeasurement(const std::vector<std::string>& counter_names);
|
||||||
~PerfCountersMeasurement();
|
|
||||||
|
|
||||||
// The only way to get to `counters_` is after ctor-ing a
|
size_t num_counters() const { return counters_.num_counters(); }
|
||||||
// `PerfCountersMeasurement`, which means that `counters_`'s state is, here,
|
|
||||||
// decided (either invalid or valid) and won't change again even if a ctor is
|
|
||||||
// concurrently running with this. This is preferring efficiency to
|
|
||||||
// maintainability, because the address of the static can be known at compile
|
|
||||||
// time.
|
|
||||||
bool IsValid() const {
|
|
||||||
MutexLock l(mutex_);
|
|
||||||
return counters_.IsValid();
|
|
||||||
}
|
|
||||||
|
|
||||||
BENCHMARK_ALWAYS_INLINE void Start() {
|
std::vector<std::string> names() const { return counters_.names(); }
|
||||||
assert(IsValid());
|
|
||||||
MutexLock l(mutex_);
|
BENCHMARK_ALWAYS_INLINE bool Start() {
|
||||||
|
if (num_counters() == 0) return true;
|
||||||
// Tell the compiler to not move instructions above/below where we take
|
// Tell the compiler to not move instructions above/below where we take
|
||||||
// the snapshot.
|
// the snapshot.
|
||||||
ClobberMemory();
|
ClobberMemory();
|
||||||
valid_read_ &= counters_.Snapshot(&start_values_);
|
valid_read_ &= counters_.Snapshot(&start_values_);
|
||||||
ClobberMemory();
|
ClobberMemory();
|
||||||
|
|
||||||
|
return valid_read_;
|
||||||
}
|
}
|
||||||
|
|
||||||
BENCHMARK_ALWAYS_INLINE bool Stop(
|
BENCHMARK_ALWAYS_INLINE bool Stop(
|
||||||
std::vector<std::pair<std::string, double>>& measurements) {
|
std::vector<std::pair<std::string, double>>& measurements) {
|
||||||
assert(IsValid());
|
if (num_counters() == 0) return true;
|
||||||
MutexLock l(mutex_);
|
|
||||||
// Tell the compiler to not move instructions above/below where we take
|
// Tell the compiler to not move instructions above/below where we take
|
||||||
// the snapshot.
|
// the snapshot.
|
||||||
ClobberMemory();
|
ClobberMemory();
|
||||||
|
@@ -193,9 +184,7 @@ class BENCHMARK_EXPORT PerfCountersMeasurement final {
   }
 
  private:
-  static Mutex mutex_;
-  GUARDED_BY(mutex_) static int ref_count_;
-  GUARDED_BY(mutex_) static PerfCounters counters_;
+  PerfCounters counters_;
   bool valid_read_ = true;
   PerfCounterValues start_values_;
   PerfCounterValues end_values_;
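And a sketch of driving the measurement API after this change (hypothetical caller; the real one is BenchmarkRunner, and "CYCLES" is again only an example name):

  // Both Start() and Stop() now report success, and both degrade to no-ops
  // that return true when zero counters were configured.
  void MeasureOnce() {
    benchmark::internal::PerfCountersMeasurement pcm({"CYCLES"});
    std::vector<std::pair<std::string, double>> measurements;
    if (pcm.Start()) {
      // ... region of interest ...
      if (pcm.Stop(measurements)) {
        // one (name, value) pair per configured counter
      }
    }
  }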
@@ -1,3 +1,4 @@
+#include <random>
 #include <thread>
 
 #include "../src/perf_counters.h"
@@ -28,7 +29,7 @@ TEST(PerfCountersTest, OneCounter) {
     GTEST_SKIP() << "Performance counters not supported.\n";
   }
   EXPECT_TRUE(PerfCounters::Initialize());
-  EXPECT_TRUE(PerfCounters::Create({kGenericPerfEvent1}).IsValid());
+  EXPECT_EQ(PerfCounters::Create({kGenericPerfEvent1}).num_counters(), 1);
 }
 
 TEST(PerfCountersTest, NegativeTest) {
@@ -37,38 +38,42 @@ TEST(PerfCountersTest, NegativeTest) {
     return;
   }
   EXPECT_TRUE(PerfCounters::Initialize());
-  EXPECT_FALSE(PerfCounters::Create({}).IsValid());
-  EXPECT_FALSE(PerfCounters::Create({""}).IsValid());
-  EXPECT_FALSE(PerfCounters::Create({"not a counter name"}).IsValid());
-  EXPECT_TRUE(PerfCounters::Create(
-                  {kGenericPerfEvent1, kGenericPerfEvent2, kGenericPerfEvent3})
-                  .IsValid());
+  // Sanity checks
+  // Create() will always create a valid object, even if passed no or
+  // wrong arguments as the new behavior is to warn and drop unsupported
+  // counters
+  EXPECT_EQ(PerfCounters::Create({}).num_counters(), 0);
+  EXPECT_EQ(PerfCounters::Create({""}).num_counters(), 0);
+  EXPECT_EQ(PerfCounters::Create({"not a counter name"}).num_counters(), 0);
   {
+    // Try sneaking in a bad egg to see if it is filtered out. The
+    // number of counters has to be two, not zero
     auto counter =
         PerfCounters::Create({kGenericPerfEvent2, "", kGenericPerfEvent1});
-    EXPECT_TRUE(counter.IsValid());
     EXPECT_EQ(counter.num_counters(), 2);
     EXPECT_EQ(counter.names(), std::vector<std::string>(
                                    {kGenericPerfEvent2, kGenericPerfEvent1}));
   }
   {
+    // Try sneaking in an outrageous counter, like a fat finger mistake
     auto counter = PerfCounters::Create(
         {kGenericPerfEvent3, "not a counter name", kGenericPerfEvent1});
-    EXPECT_TRUE(counter.IsValid());
     EXPECT_EQ(counter.num_counters(), 2);
     EXPECT_EQ(counter.names(), std::vector<std::string>(
                                    {kGenericPerfEvent3, kGenericPerfEvent1}));
   }
   {
-    EXPECT_TRUE(PerfCounters::Create({kGenericPerfEvent1, kGenericPerfEvent2,
-                                      kGenericPerfEvent3})
-                    .IsValid());
+    // Finally try a golden input - it should like all of them
+    EXPECT_EQ(PerfCounters::Create(
+                  {kGenericPerfEvent1, kGenericPerfEvent2, kGenericPerfEvent3})
+                  .num_counters(),
+              3);
   }
   {
+    // Add a bad apple in the end of the chain to check the edges
     auto counter = PerfCounters::Create({kGenericPerfEvent1, kGenericPerfEvent2,
                                          kGenericPerfEvent3,
                                          "MISPREDICTED_BRANCH_RETIRED"});
-    EXPECT_TRUE(counter.IsValid());
     EXPECT_EQ(counter.num_counters(), 3);
     EXPECT_EQ(counter.names(),
               std::vector<std::string>({kGenericPerfEvent1, kGenericPerfEvent2,
@@ -82,7 +87,7 @@ TEST(PerfCountersTest, Read1Counter) {
   }
   EXPECT_TRUE(PerfCounters::Initialize());
   auto counters = PerfCounters::Create({kGenericPerfEvent1});
-  EXPECT_TRUE(counters.IsValid());
+  EXPECT_EQ(counters.num_counters(), 1);
   PerfCounterValues values1(1);
   EXPECT_TRUE(counters.Snapshot(&values1));
   EXPECT_GT(values1[0], 0);
@@ -99,7 +104,7 @@ TEST(PerfCountersTest, Read2Counters) {
   EXPECT_TRUE(PerfCounters::Initialize());
   auto counters =
       PerfCounters::Create({kGenericPerfEvent1, kGenericPerfEvent2});
-  EXPECT_TRUE(counters.IsValid());
+  EXPECT_EQ(counters.num_counters(), 2);
   PerfCounterValues values1(2);
   EXPECT_TRUE(counters.Snapshot(&values1));
   EXPECT_GT(values1[0], 0);
@@ -111,62 +116,107 @@ TEST(PerfCountersTest, Read2Counters) {
 }
 
 TEST(PerfCountersTest, ReopenExistingCounters) {
-  // The test works (i.e. causes read to fail) for the assumptions
-  // about hardware capabilities (i.e. small number (3-4) hardware
-  // counters) at this date.
+  // This test works in recent and old Intel hardware
+  // However we cannot make assumptions beyond 3 HW counters
   if (!PerfCounters::kSupported) {
     GTEST_SKIP() << "Test skipped because libpfm is not supported.\n";
   }
   EXPECT_TRUE(PerfCounters::Initialize());
-  std::vector<PerfCounters> counters;
-  counters.reserve(6);
-  for (int i = 0; i < 6; i++)
-    counters.push_back(PerfCounters::Create({kGenericPerfEvent1}));
+  std::vector<std::string> kMetrics({kGenericPerfEvent1});
+  std::vector<PerfCounters> counters(3);
+  for (auto& counter : counters) {
+    counter = PerfCounters::Create(kMetrics);
+  }
   PerfCounterValues values(1);
   EXPECT_TRUE(counters[0].Snapshot(&values));
-  EXPECT_FALSE(counters[4].Snapshot(&values));
-  EXPECT_FALSE(counters[5].Snapshot(&values));
+  EXPECT_TRUE(counters[1].Snapshot(&values));
+  EXPECT_TRUE(counters[2].Snapshot(&values));
 }
 
 TEST(PerfCountersTest, CreateExistingMeasurements) {
   // The test works (i.e. causes read to fail) for the assumptions
-  // about hardware capabilities (i.e. small number (3-4) hardware
+  // about hardware capabilities (i.e. small number (3) hardware
   // counters) at this date,
   // the same as previous test ReopenExistingCounters.
   if (!PerfCounters::kSupported) {
     GTEST_SKIP() << "Test skipped because libpfm is not supported.\n";
   }
   EXPECT_TRUE(PerfCounters::Initialize());
-  std::vector<PerfCountersMeasurement> perf_counter_measurements;
-  std::vector<std::pair<std::string, double>> measurements;
-  perf_counter_measurements.reserve(10);
-  for (int i = 0; i < 10; i++)
-    perf_counter_measurements.emplace_back(
-        std::vector<std::string>{kGenericPerfEvent1});
-  perf_counter_measurements[0].Start();
-  EXPECT_TRUE(perf_counter_measurements[0].Stop(measurements));
-  measurements.clear();
-  perf_counter_measurements[8].Start();
-  EXPECT_FALSE(perf_counter_measurements[8].Stop(measurements));
-  measurements.clear();
-  perf_counter_measurements[9].Start();
-  EXPECT_FALSE(perf_counter_measurements[9].Stop(measurements));
+
+  // This means we will try 10 counters but we can only guarantee
+  // for sure at this time that only 3 will work. Perhaps in the future
+  // we could use libpfm to query for the hardware limits on this
+  // particular platform.
+  const int kMaxCounters = 10;
+  const int kMinValidCounters = 3;
+
+  // Let's use a ubiquitous counter that is guaranteed to work
+  // on all platforms
+  const std::vector<std::string> kMetrics{"cycles"};
+
+  // Cannot create a vector of actual objects because the
+  // copy constructor of PerfCounters is deleted - and so is
+  // implicitly deleted on PerfCountersMeasurement too
+  std::vector<std::unique_ptr<PerfCountersMeasurement>>
+      perf_counter_measurements;
+
+  perf_counter_measurements.reserve(kMaxCounters);
+  for (int j = 0; j < kMaxCounters; ++j) {
+    perf_counter_measurements.emplace_back(
+        new PerfCountersMeasurement(kMetrics));
+  }
+
+  std::vector<std::pair<std::string, double>> measurements;
+
+  // Start all counters together to see if they hold
+  int max_counters = kMaxCounters;
+  for (int i = 0; i < kMaxCounters; ++i) {
+    auto& counter(*perf_counter_measurements[i]);
+    EXPECT_EQ(counter.num_counters(), 1);
+    if (!counter.Start()) {
+      max_counters = i;
+      break;
+    }
+  }
+
+  ASSERT_GE(max_counters, kMinValidCounters);
+
+  // Stop all together
+  for (int i = 0; i < max_counters; ++i) {
+    auto& counter(*perf_counter_measurements[i]);
+    EXPECT_TRUE(counter.Stop(measurements) || (i >= kMinValidCounters));
+  }
+
+  // Start/stop individually
+  for (int i = 0; i < max_counters; ++i) {
+    auto& counter(*perf_counter_measurements[i]);
+    measurements.clear();
+    counter.Start();
+    EXPECT_TRUE(counter.Stop(measurements) || (i >= kMinValidCounters));
+  }
 }
 
-size_t do_work() {
-  size_t res = 0;
-  for (size_t i = 0; i < 100000000; ++i) res += i * i;
-  return res;
+// We try to do some meaningful work here but the compiler
+// insists on optimizing away our loop so we had to add a
+// no-optimize macro. In case it fails, we added some entropy
+// to this pool as well.
+BENCHMARK_DONT_OPTIMIZE size_t do_work() {
+  static std::mt19937 rd{std::random_device{}()};
+  static std::uniform_int_distribution<size_t> mrand(0, 10);
+  const size_t kNumLoops = 1000000;
+  size_t sum = 0;
+  for (size_t j = 0; j < kNumLoops; ++j) {
+    sum += mrand(rd);
+  }
+  benchmark::DoNotOptimize(sum);
+  return sum;
 }
 
-void measure(size_t threadcount, PerfCounterValues* values1,
-             PerfCounterValues* values2) {
-  BM_CHECK_NE(values1, nullptr);
-  BM_CHECK_NE(values2, nullptr);
+void measure(size_t threadcount, PerfCounterValues* before,
+             PerfCounterValues* after) {
+  BM_CHECK_NE(before, nullptr);
+  BM_CHECK_NE(after, nullptr);
   std::vector<std::thread> threads(threadcount);
   auto work = [&]() { BM_CHECK(do_work() > 1000); };
@@ -178,9 +228,9 @@ void measure(size_t threadcount, PerfCounterValues* values1,
   auto counters =
       PerfCounters::Create({kGenericPerfEvent1, kGenericPerfEvent3});
   for (auto& t : threads) t = std::thread(work);
-  counters.Snapshot(values1);
+  counters.Snapshot(before);
   for (auto& t : threads) t.join();
-  counters.Snapshot(values2);
+  counters.Snapshot(after);
 }
 
 TEST(PerfCountersTest, MultiThreaded) {
@@ -188,21 +238,29 @@ TEST(PerfCountersTest, MultiThreaded) {
     GTEST_SKIP() << "Test skipped because libpfm is not supported.";
   }
   EXPECT_TRUE(PerfCounters::Initialize());
-  PerfCounterValues values1(2);
-  PerfCounterValues values2(2);
+  PerfCounterValues before(2);
+  PerfCounterValues after(2);
 
-  measure(2, &values1, &values2);
-  std::vector<double> D1{static_cast<double>(values2[0] - values1[0]),
-                         static_cast<double>(values2[1] - values1[1])};
+  // Notice that this test will work even if we taskset it to a single CPU
+  // In this case the threads will run sequentially
+  // Start two threads and measure the number of combined cycles and
+  // instructions
+  measure(2, &before, &after);
+  std::vector<double> Elapsed2Threads{
+      static_cast<double>(after[0] - before[0]),
+      static_cast<double>(after[1] - before[1])};
 
-  measure(4, &values1, &values2);
-  std::vector<double> D2{static_cast<double>(values2[0] - values1[0]),
-                         static_cast<double>(values2[1] - values1[1])};
+  // Start four threads and measure the number of combined cycles and
+  // instructions
+  measure(4, &before, &after);
+  std::vector<double> Elapsed4Threads{
+      static_cast<double>(after[0] - before[0]),
+      static_cast<double>(after[1] - before[1])};
 
   // Some extra work will happen on the main thread - like joining the threads
   // - so the ratio won't be quite 2.0, but very close.
-  EXPECT_GE(D2[0], 1.9 * D1[0]);
-  EXPECT_GE(D2[1], 1.9 * D1[1]);
+  EXPECT_GE(Elapsed4Threads[0], 1.9 * Elapsed2Threads[0]);
+  EXPECT_GE(Elapsed4Threads[1], 1.9 * Elapsed2Threads[1]);
 }
 
 TEST(PerfCountersTest, HardwareLimits) {