Filter performance counter names, not invalidate all (#1554)

* Filter performance counter names, not invalidate all Currently, the performance counters are validated while they are being created and one failure returns NoCounters(), ie it effecitvely invalidates all the counters. I would like to propose a new behavior: filter instead. If an invalid name is added to the counter list, or if that particular counter is not supported on this platform, that counter is dropped from the list and an error messages is created, while all the other counters remain active. This will give testers a peace of mind that if one mistake is made or if something is changed or removed from libpfm, their entire test will not be invalidated. This feature gives more tolerance with respect to versioning. Another positive is that testers can now input a superset of all desired counters for all platforms they support and just let Benchmark drop all those that are not supported, although it will create quite a lot of noise down the line, in which case perhaps we should drop silently or make a consolidated, single error line but this was not implemented in this change set. * Removed unused helper type.
2023-03-02 08:56:13 -06:00 · 2023-03-02 08:56:13 -06:00 · 2d5012275a
parent 27c1d8ace9
commit 2d5012275a
2 changed files with 78 additions and 32 deletions
--- a/src/perf_counters.cc
+++ b/src/perf_counters.cc
@ -71,32 +71,61 @@ bool PerfCounters::IsCounterSupported(const std::string& name) {
  return (ret == PFM_SUCCESS);
 }

+// Validates all counter names passed, returning only the valid ones
+static std::vector<std::string> validateCounters(
+    const std::vector<std::string>& counter_names) {
+  // All valid names to be returned
+  std::vector<std::string> valid_names;
+
+  // Loop through all the given names
+  int invalid_counter = 0;
+  for (const std::string& name : counter_names) {
+    // Check trivial empty
+    if (name.empty()) {
+      GetErrorLogInstance() << "A counter name was the empty string\n";
+      invalid_counter++;
+      continue;
+    }
+    if (PerfCounters::IsCounterSupported(name)) {
+      // we are about to push into the valid names vector
+      // check if we did not reach the maximum
+      if (valid_names.size() == PerfCounterValues::kMaxCounters) {
+        GetErrorLogInstance()
+            << counter_names.size()
+            << " counters were requested. The maximum is "
+            << PerfCounterValues::kMaxCounters << " and "
+            << counter_names.size() - invalid_counter - valid_names.size()
+            << " will be ignored\n";
+        // stop the loop and return what we have already
+        break;
+      }
+      valid_names.push_back(name);
+    } else {
+      GetErrorLogInstance() << "Performance counter " << name
+                            << " incorrect or not supported on this platform\n";
+      invalid_counter++;
+    }
+  }
+  // RVO should take care of this
+  return valid_names;
+}
+
 PerfCounters PerfCounters::Create(
    const std::vector<std::string>& counter_names) {
-  if (counter_names.empty()) {
+  std::vector<std::string> valid_names = validateCounters(counter_names);
+  if (valid_names.empty()) {
    return NoCounters();
  }
-  if (counter_names.size() > PerfCounterValues::kMaxCounters) {
-    GetErrorLogInstance()
-        << counter_names.size()
-        << " counters were requested. The minimum is 1, the maximum is "
-        << PerfCounterValues::kMaxCounters << "\n";
-    return NoCounters();
-  }
-  std::vector<int> counter_ids(counter_names.size());
+  std::vector<int> counter_ids(valid_names.size());
  std::vector<int> leader_ids;

  const int mode = PFM_PLM3;  // user mode only
  int group_id = -1;
-  for (size_t i = 0; i < counter_names.size(); ++i) {
+  for (size_t i = 0; i < valid_names.size(); ++i) {
    const bool is_first = (group_id < 0);
    struct perf_event_attr attr {};
    attr.size = sizeof(attr);
-    const auto& name = counter_names[i];
-    if (name.empty()) {
-      GetErrorLogInstance() << "A counter name was the empty string\n";
-      return NoCounters();
-    }
+    const auto& name = valid_names[i];
    pfm_perf_encode_arg_t arg{};
    arg.attr = &attr;

@ -159,7 +188,7 @@ PerfCounters PerfCounters::Create(
    }
  }

-  return PerfCounters(counter_names, std::move(counter_ids),
+  return PerfCounters(valid_names, std::move(counter_ids),
                      std::move(leader_ids));
 }

@ -198,6 +227,9 @@ Mutex PerfCountersMeasurement::mutex_;
 int PerfCountersMeasurement::ref_count_ = 0;
 PerfCounters PerfCountersMeasurement::counters_ = PerfCounters::NoCounters();

+// The validation in PerfCounter::Create will create less counters than passed
+// so it should be okay to initialize start_values_ and end_values_ with the
+// upper bound as passed
 PerfCountersMeasurement::PerfCountersMeasurement(
    const std::vector<std::string>& counter_names)
    : start_values_(counter_names.size()), end_values_(counter_names.size()) {
--- a/test/perf_counters_gtest.cc
+++ b/test/perf_counters_gtest.cc
@ -40,26 +40,40 @@ TEST(PerfCountersTest, NegativeTest) {
  EXPECT_FALSE(PerfCounters::Create({}).IsValid());
  EXPECT_FALSE(PerfCounters::Create({""}).IsValid());
  EXPECT_FALSE(PerfCounters::Create({"not a counter name"}).IsValid());
+  EXPECT_TRUE(PerfCounters::Create(
+                  {kGenericPerfEvent1, kGenericPerfEvent2, kGenericPerfEvent3})
+                  .IsValid());
+  {
+    auto counter =
+        PerfCounters::Create({kGenericPerfEvent2, "", kGenericPerfEvent1});
+    EXPECT_TRUE(counter.IsValid());
+    EXPECT_EQ(counter.num_counters(), 2);
+    EXPECT_EQ(counter.names(), std::vector<std::string>(
+                                   {kGenericPerfEvent2, kGenericPerfEvent1}));
+  }
+  {
+    auto counter = PerfCounters::Create(
+        {kGenericPerfEvent3, "not a counter name", kGenericPerfEvent1});
+    EXPECT_TRUE(counter.IsValid());
+    EXPECT_EQ(counter.num_counters(), 2);
+    EXPECT_EQ(counter.names(), std::vector<std::string>(
+                                   {kGenericPerfEvent3, kGenericPerfEvent1}));
+  }
  {
    EXPECT_TRUE(PerfCounters::Create({kGenericPerfEvent1, kGenericPerfEvent2,
                                      kGenericPerfEvent3})
                    .IsValid());
  }
-  EXPECT_FALSE(
-      PerfCounters::Create({kGenericPerfEvent2, "", kGenericPerfEvent1})
-          .IsValid());
-  EXPECT_FALSE(PerfCounters::Create({kGenericPerfEvent3, "not a counter name",
-                                     kGenericPerfEvent1})
-                   .IsValid());
  {
-    EXPECT_TRUE(PerfCounters::Create({kGenericPerfEvent1, kGenericPerfEvent2,
-                                      kGenericPerfEvent3})
-                    .IsValid());
+    auto counter = PerfCounters::Create({kGenericPerfEvent1, kGenericPerfEvent2,
+                                         kGenericPerfEvent3,
+                                         "MISPREDICTED_BRANCH_RETIRED"});
+    EXPECT_TRUE(counter.IsValid());
+    EXPECT_EQ(counter.num_counters(), 3);
+    EXPECT_EQ(counter.names(),
+              std::vector<std::string>({kGenericPerfEvent1, kGenericPerfEvent2,
+                                        kGenericPerfEvent3}));
  }
-  EXPECT_FALSE(
-      PerfCounters::Create({kGenericPerfEvent1, kGenericPerfEvent2,
-                            kGenericPerfEvent3, "MISPREDICTED_BRANCH_RETIRED"})
-          .IsValid());
 }

 TEST(PerfCountersTest, Read1Counter) {
@ -157,9 +171,9 @@ void measure(size_t threadcount, PerfCounterValues* values1,
  auto work = [&]() { BM_CHECK(do_work() > 1000); };

  // We need to first set up the counters, then start the threads, so the
-  // threads would inherit the counters. But later, we need to first destroy the
-  // thread pool (so all the work finishes), then measure the counters. So the
-  // scopes overlap, and we need to explicitly control the scope of the
+  // threads would inherit the counters. But later, we need to first destroy
+  // the thread pool (so all the work finishes), then measure the counters. So
+  // the scopes overlap, and we need to explicitly control the scope of the
  // threadpool.
  auto counters =
      PerfCounters::Create({kGenericPerfEvent1, kGenericPerfEvent3});