mirror of https://github.com/google/benchmark.git
tools/compare: don't actually discard valid (but zero) `pvalue` (#1733)
* tools/compare: when dumping JSON, pretty-print it. It is essentially non-human-readable otherwise. I can't imagine the file size really matters, and if it does, the file should simply be compressed later on.

* tools/compare: add a failing test.

* tools/compare: don't actually discard a valid (but zero) `pvalue`. So, this is embarrassing: for a very large number of repetitions we can end up with a p-value that is a true zero, which obviously compares false, so we treated it as if we had failed to compute it at all.
This commit is contained in:
parent e61e332df9
commit 96d820f73f
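Why a true-zero p-value gets lost: in Python, 0.0 is falsy, so `cpu_pvalue and time_pvalue` cannot tell a maximally significant result apart from a missing one. A minimal standalone sketch of the pitfall and the fix (toy values, not code from this repository):

# Toy values standing in for what calc_utest() returns: with enough
# repetitions the p-values can underflow to an exact 0.0.
cpu_pvalue, time_pvalue = 0.0, 0.0

# Old check: 0.0 is falsy, so the perfectly valid result is dropped
# as if the U test had not been computed at all.
if cpu_pvalue and time_pvalue:
    print("u-test result recorded")   # never reached for zero p-values
else:
    print("u-test result discarded")  # this is what happened

# Fixed check: only None (the "could not compute" sentinel) is rejected.
if cpu_pvalue is not None and time_pvalue is not None:
    print("u-test result recorded")   # now reached, as intended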
@@ -327,7 +327,7 @@ def main():
     # Optionally, diff and output to JSON
     if args.dump_to_json is not None:
         with open(args.dump_to_json, "w") as f_json:
-            json.dump(diff_report, f_json)
+            json.dump(diff_report, f_json, indent=1)


 class TestParser(unittest.TestCase):
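For context, a small sketch of what `indent=1` changes about the dumped report (the `diff_report` contents below are made up purely for illustration):

import json

# A minimal stand-in for the diff report structure.
diff_report = [{"name": "BM_Example", "measurements": [{"time": 0.5, "cpu": 0.5}]}]

# Default: everything on one line, effectively unreadable for large reports.
print(json.dumps(diff_report))

# indent=1: one key per line, human-readable; the larger file can always be
# compressed afterwards if size ever matters.
print(json.dumps(diff_report, indent=1))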
@@ -0,0 +1,18 @@
+{
+  "context": {
+    "date": "2016-08-02 17:44:46",
+    "num_cpus": 4,
+    "mhz_per_cpu": 4228,
+    "cpu_scaling_enabled": false,
+    "library_build_type": "release"
+  },
+  "benchmarks": [
+    {
+      "name": "BM_ManyRepetitions",
+      "iterations": 1000,
+      "real_time": 1,
+      "cpu_time": 1000,
+      "time_unit": "s"
+    }
+  ]
+}
@@ -0,0 +1,18 @@
+{
+  "context": {
+    "date": "2016-08-02 17:44:46",
+    "num_cpus": 4,
+    "mhz_per_cpu": 4228,
+    "cpu_scaling_enabled": false,
+    "library_build_type": "release"
+  },
+  "benchmarks": [
+    {
+      "name": "BM_ManyRepetitions",
+      "iterations": 1000,
+      "real_time": 1000,
+      "cpu_time": 1,
+      "time_unit": "s"
+    }
+  ]
+}
@@ -315,7 +315,7 @@ def get_difference_report(json1, json2, utest=False):
             have_optimal_repetitions, cpu_pvalue, time_pvalue = calc_utest(
                 timings_cpu, timings_time
             )
-            if cpu_pvalue and time_pvalue:
+            if cpu_pvalue is not None and time_pvalue is not None:
                 utest_results = {
                     "have_optimal_repetitions": have_optimal_repetitions,
                     "cpu_pvalue": cpu_pvalue,
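Where a true-zero p-value comes from in practice: calc_utest is built on the Mann-Whitney U test, and for large, well-separated samples the asymptotic p-value underflows to an exact 0.0. A sketch assuming scipy is installed (the call below is illustrative, not the repository's exact calc_utest code):

from scipy.stats import mannwhitneyu

# Mirrors the test5 inputs: 1000 repetitions of 1 vs 1000 repetitions of 1000.
baseline = [1.0] * 1000
contender = [1000.0] * 1000

# For samples this large and this well separated, the two-sided asymptotic
# p-value underflows below the smallest representable double, i.e. to 0.0.
result = mannwhitneyu(baseline, contender, alternative="two-sided")
print(result.pvalue)  # 0.0

# A zero p-value is a valid, maximally significant result; only None should
# ever be treated as "the test could not be computed".
assert result.pvalue is not None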
@@ -1490,6 +1490,100 @@ class TestReportSorting(unittest.TestCase):
             self.assertEqual(out["name"], expected)


+class TestReportDifferenceWithUTestWhileDisplayingAggregatesOnly2(
+    unittest.TestCase
+):
+    @classmethod
+    def setUpClass(cls):
+        def load_results():
+            import json
+
+            testInputs = os.path.join(
+                os.path.dirname(os.path.realpath(__file__)), "Inputs"
+            )
+            testOutput1 = os.path.join(testInputs, "test5_run0.json")
+            testOutput2 = os.path.join(testInputs, "test5_run1.json")
+            with open(testOutput1, "r") as f:
+                json1 = json.load(f)
+                json1["benchmarks"] = [
+                    json1["benchmarks"][0] for i in range(1000)
+                ]
+            with open(testOutput2, "r") as f:
+                json2 = json.load(f)
+                json2["benchmarks"] = [
+                    json2["benchmarks"][0] for i in range(1000)
+                ]
+            return json1, json2
+
+        json1, json2 = load_results()
+        cls.json_diff_report = get_difference_report(json1, json2, utest=True)
+
+    def test_json_diff_report_pretty_printing(self):
+        expect_line = [
+            "BM_ManyRepetitions_pvalue",
+            "0.0000",
+            "0.0000",
+            "U",
+            "Test,",
+            "Repetitions:",
+            "1000",
+            "vs",
+            "1000",
+        ]
+        output_lines_with_header = print_difference_report(
+            self.json_diff_report, utest=True, utest_alpha=0.05, use_color=False
+        )
+        output_lines = output_lines_with_header[2:]
+        found = False
+        for i in range(0, len(output_lines)):
+            parts = [x for x in output_lines[i].split(" ") if x]
+            found = expect_line == parts
+            if found:
+                break
+        self.assertTrue(found)
+
+    def test_json_diff_report(self):
+        expected_output = [
+            {
+                "name": "BM_ManyRepetitions",
+                "label": "",
+                "time_unit": "s",
+                "run_type": "",
+                "aggregate_name": "",
+                "utest": {
+                    "have_optimal_repetitions": True,
+                    "cpu_pvalue": 0.0,
+                    "time_pvalue": 0.0,
+                    "nr_of_repetitions": 1000,
+                    "nr_of_repetitions_other": 1000,
+                },
+            },
+            {
+                "name": "OVERALL_GEOMEAN",
+                "label": "",
+                "measurements": [
+                    {
+                        "real_time": 1.0,
+                        "cpu_time": 1000.000000000069,
+                        "real_time_other": 1000.000000000069,
+                        "cpu_time_other": 1.0,
+                        "time": 999.000000000069,
+                        "cpu": -0.9990000000000001,
+                    }
+                ],
+                "time_unit": "s",
+                "run_type": "aggregate",
+                "aggregate_name": "geomean",
+                "utest": {},
+            },
+        ]
+        self.assertEqual(len(self.json_diff_report), len(expected_output))
+        for out, expected in zip(self.json_diff_report, expected_output):
+            self.assertEqual(out["name"], expected["name"])
+            self.assertEqual(out["time_unit"], expected["time_unit"])
+            assert_utest(self, out, expected)
+
+
 def assert_utest(unittest_instance, lhs, rhs):
     if lhs["utest"]:
         unittest_instance.assertAlmostEqual(