From 45b194e4d4101f4de46dc2afaf20254512ceaa96 Mon Sep 17 00:00:00 2001 From: Roman Lebedev Date: Fri, 3 Sep 2021 20:44:10 +0300 Subject: [PATCH] Introduce Coefficient of variation aggregate (#1220) * Introduce Coefficient of variation aggregate I believe, it is much more useful / use to understand, because it is already normalized by the mean, so it is not affected by the duration of the benchmark, unlike the standard deviation. Example of real-world output: ``` raw.pixls.us-unique/GoPro/HERO6 Black$ ~/rawspeed/build-old/src/utilities/rsbench/rsbench GOPR9172.GPR --benchmark_repetitions=27 --benchmark_display_aggregates_only=true --benchmark_counters_tabular=true 2021-09-03T18:05:56+03:00 Running /home/lebedevri/rawspeed/build-old/src/utilities/rsbench/rsbench Run on (32 X 3596.16 MHz CPU s) CPU Caches: L1 Data 32 KiB (x16) L1 Instruction 32 KiB (x16) L2 Unified 512 KiB (x16) L3 Unified 32768 KiB (x2) Load Average: 7.00, 2.99, 1.85 ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ Benchmark Time CPU Iterations CPUTime,s CPUTime/WallTime Pixels Pixels/CPUTime Pixels/WallTime Raws/CPUTime Raws/WallTime WallTime,s ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ GOPR9172.GPR/threads:32/process_time/real_time_mean 11.1 ms 353 ms 27 0.353122 31.9473 12M 33.9879M 1085.84M 2.83232 90.4864 0.0110535 GOPR9172.GPR/threads:32/process_time/real_time_median 11.0 ms 352 ms 27 0.351696 31.9599 12M 34.1203M 1090.11M 2.84336 90.8425 0.0110081 GOPR9172.GPR/threads:32/process_time/real_time_stddev 0.159 ms 4.60 ms 27 4.59539m 0.0462064 0 426.371k 14.9631M 0.0355309 1.24692 158.944u GOPR9172.GPR/threads:32/process_time/real_time_cv 1.44 % 1.30 % 27 0.0130136 1.44633m 0 0.0125448 
0.0137802 0.0125448 0.0137802 0.0143795 ``` Fixes https://github.com/google/benchmark/issues/1146 * Be consistent, it's CV, not 'rel std dev' --- docs/user_guide.md | 20 +++++----- src/benchmark_register.cc | 1 + src/statistics.cc | 9 +++++ src/statistics.h | 1 + test/display_aggregates_only_test.cc | 10 +++-- test/output_test_helper.cc | 1 + test/report_aggregates_only_test.cc | 10 +++-- test/statistics_gtest.cc | 7 ++++ test/user_counters_tabular_test.cc | 56 +++++++++++++++++++++++++++- 9 files changed, 95 insertions(+), 20 deletions(-) diff --git a/docs/user_guide.md b/docs/user_guide.md index 9fd4ac1a..cb34c24d 100644 --- a/docs/user_guide.md +++ b/docs/user_guide.md @@ -930,7 +930,7 @@ Note that `ClobberMemory()` is only available for GNU or MSVC based compilers. -## Statistics: Reporting the Mean, Median and Standard Deviation of Repeated Benchmarks +## Statistics: Reporting the Mean, Median and Standard Deviation / Coefficient of variation of Repeated Benchmarks By default each benchmark is run once and that single result is reported. However benchmarks are often noisy and a single result may not be representative @@ -940,16 +940,17 @@ benchmark. The number of runs of each benchmark is specified globally by the `--benchmark_repetitions` flag or on a per benchmark basis by calling `Repetitions` on the registered benchmark object. When a benchmark is run more -than once the mean, median and standard deviation of the runs will be reported. +than once the mean, median, standard deviation and coefficient of variation +of the runs will be reported. Additionally the `--benchmark_report_aggregates_only={true|false}`, `--benchmark_display_aggregates_only={true|false}` flags or `ReportAggregatesOnly(bool)`, `DisplayAggregatesOnly(bool)` functions can be used to change how repeated tests are reported. By default the result of each repeated run is reported. When `report aggregates only` option is `true`, -only the aggregates (i.e. 
mean, median and standard deviation, maybe complexity -measurements if they were requested) of the runs is reported, to both the -reporters - standard output (console), and the file. +only the aggregates (i.e. mean, median, standard deviation and coefficient +of variation, maybe complexity measurements if they were requested) of the runs +are reported, to both the reporters - standard output (console), and the file. However when only the `display aggregates only` option is `true`, only the aggregates are displayed in the standard output, while the file output still contains everything. @@ -961,11 +962,10 @@ benchmark. ## Custom Statistics -While having mean, median and standard deviation is nice, this may not be -enough for everyone. For example you may want to know what the largest -observation is, e.g. because you have some real-time constraints. This is easy. -The following code will specify a custom statistic to be calculated, defined -by a lambda function. +While having these aggregates is nice, this may not be enough for everyone. +For example you may want to know what the largest observation is, e.g. because +you have some real-time constraints. This is easy. The following code will +specify a custom statistic to be calculated, defined by a lambda function. 
```c++ void BM_spin_empty(benchmark::State& state) { diff --git a/src/benchmark_register.cc b/src/benchmark_register.cc index 56fef5cc..f2b32bdb 100644 --- a/src/benchmark_register.cc +++ b/src/benchmark_register.cc @@ -215,6 +215,7 @@ Benchmark::Benchmark(const char* name) ComputeStatistics("mean", StatisticsMean); ComputeStatistics("median", StatisticsMedian); ComputeStatistics("stddev", StatisticsStdDev); + ComputeStatistics("cv", StatisticsCV, kPercentage); } Benchmark::~Benchmark() {} diff --git a/src/statistics.cc b/src/statistics.cc index 88dfc357..00ae97df 100644 --- a/src/statistics.cc +++ b/src/statistics.cc @@ -74,6 +74,15 @@ double StatisticsStdDev(const std::vector<double>& v) { return Sqrt(v.size() / (v.size() - 1.0) * (avg_squares - Sqr(mean))); } +double StatisticsCV(const std::vector<double>& v) { + if (v.size() < 2) return 0.0; + + const auto stddev = StatisticsStdDev(v); + const auto mean = StatisticsMean(v); + + return stddev / mean; +} + std::vector<BenchmarkReporter::Run> ComputeStats( const std::vector<BenchmarkReporter::Run>& reports) { typedef BenchmarkReporter::Run Run; diff --git a/src/statistics.h b/src/statistics.h index 7eccc855..a9545a58 100644 --- a/src/statistics.h +++ b/src/statistics.h @@ -31,6 +31,7 @@ std::vector<BenchmarkReporter::Run> ComputeStats( double StatisticsMean(const std::vector<double>& v); double StatisticsMedian(const std::vector<double>& v); double StatisticsStdDev(const std::vector<double>& v); +double StatisticsCV(const std::vector<double>& v); } // end namespace benchmark diff --git a/test/display_aggregates_only_test.cc b/test/display_aggregates_only_test.cc index 3c36d3f0..6ad65e7f 100644 --- a/test/display_aggregates_only_test.cc +++ b/test/display_aggregates_only_test.cc @@ -19,21 +19,23 @@ BENCHMARK(BM_SummaryRepeat)->Repetitions(3)->DisplayAggregatesOnly(); int main(int argc, char* argv[]) { const std::string output = GetFileReporterOutput(argc, argv); - if (SubstrCnt(output, "\"name\": \"BM_SummaryRepeat/repeats:3") != 6 || + if (SubstrCnt(output, "\"name\": \"BM_SummaryRepeat/repeats:3") != 7 || SubstrCnt(output, 
"\"name\": \"BM_SummaryRepeat/repeats:3\"") != 3 || SubstrCnt(output, "\"name\": \"BM_SummaryRepeat/repeats:3_mean\"") != 1 || SubstrCnt(output, "\"name\": \"BM_SummaryRepeat/repeats:3_median\"") != 1 || SubstrCnt(output, "\"name\": \"BM_SummaryRepeat/repeats:3_stddev\"") != - 1) { - std::cout << "Precondition mismatch. Expected to only find 6 " + 1 || + SubstrCnt(output, "\"name\": \"BM_SummaryRepeat/repeats:3_cv\"") != 1) { + std::cout << "Precondition mismatch. Expected to only find 7 " "occurrences of \"BM_SummaryRepeat/repeats:3\" substring:\n" "\"name\": \"BM_SummaryRepeat/repeats:3\", " "\"name\": \"BM_SummaryRepeat/repeats:3\", " "\"name\": \"BM_SummaryRepeat/repeats:3\", " "\"name\": \"BM_SummaryRepeat/repeats:3_mean\", " "\"name\": \"BM_SummaryRepeat/repeats:3_median\", " - "\"name\": \"BM_SummaryRepeat/repeats:3_stddev\"\nThe entire " + "\"name\": \"BM_SummaryRepeat/repeats:3_stddev\", " + "\"name\": \"BM_SummaryRepeat/repeats:3_cv\"\nThe entire " "output:\n"; std::cout << output; return 1; diff --git a/test/output_test_helper.cc b/test/output_test_helper.cc index 46034599..5913fc9b 100644 --- a/test/output_test_helper.cc +++ b/test/output_test_helper.cc @@ -49,6 +49,7 @@ SubMap& GetSubstitutions() { {" %s ", "[ ]+"}, {"%time", "[ ]*" + time_re + "[ ]+ns"}, {"%console_report", "[ ]*" + time_re + "[ ]+ns [ ]*" + time_re + "[ ]+ns [ ]*[0-9]+"}, + {"%console_percentage_report", "[ ]*" + time_re + "[ ]+% [ ]*" + time_re + "[ ]+% [ ]*[0-9]+"}, {"%console_us_report", "[ ]*" + time_re + "[ ]+us [ ]*" + time_re + "[ ]+us [ ]*[0-9]+"}, {"%console_ms_report", "[ ]*" + time_re + "[ ]+ms [ ]*" + time_re + "[ ]+ms [ ]*[0-9]+"}, {"%console_s_report", "[ ]*" + time_re + "[ ]+s [ ]*" + time_re + "[ ]+s [ ]*[0-9]+"}, diff --git a/test/report_aggregates_only_test.cc b/test/report_aggregates_only_test.cc index 9646b9be..47da5035 100644 --- a/test/report_aggregates_only_test.cc +++ b/test/report_aggregates_only_test.cc @@ -19,17 +19,19 @@ 
BENCHMARK(BM_SummaryRepeat)->Repetitions(3)->ReportAggregatesOnly(); int main(int argc, char* argv[]) { const std::string output = GetFileReporterOutput(argc, argv); - if (SubstrCnt(output, "\"name\": \"BM_SummaryRepeat/repeats:3") != 3 || + if (SubstrCnt(output, "\"name\": \"BM_SummaryRepeat/repeats:3") != 4 || SubstrCnt(output, "\"name\": \"BM_SummaryRepeat/repeats:3_mean\"") != 1 || SubstrCnt(output, "\"name\": \"BM_SummaryRepeat/repeats:3_median\"") != 1 || SubstrCnt(output, "\"name\": \"BM_SummaryRepeat/repeats:3_stddev\"") != - 1) { - std::cout << "Precondition mismatch. Expected to only find three " + 1 || + SubstrCnt(output, "\"name\": \"BM_SummaryRepeat/repeats:3_cv\"") != 1) { + std::cout << "Precondition mismatch. Expected to only find four " "occurrences of \"BM_SummaryRepeat/repeats:3\" substring:\n" "\"name\": \"BM_SummaryRepeat/repeats:3_mean\", " "\"name\": \"BM_SummaryRepeat/repeats:3_median\", " - "\"name\": \"BM_SummaryRepeat/repeats:3_stddev\"\nThe entire " + "\"name\": \"BM_SummaryRepeat/repeats:3_stddev\", " + "\"name\": \"BM_SummaryRepeat/repeats:3_cv\"\nThe entire " "output:\n"; std::cout << output; return 1; diff --git a/test/statistics_gtest.cc b/test/statistics_gtest.cc index 3ddc72dd..1de2d87d 100644 --- a/test/statistics_gtest.cc +++ b/test/statistics_gtest.cc @@ -25,4 +25,11 @@ TEST(StatisticsTest, StdDev) { 1.151086443322134); } +TEST(StatisticsTest, CV) { + EXPECT_DOUBLE_EQ(benchmark::StatisticsCV({101, 101, 101, 101}), 0.0); + EXPECT_DOUBLE_EQ(benchmark::StatisticsCV({1, 2, 3}), 1. 
/ 2.); + EXPECT_DOUBLE_EQ(benchmark::StatisticsCV({2.5, 2.4, 3.3, 4.2, 5.1}), + 0.32888184094918121); +} + } // end namespace diff --git a/test/user_counters_tabular_test.cc b/test/user_counters_tabular_test.cc index ff81e265..34981ad1 100644 --- a/test/user_counters_tabular_test.cc +++ b/test/user_counters_tabular_test.cc @@ -18,12 +18,14 @@ ADD_CASES(TC_ConsoleOut, {"^BM_Counters_Tabular/repeats:2/threads:1 %console_report [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat$", MR_Next}, {"^BM_Counters_Tabular/repeats:2/threads:1_mean %console_report [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat$", MR_Next}, {"^BM_Counters_Tabular/repeats:2/threads:1_median %console_report [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat$", MR_Next}, - {"^BM_Counters_Tabular/repeats:2/threads:1_stddev %console_report [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat$", MR_Next}, + {"^BM_Counters_Tabular/repeats:2/threads:1_stddev %console_report [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat$", MR_Next}, + {"^BM_Counters_Tabular/repeats:2/threads:1_cv %console_percentage_report [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat$", MR_Next}, {"^BM_Counters_Tabular/repeats:2/threads:2 %console_report [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat$", MR_Next}, {"^BM_Counters_Tabular/repeats:2/threads:2 %console_report [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat$", MR_Next}, {"^BM_Counters_Tabular/repeats:2/threads:2_mean %console_report [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat$", MR_Next}, {"^BM_Counters_Tabular/repeats:2/threads:2_median %console_report [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat$", MR_Next}, - {"^BM_Counters_Tabular/repeats:2/threads:2_stddev %console_report [ 
]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat$", MR_Next}, + {"^BM_Counters_Tabular/repeats:2/threads:2_stddev %console_report [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat$", MR_Next}, + {"^BM_Counters_Tabular/repeats:2/threads:2_cv %console_percentage_report [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat$", MR_Next}, {"^BM_CounterRates_Tabular/threads:%int %console_report [ ]*%hrfloat/s [ ]*%hrfloat/s [ ]*%hrfloat/s [ ]*%hrfloat/s [ ]*%hrfloat/s [ ]*%hrfloat/s$", MR_Next}, {"^BM_CounterRates_Tabular/threads:%int %console_report [ ]*%hrfloat/s [ ]*%hrfloat/s [ ]*%hrfloat/s [ ]*%hrfloat/s [ ]*%hrfloat/s [ ]*%hrfloat/s$", MR_Next}, {"^BM_CounterRates_Tabular/threads:%int %console_report [ ]*%hrfloat/s [ ]*%hrfloat/s [ ]*%hrfloat/s [ ]*%hrfloat/s [ ]*%hrfloat/s [ ]*%hrfloat/s$", MR_Next}, @@ -181,6 +183,28 @@ ADD_CASES(TC_JSONOut, {"\"Frob\": %float,$", MR_Next}, {"\"Lob\": %float$", MR_Next}, {"}", MR_Next}}); +ADD_CASES(TC_JSONOut, + {{"\"name\": \"BM_Counters_Tabular/repeats:2/threads:1_cv\",$"}, + {"\"family_index\": 0,$", MR_Next}, + {"\"per_family_instance_index\": 0,$", MR_Next}, + {"\"run_name\": \"BM_Counters_Tabular/repeats:2/threads:1\",$", + MR_Next}, + {"\"run_type\": \"aggregate\",$", MR_Next}, + {"\"repetitions\": 2,$", MR_Next}, + {"\"threads\": 1,$", MR_Next}, + {"\"aggregate_name\": \"cv\",$", MR_Next}, + {"\"aggregate_unit\": \"percentage\",$", MR_Next}, + {"\"iterations\": %int,$", MR_Next}, + {"\"real_time\": %float,$", MR_Next}, + {"\"cpu_time\": %float,$", MR_Next}, + {"\"time_unit\": \"ns\",$", MR_Next}, + {"\"Bar\": %float,$", MR_Next}, + {"\"Bat\": %float,$", MR_Next}, + {"\"Baz\": %float,$", MR_Next}, + {"\"Foo\": %float,$", MR_Next}, + {"\"Frob\": %float,$", MR_Next}, + {"\"Lob\": %float$", MR_Next}, + {"}", MR_Next}}); ADD_CASES(TC_JSONOut, {{"\"name\": \"BM_Counters_Tabular/repeats:2/threads:2\",$"}, @@ -268,6 +292,28 @@ ADD_CASES(TC_JSONOut, 
{"\"Frob\": %float,$", MR_Next}, {"\"Lob\": %float$", MR_Next}, {"}", MR_Next}}); +ADD_CASES(TC_JSONOut, + {{"\"name\": \"BM_Counters_Tabular/repeats:2/threads:2_cv\",$"}, + {"\"family_index\": 0,$", MR_Next}, + {"\"per_family_instance_index\": 1,$", MR_Next}, + {"\"run_name\": \"BM_Counters_Tabular/repeats:2/threads:2\",$", + MR_Next}, + {"\"run_type\": \"aggregate\",$", MR_Next}, + {"\"repetitions\": 2,$", MR_Next}, + {"\"threads\": 2,$", MR_Next}, + {"\"aggregate_name\": \"cv\",$", MR_Next}, + {"\"aggregate_unit\": \"percentage\",$", MR_Next}, + {"\"iterations\": %int,$", MR_Next}, + {"\"real_time\": %float,$", MR_Next}, + {"\"cpu_time\": %float,$", MR_Next}, + {"\"time_unit\": \"ns\",$", MR_Next}, + {"\"Bar\": %float,$", MR_Next}, + {"\"Bat\": %float,$", MR_Next}, + {"\"Baz\": %float,$", MR_Next}, + {"\"Foo\": %float,$", MR_Next}, + {"\"Frob\": %float,$", MR_Next}, + {"\"Lob\": %float$", MR_Next}, + {"}", MR_Next}}); ADD_CASES(TC_CSVOut, {{"^\"BM_Counters_Tabular/repeats:2/threads:1\",%csv_report," "%float,%float,%float,%float,%float,%float$"}}); @@ -283,6 +329,9 @@ ADD_CASES(TC_CSVOut, ADD_CASES(TC_CSVOut, {{"^\"BM_Counters_Tabular/repeats:2/threads:1_stddev\",%csv_report," "%float,%float,%float,%float,%float,%float$"}}); +ADD_CASES(TC_CSVOut, + {{"^\"BM_Counters_Tabular/repeats:2/threads:1_cv\",%csv_report," + "%float,%float,%float,%float,%float,%float$"}}); ADD_CASES(TC_CSVOut, {{"^\"BM_Counters_Tabular/repeats:2/threads:2\",%csv_report," "%float,%float,%float,%float,%float,%float$"}}); @@ -298,6 +347,9 @@ ADD_CASES(TC_CSVOut, ADD_CASES(TC_CSVOut, {{"^\"BM_Counters_Tabular/repeats:2/threads:2_stddev\",%csv_report," "%float,%float,%float,%float,%float,%float$"}}); +ADD_CASES(TC_CSVOut, + {{"^\"BM_Counters_Tabular/repeats:2/threads:2_cv\",%csv_report," + "%float,%float,%float,%float,%float,%float$"}}); // VS2013 does not allow this function to be passed as a lambda argument // to CHECK_BENCHMARK_RESULTS() void CheckTabular(Results const& e) {