From 6dd50bb6061d9bf9c7053032334ccb72f6438d09 Mon Sep 17 00:00:00 2001 From: Jesse Rosenstock Date: Thu, 24 Aug 2023 16:05:09 +0200 Subject: [PATCH] StatisticsMedian: Fix bug Previously, this could return the wrong result when there was an even number of elements. There were two `nth_element` calls. The second call could change elements in `[center2, end])`, which was where `center` pointed. Therefore, `*center` sometimes had the wrong value after the second `nth_element` call. Rewrite to use `max_element` instead of the second call to `nth_element`. This avoids modifying the vector. --- src/statistics.cc | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/src/statistics.cc b/src/statistics.cc index c4b54b27..a0e828e8 100644 --- a/src/statistics.cc +++ b/src/statistics.cc @@ -42,13 +42,12 @@ double StatisticsMedian(const std::vector& v) { auto center = copy.begin() + v.size() / 2; std::nth_element(copy.begin(), center, copy.end()); - // did we have an odd number of samples? - // if yes, then center is the median - // it no, then we are looking for the average between center and the value - // before + // Did we have an odd number of samples? If yes, then center is the median. + // If not, then we are looking for the average between center and the value + // before. Instead of resorting, we just look for the max value before it. + // (Since `copy` is partially sorted.) if (v.size() % 2 == 1) return *center; - auto center2 = copy.begin() + v.size() / 2 - 1; - std::nth_element(copy.begin(), center2, copy.end()); + auto center2 = std::max_element(copy.begin(), center); return (*center + *center2) / 2.0; }