From f9920135c515b6ac2b51ca7ba6fafeebe24c9873 Mon Sep 17 00:00:00 2001 From: Hans Dembinski Date: Wed, 17 Mar 2021 11:53:10 +0100 Subject: [PATCH] fix weighted_mean::operator+= and add missing noexcept to operator*= (#311) --- include/boost/histogram/accumulators/mean.hpp | 4 +- .../histogram/accumulators/weighted_mean.hpp | 45 +++++--- test/accumulators_weighted_mean_test.cpp | 104 ++++++++++++++---- test/storage_adaptor_test.cpp | 17 ++- 4 files changed, 126 insertions(+), 44 deletions(-) diff --git a/include/boost/histogram/accumulators/mean.hpp b/include/boost/histogram/accumulators/mean.hpp index aa0b01fc..5e5a1a8a 100644 --- a/include/boost/histogram/accumulators/mean.hpp +++ b/include/boost/histogram/accumulators/mean.hpp @@ -79,10 +79,10 @@ public: + sum_of_deltas_squared_2 + n2 (mu2 - mu))^2 */ - const auto mu1 = mean_; - const auto mu2 = rhs.mean_; const auto n1 = sum_; + const auto mu1 = mean_; const auto n2 = rhs.sum_; + const auto mu2 = rhs.mean_; sum_ += rhs.sum_; mean_ = (n1 * mu1 + n2 * mu2) / sum_; diff --git a/include/boost/histogram/accumulators/weighted_mean.hpp b/include/boost/histogram/accumulators/weighted_mean.hpp index cc033c91..5c0989d3 100644 --- a/include/boost/histogram/accumulators/weighted_mean.hpp +++ b/include/boost/histogram/accumulators/weighted_mean.hpp @@ -8,6 +8,7 @@ #define BOOST_HISTOGRAM_ACCUMULATORS_WEIGHTED_MEAN_HPP #include +#include #include // for weighted_mean<> #include #include @@ -31,7 +32,7 @@ public: weighted_mean() = default; - /// Allow implicit conversion from other weighted_means + /// Allow implicit conversion from other weighted_means. template weighted_mean(const weighted_mean& o) : sum_of_weights_{o.sum_of_weights_} @@ -39,7 +40,7 @@ public: , weighted_mean_{o.weighted_mean_} , sum_of_weighted_deltas_squared_{o.sum_of_weighted_deltas_squared_} {} - /// Initialize to external sum of weights, sum of weights squared, mean, and variance + /// Initialize to external sum of weights, sum of weights squared, mean, and variance. weighted_mean(const_reference wsum, const_reference wsum2, const_reference mean, const_reference variance) : sum_of_weights_(wsum) @@ -48,10 +49,10 @@ public: , sum_of_weighted_deltas_squared_( variance * (sum_of_weights_ - sum_of_weights_squared_ / sum_of_weights_)) {} - /// Insert sample x + /// Insert sample x. void operator()(const_reference x) { operator()(weight(1), x); } - /// Insert sample x with weight w + /// Insert sample x with weight w. void operator()(const weight_type& w, const_reference x) { sum_of_weights_ += w.value; sum_of_weights_squared_ += w.value * w.value; @@ -60,24 +61,33 @@ public: sum_of_weighted_deltas_squared_ += w.value * delta * (x - weighted_mean_); } - /// Add another weighted_mean + /// Add another weighted_mean. weighted_mean& operator+=(const weighted_mean& rhs) { - if (sum_of_weights_ != 0 || rhs.sum_of_weights_ != 0) { - const auto tmp = - weighted_mean_ * sum_of_weights_ + rhs.weighted_mean_ * rhs.sum_of_weights_; - sum_of_weights_ += rhs.sum_of_weights_; - sum_of_weights_squared_ += rhs.sum_of_weights_squared_; - weighted_mean_ = tmp / sum_of_weights_; - } + if (rhs.sum_of_weights_ == 0) return *this; + + // see mean.hpp for derivation of correct formula + + const auto n1 = sum_of_weights_; + const auto mu1 = weighted_mean_; + const auto n2 = rhs.sum_of_weights_; + const auto mu2 = rhs.weighted_mean_; + + sum_of_weights_ += rhs.sum_of_weights_; + sum_of_weights_squared_ += rhs.sum_of_weights_squared_; + weighted_mean_ = (n1 * mu1 + n2 * mu2) / sum_of_weights_; + sum_of_weighted_deltas_squared_ += rhs.sum_of_weighted_deltas_squared_; + sum_of_weighted_deltas_squared_ += n1 * detail::square(weighted_mean_ - mu1); + sum_of_weighted_deltas_squared_ += n2 * detail::square(weighted_mean_ - mu2); + return *this; } - /** Scale by value + /** Scale by value. This acts as if all samples were scaled by the value. */ - weighted_mean& operator*=(const_reference s) { + weighted_mean& operator*=(const_reference s) noexcept { weighted_mean_ *= s; sum_of_weighted_deltas_squared_ *= s * s; return *this; @@ -92,10 +102,10 @@ public: bool operator!=(const weighted_mean& rhs) const noexcept { return !operator==(rhs); } - /// Return sum of weights + /// Return sum of weights. const_reference sum_of_weights() const noexcept { return sum_of_weights_; } - /// Return sum of weights squared (variance of weight distribution) + /// Return sum of weights squared (variance of weight distribution). const_reference sum_of_weights_squared() const noexcept { return sum_of_weights_squared_; } @@ -106,12 +116,13 @@ public: */ const_reference value() const noexcept { return weighted_mean_; } - /** Return variance of accumulated weighted samples + /** Return variance of accumulated weighted samples. The result is undefined, if `sum_of_weights() == 0` or `sum_of_weights() == sum_of_weights_squared()`. */ value_type variance() const { + // see https://en.wikipedia.org/wiki/Weighted_arithmetic_mean#Reliability_weights return sum_of_weighted_deltas_squared_ / (sum_of_weights_ - sum_of_weights_squared_ / sum_of_weights_); } diff --git a/test/accumulators_weighted_mean_test.cpp b/test/accumulators_weighted_mean_test.cpp index e74d336d..c9ee70fb 100644 --- a/test/accumulators_weighted_mean_test.cpp +++ b/test/accumulators_weighted_mean_test.cpp @@ -18,33 +18,95 @@ using namespace std::literals; int main() { using m_t = accumulators::weighted_mean; - m_t a; - BOOST_TEST_EQ(a.sum_of_weights(), 0); - BOOST_TEST_EQ(a, m_t{}); + using detail::square; - a(weight(0.5), 1); - a(weight(1.0), 2); - a(weight(0.5), 3); + // basic interface, string conversion + { + // see https://en.wikipedia.org/wiki/Weighted_arithmetic_mean#Reliability_weights - BOOST_TEST_EQ(a.sum_of_weights(), 2); - BOOST_TEST_EQ(a.sum_of_weights_squared(), 1.5); - BOOST_TEST_EQ(a.value(), 2); - BOOST_TEST_IS_CLOSE(a.variance(), 0.8, 1e-3); + m_t a; + BOOST_TEST_EQ(a.sum_of_weights(), 0); + BOOST_TEST_EQ(a, m_t{}); - BOOST_TEST_EQ(str(a), "weighted_mean(2, 2, 0.8)"s); - BOOST_TEST_EQ(str(a, 25, false), " weighted_mean(2, 2, 0.8)"s); - BOOST_TEST_EQ(str(a, 25, true), "weighted_mean(2, 2, 0.8) "s); + a(weight(0.5), 1); + a(weight(1.0), 2); + a(weight(0.5), 3); - auto b = a; - b += a; // same as feeding all samples twice + BOOST_TEST_EQ(a.sum_of_weights(), 1 + 2 * 0.5); + BOOST_TEST_EQ(a.sum_of_weights_squared(), 1 + 2 * 0.5 * 0.5); + const auto m = a.value(); + BOOST_TEST_IS_CLOSE( + a.variance(), + (0.5 * square(1 - m) + square(2 - m) + 0.5 * square(3 - m)) / + (a.sum_of_weights() - a.sum_of_weights_squared() / a.sum_of_weights()), + 1e-3); - BOOST_TEST_EQ(b.sum_of_weights(), 4); - BOOST_TEST_EQ(b.value(), 2); - BOOST_TEST_IS_CLOSE(b.variance(), 0.615, 1e-3); + BOOST_TEST_EQ(str(a), "weighted_mean(2, 2, 0.8)"s); + BOOST_TEST_EQ(str(a, 25, false), " weighted_mean(2, 2, 0.8)"s); + BOOST_TEST_EQ(str(a, 25, true), "weighted_mean(2, 2, 0.8) "s); + } - BOOST_TEST_EQ(m_t() += m_t(), m_t()); - BOOST_TEST_EQ(m_t(1, 2, 3, 4) += m_t(), m_t(1, 2, 3, 4)); - BOOST_TEST_EQ(m_t() += m_t(1, 2, 3, 4), m_t(1, 2, 3, 4)); + // addition of zero element + { + BOOST_TEST_EQ(m_t() += m_t(), m_t()); + BOOST_TEST_EQ(m_t(1, 2, 3, 4) += m_t(), m_t(1, 2, 3, 4)); + BOOST_TEST_EQ(m_t() += m_t(1, 2, 3, 4), m_t(1, 2, 3, 4)); + } + + // addition + { + m_t a, b, c; + + a(weight(4), 2); + a(weight(3), 3); + BOOST_TEST_EQ(a.sum_of_weights(), 4 + 3); + BOOST_TEST_EQ(a.sum_of_weights_squared(), 4 * 4 + 3 * 3); + BOOST_TEST_EQ(a.value(), (4 * 2 + 3 * 3) / 7.); + BOOST_TEST_IS_CLOSE(a.variance(), 0.5, 1e-3); + + b(weight(2), 4); + b(weight(1), 6); + BOOST_TEST_EQ(b.sum_of_weights(), 3); + BOOST_TEST_EQ(b.sum_of_weights_squared(), 1 + 2 * 2); + BOOST_TEST_EQ(b.value(), (2 * 4 + 1 * 6) / (2. + 1.)); + BOOST_TEST_IS_CLOSE(b.variance(), 2, 1e-3); + + c(weight(4), 2); + c(weight(3), 3); + c(weight(2), 4); + c(weight(1), 6); + + auto d = a; + d += b; + BOOST_TEST_EQ(c.sum_of_weights(), d.sum_of_weights()); + BOOST_TEST_EQ(c.sum_of_weights_squared(), d.sum_of_weights_squared()); + BOOST_TEST_EQ(c.value(), d.value()); + BOOST_TEST_IS_CLOSE(c.variance(), d.variance(), 1e-3); + } + + // using weights * 2 compared to adding weighted samples twice must + // - give same for sum_of_weights and mean + // - give twice sum_of_weights_squared + // - give half effective count + // - variance is complicated, but larger + { + m_t a, b; + + for (int i = 0; i < 2; ++i) { + a(weight(0.5), 1); + a(weight(1.0), 2); + a(weight(0.5), 3); + } + + b(weight(1), 1); + b(weight(2), 2); + b(weight(1), 3); + + BOOST_TEST_EQ(a.sum_of_weights(), b.sum_of_weights()); + BOOST_TEST_EQ(2 * a.sum_of_weights_squared(), b.sum_of_weights_squared()); + BOOST_TEST_EQ(a.value(), b.value()); + BOOST_TEST_LT(a.variance(), b.variance()); + } return boost::report_errors(); } diff --git a/test/storage_adaptor_test.cpp b/test/storage_adaptor_test.cpp index 6ccdebd5..7df8b956 100644 --- a/test/storage_adaptor_test.cpp +++ b/test/storage_adaptor_test.cpp @@ -232,10 +232,19 @@ int main() { a.reset(1); a[0](/* sample */ 1); a[0](weight(2), /* sample */ 2); - a[0] += accumulators::weighted_mean<>(1, 0, 0, 0); - BOOST_TEST_EQ(a[0].sum_of_weights(), 4); - BOOST_TEST_IS_CLOSE(a[0].value(), 1.25, 1e-3); - BOOST_TEST_IS_CLOSE(a[0].variance(), 0.242, 1e-3); + + accumulators::weighted_mean b; + b(weight(3), 3); + a[0] += b; + + accumulators::weighted_mean c; + c(weight(1), 1); + c(weight(2), 2); + c(weight(3), 3); + + BOOST_TEST_EQ(a[0].sum_of_weights(), c.sum_of_weights()); + BOOST_TEST_IS_CLOSE(a[0].value(), c.value(), 1e-3); + BOOST_TEST_IS_CLOSE(a[0].variance(), c.variance(), 1e-3); } // exceeding array capacity