From 9aa494ab96374edefb1c6ccc3b2005941e70788c Mon Sep 17 00:00:00 2001 From: lisyarus Date: Fri, 12 Mar 2021 14:43:00 +0300 Subject: [PATCH] Move old statistic module to statistics_lite, a new statistics module can compute percentiles --- libs/util/include/psemek/util/statistics.hpp | 126 +++++++++++++++++-- 1 file changed, 116 insertions(+), 10 deletions(-) diff --git a/libs/util/include/psemek/util/statistics.hpp b/libs/util/include/psemek/util/statistics.hpp index d97c9ea0..d3e27bcd 100644 --- a/libs/util/include/psemek/util/statistics.hpp +++ b/libs/util/include/psemek/util/statistics.hpp @@ -3,6 +3,9 @@ #include #include #include +#include +#include +#include namespace psemek::util { @@ -39,7 +42,7 @@ namespace psemek::util } template - struct statistics + struct statistics_lite { void push(T const & value); @@ -50,7 +53,7 @@ namespace psemek::util T max() const { return max_; } template - friend statistics merge(statistics const & s1, statistics const & s2); + friend statistics_lite merge(statistics_lite const & s1, statistics_lite const & s2); private: std::size_t count_ = 0; @@ -61,7 +64,7 @@ namespace psemek::util }; template - void statistics::push(T const & value) + void statistics_lite::push(T const & value) { ++count_; sum_ += value; @@ -71,18 +74,114 @@ namespace psemek::util } template - T statistics::mean() const + T statistics_lite::mean() const { return sum_ / count_; } template - T statistics::var() const + T statistics_lite::var() const { T const m = mean(); return std::sqrt(sum_sqr_ / count_ - m * m); } + template + std::ostream & operator << (std::ostream & os, statistics_lite const & s) + { + os << "mean = " << s.mean() << ", var = " << s.var() << ", range = [" << s.min() << " .. " << s.max() << "]"; + return os; + } + + template + statistics_lite merge(statistics_lite const & s1, statistics_lite const & s2) + { + statistics_lite result; + result.count_ = s1.count_ + s2.count_; + result.sum_ = s1.sum_ + s2.sum_; + result.sum_sqr_ = s1.sum_sqr_ + s2.sum_sqr_; + result.min_ = std::min(s1.min_, s2.min_); + result.max_ = std::max(s1.max_, s2.max_); + return result; + } + + template + struct statistics + { + void push(T const & value); + + std::size_t count() const { return values_.size(); } + T mean() const; + T var() const; + T min() const; + T max() const; + + T percentile(double p) const; + + template + friend statistics merge(statistics const & s1, statistics const & s2); + + private: + mutable std::vector values_; + mutable bool sorted_ = true; + }; + + template + void statistics::push(T const & value) + { + values_.push_back(value); + sorted_ = false; + } + + template + T statistics::mean() const + { + return std::accumulate(values_.begin(), values_.end(), T{}) / count(); + } + + template + T statistics::var() const + { + T sum{}; + T sum_sqr{}; + for (auto const & v : values_) + { + sum += v; + sum_sqr += v * v; + } + + auto m = sum / count(); + return std::sqrt(sum_sqr / count() - m * m); + } + + template + T statistics::min() const + { + if (values_.empty()) + return std::numeric_limits::max(); + return *std::min_element(values_.begin(), values_.end()); + } + + template + T statistics::max() const + { + if (values_.empty()) + return std::numeric_limits::min(); + return *std::max_element(values_.begin(), values_.end()); + } + + template + T statistics::percentile(double p) const + { + if (!sorted_) + { + std::sort(values_.begin(), values_.end()); + sorted_ = true; + } + + return values_[std::min(values_.size() - 1, values_.size() * p)]; + } + template std::ostream & operator << (std::ostream & os, statistics const & s) { @@ -94,11 +193,18 @@ namespace psemek::util statistics merge(statistics const & s1, statistics const & s2) { statistics result; - result.count_ = s1.count_ + s2.count_; - result.sum_ = s1.sum_ + s2.sum_; - result.sum_sqr_ = s1.sum_sqr_ + s2.sum_sqr_; - result.min_ = std::min(s1.min_, s2.min_); - result.max_ = std::max(s1.max_, s2.max_); + result.values_.reserve(s1.values_.size() + s2.values_.size()); + result.sorted_ = s1.sorted_ && s2.sorted_; + if (result.sorted_) + { + std::merge(s1.values_.begin(), s1.values_.end(), s2.values_.begin(), s2.values_.end(), std::back_inserter(result.values_)); + } + else + { + auto it = std::back_inserter(result.values_); + it = std::copy(s1.values_.begin(), s1.values_.end(), it); + it = std::copy(s2.values_.begin(), s2.values_.end(), it); + } return result; }