Move the old statistics module to statistics_lite; the new statistics module can compute percentiles

This commit is contained in:
Nikita Lisitsa 2021-03-12 14:43:00 +03:00
parent ee2f5960f4
commit 9aa494ab96

View file

@ -3,6 +3,9 @@
#include <iostream>
#include <cmath>
#include <limits>
#include <vector>
#include <numeric>
#include <algorithm>
namespace psemek::util
{
@ -39,7 +42,7 @@ namespace psemek::util
}
template <typename T>
struct statistics
struct statistics_lite
{
void push(T const & value);
@ -50,7 +53,7 @@ namespace psemek::util
T max() const { return max_; }
template <typename H>
friend statistics<H> merge(statistics<H> const & s1, statistics<H> const & s2);
friend statistics_lite<H> merge(statistics_lite<H> const & s1, statistics_lite<H> const & s2);
private:
std::size_t count_ = 0;
@ -61,7 +64,7 @@ namespace psemek::util
};
template <typename T>
void statistics<T>::push(T const & value)
void statistics_lite<T>::push(T const & value)
{
++count_;
sum_ += value;
@ -71,18 +74,114 @@ namespace psemek::util
}
// Arithmetic mean of the pushed values: the running sum divided by the
// number of samples.
// NOTE(review): count_ is 0 until the first push(), so calling this on an
// empty accumulator divides by zero.
template <typename T>
T statistics<T>::mean() const
T statistics_lite<T>::mean() const
{
return sum_ / count_;
}
// Spread of the pushed values, computed as sqrt(sum_sqr_ / count_ - mean^2).
// Despite the name, the result goes through std::sqrt, i.e. it is the square
// root of that variance expression rather than the variance itself.
// NOTE(review): sum_sqr_ is presumably the running sum of squared samples —
// confirm against push().
// NOTE(review): count_ is 0 until the first push(), so this divides by zero
// on an empty accumulator.
template <typename T>
T statistics<T>::var() const
T statistics_lite<T>::var() const
{
// The mean is evaluated once and reused for the squared term.
T const m = mean();
return std::sqrt(sum_sqr_ / count_ - m * m);
}
// Prints a one-line summary of the accumulator in the form
// "mean = M, var = V, range = [LO .. HI]" and returns the stream for chaining.
template <typename T>
std::ostream & operator << (std::ostream & os, statistics_lite<T> const & s)
{
    os << "mean = " << s.mean();
    os << ", var = " << s.var();
    os << ", range = [" << s.min() << " .. " << s.max() << "]";
    return os;
}
// Combines two accumulators into a new one that describes the union of their
// samples: counts and running sums are added, the range is widened to cover
// both inputs. Neither input is modified.
template <typename T>
statistics_lite<T> merge(statistics_lite<T> const & s1, statistics_lite<T> const & s2)
{
    statistics_lite<T> merged;

    // Range: keep the smaller lower bound and the larger upper bound.
    merged.min_ = (s2.min_ < s1.min_) ? s2.min_ : s1.min_;
    merged.max_ = (s1.max_ < s2.max_) ? s2.max_ : s1.max_;

    // Additive quantities.
    merged.sum_sqr_ = s1.sum_sqr_ + s2.sum_sqr_;
    merged.sum_ = s1.sum_ + s2.sum_;
    merged.count_ = s1.count_ + s2.count_;

    return merged;
}
// Sample accumulator that keeps every pushed value, which makes order
// statistics (percentiles) available on top of mean / deviation / range.
//
// The samples live in a mutable vector next to a "sorted" flag: percentile()
// sorts the storage on demand, so a const call may still reorder the stored
// samples.
template <typename T>
struct statistics
{
    // Appends one sample.
    void push(T const & value);

    // Number of samples pushed so far.
    std::size_t count() const { return values_.size(); }

    // Arithmetic mean of the samples.
    T mean() const;

    // Square root of (mean of squares - square of mean); see the definition.
    T var() const;

    // Smallest sample, or numeric_limits<T>::max() when there are none.
    T min() const;

    // Largest sample, or numeric_limits<T>::lowest() when there are none.
    T max() const;

    // Sample at fraction p (0 .. 1) of the sorted data.
    T percentile(double p) const;

    template <typename H>
    friend statistics<H> merge(statistics<H> const & s1, statistics<H> const & s2);

private:
    mutable std::vector<T> values_;
    // True while values_ is known to be in ascending order.
    mutable bool sorted_ = true;
};

// Stores the value and marks the storage as unsorted.
template <typename T>
void statistics<T>::push(T const & value)
{
    values_.push_back(value);
    sorted_ = false;
}

// Returns the arithmetic mean of the samples.
//
// With no samples, floating-point T yields NaN (0 / 0) while integer T
// yields T{} instead of dividing by zero.
template <typename T>
T statistics<T>::mean() const
{
    if (std::numeric_limits<T>::is_integer && values_.empty())
        return T{};

    // Divide by the count converted to T: dividing a signed integer sum by a
    // std::size_t would convert the sum to unsigned and corrupt negative means.
    return std::accumulate(values_.begin(), values_.end(), T{}) / static_cast<T>(count());
}

// Returns sqrt(E[x^2] - E[x]^2) over the samples, i.e. the square root of the
// population variance (the name is kept for interface compatibility).
//
// With no samples, floating-point T yields NaN while integer T yields T{}.
template <typename T>
T statistics<T>::var() const
{
    if (std::numeric_limits<T>::is_integer && values_.empty())
        return T{};

    T sum{};
    T sum_sqr{};
    for (auto const & v : values_)
    {
        sum += v;
        sum_sqr += v * v;
    }

    T const n = static_cast<T>(count());
    T const m = sum / n;
    T const radicand = sum_sqr / n - m * m;

    // Rounding can push the radicand slightly below zero for (nearly)
    // constant data; clamp it so the result is 0 rather than NaN. The
    // comparison is written so that a NaN radicand (empty input) passes through.
    return static_cast<T>(std::sqrt(radicand < T{} ? T{} : radicand));
}

// Returns the smallest sample. For an empty set the result is the identity
// of the min operation, numeric_limits<T>::max().
template <typename T>
T statistics<T>::min() const
{
    if (values_.empty())
        return std::numeric_limits<T>::max();
    return *std::min_element(values_.begin(), values_.end());
}

// Returns the largest sample. For an empty set the result is the identity of
// the max operation. lowest() is used rather than min(), because for
// floating-point types min() is the smallest *positive* value.
template <typename T>
T statistics<T>::max() const
{
    if (values_.empty())
        return std::numeric_limits<T>::lowest();
    return *std::max_element(values_.begin(), values_.end());
}

// Returns the sample located at fraction p of the sorted data, where p is a
// fraction in [0, 1] (0.5 is the median position, not 50).
//
// The index is floor(size * p), capped at the last element. Values of p
// outside [0, 1] are clamped, NaN is treated as 0, and an empty set yields
// T{}. The samples are sorted lazily on the first call after a push().
template <typename T>
T statistics<T>::percentile(double p) const
{
    if (values_.empty())
        return T{};

    if (!sorted_)
    {
        std::sort(values_.begin(), values_.end());
        sorted_ = true;
    }

    // Handle the out-of-range cases before converting to an index: turning a
    // negative, NaN or huge double into std::size_t is undefined behaviour.
    if (!(p > 0.0))
        return values_.front();
    if (p >= 1.0)
        return values_.back();

    auto const index = static_cast<std::size_t>(values_.size() * p);
    return values_[std::min(values_.size() - 1, index)];
}
template <typename T>
std::ostream & operator << (std::ostream & os, statistics<T> const & s)
{
@ -94,11 +193,18 @@ namespace psemek::util
statistics<T> merge(statistics<T> const & s1, statistics<T> const & s2)
{
statistics<T> result;
result.count_ = s1.count_ + s2.count_;
result.sum_ = s1.sum_ + s2.sum_;
result.sum_sqr_ = s1.sum_sqr_ + s2.sum_sqr_;
result.min_ = std::min(s1.min_, s2.min_);
result.max_ = std::max(s1.max_, s2.max_);
result.values_.reserve(s1.values_.size() + s2.values_.size());
result.sorted_ = s1.sorted_ && s2.sorted_;
if (result.sorted_)
{
std::merge(s1.values_.begin(), s1.values_.end(), s2.values_.begin(), s2.values_.end(), std::back_inserter(result.values_));
}
else
{
auto it = std::back_inserter(result.values_);
it = std::copy(s1.values_.begin(), s1.values_.end(), it);
it = std::copy(s2.values_.begin(), s2.values_.end(), it);
}
return result;
}