Use truncated normal distribution for percentile approximation in util::statistics_lite

This commit is contained in:
Nikita Lisitsa 2024-06-03 20:32:26 +03:00
parent 47b772a432
commit f8c52bcfe2

View file

@ -41,6 +41,16 @@ namespace psemek::util
}
}
/// Cumulative distribution function of the standard normal distribution N(0, 1).
///
/// Evaluated as 0.5 * erfc(-x / sqrt(2)). This is algebraically the same as
/// 0.5 * (1 + erf(x / sqrt(2))), but it does not subtract two nearly equal
/// numbers when x is strongly negative, so small lower-tail probabilities
/// keep their precision instead of rounding to exactly zero.
///
/// @param x  Point at which to evaluate the CDF.
/// @return   Probability that a standard normal variable is <= x, in [0, 1].
inline double normal_cdf(double x)
{
    return 0.5 * std::erfc(-x / std::sqrt(2.0));
}
/// Quantile function (inverse CDF) of the standard normal distribution.
///
/// Uses the identity  Phi^-1(x) = sqrt(2) * erf^-1(2x - 1).
///
/// @param x  Probability to invert. The inverse error function is singular at
///           -1 and +1, so x is expected to lie strictly between 0 and 1.
/// @return   The value z for which the standard normal CDF equals x.
inline double normal_cdf_inv(double x)
{
    // Map the probability from [0, 1] onto the domain of erf^-1, [-1, 1].
    double const erf_argument = 2.0 * x - 1.0;
    double const root_two = std::sqrt(2.0);
    return root_two * boost::math::erf_inv(erf_argument);
}
}
template <typename T>
@ -100,10 +110,19 @@ namespace psemek::util
/// Approximates the p-quantile of the recorded data.
///
/// The data is modelled as a normal distribution truncated to the range
/// [min_, max_] (presumably the smallest and largest recorded samples), which
/// is the maximum-entropy distribution on that range for a given mean and
/// variance. See
///   https://en.wikipedia.org/wiki/Maximum_entropy_probability_distribution#Other_examples
///   https://en.wikipedia.org/wiki/Truncated_normal_distribution
///
/// @param p  Requested quantile as a fraction; 0 maps to the lower bound of
///           the range and 1 to the upper bound.
/// @return   The approximated quantile, always within [min_, max_] when the
///           range is non-degenerate; mean() when the data has no spread.
template <typename T>
T statistics_lite<T>::percentile(double p) const
{
    // Work in double throughout: the helper functions take and return double,
    // and narrowing to float first would discard precision for no benefit.
    double const mu = mean();
    // NOTE(review): var() is used directly as the scale parameter sigma of the
    // underlying normal distribution. If var() returns the variance rather than
    // the standard deviation, this should be std::sqrt(var()) - confirm.
    double const sigma = var();
    double const lo = min_;
    double const hi = max_;

    // Degenerate input (no spread, or NaN statistics): dividing by sigma would
    // produce NaN/inf, so fall back to the mean.
    if (!(sigma > 0.0) || !(lo < hi))
        return static_cast<T>(mu);

    // The quantile function of a truncated distribution hits the bounds
    // exactly at p = 0 and p = 1; answer directly instead of evaluating the
    // inverse error function at its singular points.
    if (p <= 0.0)
        return static_cast<T>(lo);
    if (p >= 1.0)
        return static_cast<T>(hi);

    // Standardised truncation bounds and their CDF values.
    double const alpha = (lo - mu) / sigma;
    double const beta = (hi - mu) / sigma;
    double const cdf_alpha = detail::normal_cdf(alpha);
    double const cdf_beta = detail::normal_cdf(beta);

    // Probability under the untruncated normal that corresponds to p.
    double const q = std::lerp(cdf_alpha, cdf_beta, p);

    // q can still round to exactly 0 or 1 when a bound lies far out in a tail.
    if (q <= 0.0)
        return static_cast<T>(lo);
    if (q >= 1.0)
        return static_cast<T>(hi);

    double const value = mu + sigma * detail::normal_cdf_inv(q);

    // Rounding may push the result marginally outside the truncation range.
    return static_cast<T>(std::fmax(lo, std::fmin(hi, value)));
}
template <typename T>