From f8c52bcfe23fd48920a98440ec2cd3941792d5c7 Mon Sep 17 00:00:00 2001 From: lisyarus Date: Mon, 3 Jun 2024 20:32:26 +0300 Subject: [PATCH] Use truncated normal distribution for percentile approximation in util::statistics_lite --- libs/util/include/psemek/util/statistics.hpp | 27 +++++++++++++++++--- 1 file changed, 23 insertions(+), 4 deletions(-) diff --git a/libs/util/include/psemek/util/statistics.hpp b/libs/util/include/psemek/util/statistics.hpp index ab2a964e..b81aeb4b 100644 --- a/libs/util/include/psemek/util/statistics.hpp +++ b/libs/util/include/psemek/util/statistics.hpp @@ -41,6 +41,16 @@ namespace psemek::util } } + inline double normal_cdf(double x) + { + return 0.5 * (1.0 + boost::math::erf(x / std::sqrt(2.0))); + } + + inline double normal_cdf_inv(double x) + { + return std::sqrt(2.0) * boost::math::erf_inv(2.0 * x - 1.0); + } + } template @@ -100,10 +110,19 @@ namespace psemek::util template T statistics_lite::percentile(double p) const { - // Assume normal distribution - // TODO: use a better distribution, maybe maximizing entropy on [0, +inf) - // See https://en.wikipedia.org/wiki/Differential_entropy#Alternative_proof - return boost::math::erf_inv(2.0 * p - 1) * var() * std::sqrt(2.0) + mean(); + // Assume truncated normal distribution in the range [min, max] + // which is the maximum-entropy distribution on this range + // with specified mean and variance + // See + // https://en.wikipedia.org/wiki/Maximum_entropy_probability_distribution#Other_examples + // https://en.wikipedia.org/wiki/Truncated_normal_distribution + + float const mu = mean(); + float const sigma = var(); + float const alpha = (min_ - mu) / sigma; + float const beta = (max_ - mu) / sigma; + + return mu + sigma * detail::normal_cdf_inv(std::lerp(detail::normal_cdf(alpha), detail::normal_cdf(beta), p)); } template