Use truncated normal distribution for percentile approximation in util::statistics_lite
This commit is contained in:
parent
47b772a432
commit
f8c52bcfe2
1 changed files with 23 additions and 4 deletions
|
|
@ -41,6 +41,16 @@ namespace psemek::util
|
|||
}
|
||||
}
|
||||
|
||||
/// Cumulative distribution function of the standard normal distribution.
///
/// @param x  Point at which the CDF is evaluated.
/// @return   P(X <= x) for X ~ N(0, 1).
inline double normal_cdf(double x)
{
	// Phi(x) = 0.5 * erfc(-x / sqrt(2)).
	// Mathematically the same as 0.5 * (1 + erf(x / sqrt(2))), but that form
	// loses all precision in the lower tail: erf(...) rounds to -1 there and
	// the sum collapses to exactly 0. erfc keeps the tail representable.
	return 0.5 * std::erfc(-x / std::sqrt(2.0));
}
|
||||
|
||||
inline double normal_cdf_inv(double x)
|
||||
{
|
||||
return std::sqrt(2.0) * boost::math::erf_inv(2.0 * x - 1.0);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
|
|
@ -100,10 +110,19 @@ namespace psemek::util
|
|||
template <typename T>
|
||||
T statistics_lite<T>::percentile(double p) const
|
||||
{
|
||||
// Assume normal distribution
|
||||
// TODO: use a better distribution, maybe maximizing entropy on [0, +inf)
|
||||
// See https://en.wikipedia.org/wiki/Differential_entropy#Alternative_proof
|
||||
return boost::math::erf_inv(2.0 * p - 1) * var() * std::sqrt(2.0) + mean();
|
||||
// Assume truncated normal distribution in the range [min, max]
|
||||
// which is the maximum-entropy distributioon on this range
|
||||
// with specified mean and variance
|
||||
// See
|
||||
// https://en.wikipedia.org/wiki/Maximum_entropy_probability_distribution#Other_examples
|
||||
// https://en.wikipedia.org/wiki/Truncated_normal_distribution
|
||||
|
||||
float const mu = mean();
|
||||
float const sigma = var();
|
||||
float const alpha = (min_ - mu) / sigma;
|
||||
float const beta = (max_ - mu) / sigma;
|
||||
|
||||
return mu + sigma * detail::normal_cdf_inv(std::lerp(detail::normal_cdf(alpha), detail::normal_cdf(beta), p));
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue