libcarla/include/system/boost/math/statistics/runs_test.hpp
2024-10-18 13:19:59 +08:00

122 lines
3.4 KiB
C++

/*
* Copyright Nick Thompson, 2019
* Use, modification and distribution are subject to the
* Boost Software License, Version 1.0. (See accompanying file
* LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
*/
#ifndef BOOST_MATH_STATISTICS_RUNS_TEST_HPP
#define BOOST_MATH_STATISTICS_RUNS_TEST_HPP
#include <cmath>
#include <algorithm>
#include <utility>
#include <boost/math/statistics/univariate_statistics.hpp>
#include <boost/math/distributions/normal.hpp>
namespace boost::math::statistics {
template<class RandomAccessContainer>
auto runs_above_and_below_threshold(RandomAccessContainer const & v,
typename RandomAccessContainer::value_type threshold)
{
using Real = typename RandomAccessContainer::value_type;
using std::sqrt;
using std::abs;
if (v.size() <= 1)
{
throw std::domain_error("At least 2 samples are required to get number of runs.");
}
typedef boost::math::policies::policy<
boost::math::policies::promote_float<false>,
boost::math::policies::promote_double<false> >
no_promote_policy;
decltype(v.size()) nabove = 0;
decltype(v.size()) nbelow = 0;
decltype(v.size()) imin = 0;
// Take care of the case that v[0] == threshold:
while (imin < v.size() && v[imin] == threshold) {
++imin;
}
// Take care of the constant vector case:
if (imin == v.size()) {
return std::make_pair(std::numeric_limits<Real>::quiet_NaN(), Real(0));
}
bool run_up = (v[imin] > threshold);
if (run_up) {
++nabove;
} else {
++nbelow;
}
decltype(v.size()) runs = 1;
for (decltype(v.size()) i = imin + 1; i < v.size(); ++i) {
if (v[i] == threshold) {
// skip values precisely equal to threshold (following R's randtests package)
continue;
}
bool above = (v[i] > threshold);
if (above) {
++nabove;
} else {
++nbelow;
}
if (run_up == above) {
continue;
}
else {
run_up = above;
runs++;
}
}
// If you make n an int, the subtraction is gonna be bad in the variance:
Real n = nabove + nbelow;
Real expected_runs = Real(1) + Real(2*nabove*nbelow)/Real(n);
Real variance = 2*nabove*nbelow*(2*nabove*nbelow-n)/Real(n*n*(n-1));
// Bizarre, pathological limits:
if (variance == 0)
{
if (runs == expected_runs)
{
Real statistic = 0;
Real pvalue = 1;
return std::make_pair(statistic, pvalue);
}
else
{
return std::make_pair(std::numeric_limits<Real>::quiet_NaN(), Real(0));
}
}
Real sd = sqrt(variance);
Real statistic = (runs - expected_runs)/sd;
auto normal = boost::math::normal_distribution<Real, no_promote_policy>(0,1);
Real pvalue = 2*boost::math::cdf(normal, -abs(statistic));
return std::make_pair(statistic, pvalue);
}
template<class RandomAccessContainer>
auto runs_above_and_below_median(RandomAccessContainer const & v)
{
using Real = typename RandomAccessContainer::value_type;
using std::log;
using std::sqrt;
// We have to memcpy v because the median does a partial sort,
// and that would be catastrophic for the runs test.
auto w = v;
Real median = boost::math::statistics::median(w);
return runs_above_and_below_threshold(v, median);
}
}
#endif