97 lines
2.9 KiB
C++
97 lines
2.9 KiB
C++
// (C) Copyright Nick Thompson 2018.
|
|
// Use, modification and distribution are subject to the
|
|
// Boost Software License, Version 1.0. (See accompanying file
|
|
// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
|
|
|
|
#ifndef BOOST_MATH_TOOLS_BIVARIATE_STATISTICS_HPP
|
|
#define BOOST_MATH_TOOLS_BIVARIATE_STATISTICS_HPP
|
|
|
|
#include <iterator>
|
|
#include <tuple>
|
|
#include <limits>
|
|
#include <boost/math/tools/assert.hpp>
|
|
#include <boost/math/tools/header_deprecated.hpp>
|
|
|
|
BOOST_MATH_HEADER_DEPRECATED("<boost/math/statistics/bivariate_statistics.hpp>");
|
|
|
|
namespace boost{ namespace math{ namespace tools {
|
|
|
|
template<class Container>
|
|
auto means_and_covariance(Container const & u, Container const & v)
|
|
{
|
|
using Real = typename Container::value_type;
|
|
using std::size;
|
|
BOOST_MATH_ASSERT_MSG(size(u) == size(v), "The size of each vector must be the same to compute covariance.");
|
|
BOOST_MATH_ASSERT_MSG(size(u) > 0, "Computing covariance requires at least one sample.");
|
|
|
|
// See Equation III.9 of "Numerically Stable, Single-Pass, Parallel Statistics Algorithms", Bennet et al.
|
|
Real cov = 0;
|
|
Real mu_u = u[0];
|
|
Real mu_v = v[0];
|
|
|
|
for(size_t i = 1; i < size(u); ++i)
|
|
{
|
|
Real u_tmp = (u[i] - mu_u)/(i+1);
|
|
Real v_tmp = v[i] - mu_v;
|
|
cov += i*u_tmp*v_tmp;
|
|
mu_u = mu_u + u_tmp;
|
|
mu_v = mu_v + v_tmp/(i+1);
|
|
}
|
|
|
|
return std::make_tuple(mu_u, mu_v, cov/size(u));
|
|
}
|
|
|
|
template<class Container>
|
|
auto covariance(Container const & u, Container const & v)
|
|
{
|
|
auto [mu_u, mu_v, cov] = boost::math::tools::means_and_covariance(u, v);
|
|
return cov;
|
|
}
|
|
|
|
template<class Container>
|
|
auto correlation_coefficient(Container const & u, Container const & v)
|
|
{
|
|
using Real = typename Container::value_type;
|
|
using std::size;
|
|
BOOST_MATH_ASSERT_MSG(size(u) == size(v), "The size of each vector must be the same to compute covariance.");
|
|
BOOST_MATH_ASSERT_MSG(size(u) > 0, "Computing covariance requires at least two samples.");
|
|
|
|
Real cov = 0;
|
|
Real mu_u = u[0];
|
|
Real mu_v = v[0];
|
|
Real Qu = 0;
|
|
Real Qv = 0;
|
|
|
|
for(size_t i = 1; i < size(u); ++i)
|
|
{
|
|
Real u_tmp = u[i] - mu_u;
|
|
Real v_tmp = v[i] - mu_v;
|
|
Qu = Qu + (i*u_tmp*u_tmp)/(i+1);
|
|
Qv = Qv + (i*v_tmp*v_tmp)/(i+1);
|
|
cov += i*u_tmp*v_tmp/(i+1);
|
|
mu_u = mu_u + u_tmp/(i+1);
|
|
mu_v = mu_v + v_tmp/(i+1);
|
|
}
|
|
|
|
// If one dataset is constant, then they have no correlation:
|
|
// See https://stats.stackexchange.com/questions/23676/normalized-correlation-with-a-constant-vector
|
|
// Thanks to zbjornson for pointing this out.
|
|
if (Qu == 0 || Qv == 0)
|
|
{
|
|
return std::numeric_limits<Real>::quiet_NaN();
|
|
}
|
|
|
|
// Make sure rho in [-1, 1], even in the presence of numerical noise.
|
|
Real rho = cov/sqrt(Qu*Qv);
|
|
if (rho > 1) {
|
|
rho = 1;
|
|
}
|
|
if (rho < -1) {
|
|
rho = -1;
|
|
}
|
|
return rho;
|
|
}
|
|
|
|
}}}
|
|
#endif
|