134 lines
5.0 KiB
C++
134 lines
5.0 KiB
C++
/*
 * Copyright Nick Thompson, 2019
 * Copyright Matt Borland, 2021
 * Use, modification and distribution are subject to the
 * Boost Software License, Version 1.0. (See accompanying file
 * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
 */
|
|
|
|
#ifndef BOOST_MATH_STATISTICS_LINEAR_REGRESSION_HPP
|
|
#define BOOST_MATH_STATISTICS_LINEAR_REGRESSION_HPP
|
|
|
|
#include <cmath>
|
|
#include <algorithm>
|
|
#include <utility>
|
|
#include <tuple>
|
|
#include <stdexcept>
|
|
#include <type_traits>
|
|
#include <boost/math/statistics/univariate_statistics.hpp>
|
|
#include <boost/math/statistics/bivariate_statistics.hpp>
|
|
|
|
namespace boost { namespace math { namespace statistics { namespace detail {
|
|
|
|
|
|
template<class ReturnType, class RandomAccessContainer>
|
|
ReturnType simple_ordinary_least_squares_impl(RandomAccessContainer const & x,
|
|
RandomAccessContainer const & y)
|
|
{
|
|
using Real = typename std::tuple_element<0, ReturnType>::type;
|
|
if (x.size() <= 1)
|
|
{
|
|
throw std::domain_error("At least 2 samples are required to perform a linear regression.");
|
|
}
|
|
|
|
if (x.size() != y.size())
|
|
{
|
|
throw std::domain_error("The same number of samples must be in the independent and dependent variable.");
|
|
}
|
|
std::tuple<Real, Real, Real> temp = boost::math::statistics::means_and_covariance(x, y);
|
|
Real mu_x = std::get<0>(temp);
|
|
Real mu_y = std::get<1>(temp);
|
|
Real cov_xy = std::get<2>(temp);
|
|
|
|
Real var_x = boost::math::statistics::variance(x);
|
|
|
|
if (var_x <= 0) {
|
|
throw std::domain_error("Independent variable has no variance; this breaks linear regression.");
|
|
}
|
|
|
|
|
|
Real c1 = cov_xy/var_x;
|
|
Real c0 = mu_y - c1*mu_x;
|
|
|
|
return std::make_pair(c0, c1);
|
|
}
|
|
|
|
template<class ReturnType, class RandomAccessContainer>
|
|
ReturnType simple_ordinary_least_squares_with_R_squared_impl(RandomAccessContainer const & x,
|
|
RandomAccessContainer const & y)
|
|
{
|
|
using Real = typename std::tuple_element<0, ReturnType>::type;
|
|
if (x.size() <= 1)
|
|
{
|
|
throw std::domain_error("At least 2 samples are required to perform a linear regression.");
|
|
}
|
|
|
|
if (x.size() != y.size())
|
|
{
|
|
throw std::domain_error("The same number of samples must be in the independent and dependent variable.");
|
|
}
|
|
std::tuple<Real, Real, Real> temp = boost::math::statistics::means_and_covariance(x, y);
|
|
Real mu_x = std::get<0>(temp);
|
|
Real mu_y = std::get<1>(temp);
|
|
Real cov_xy = std::get<2>(temp);
|
|
|
|
Real var_x = boost::math::statistics::variance(x);
|
|
|
|
if (var_x <= 0) {
|
|
throw std::domain_error("Independent variable has no variance; this breaks linear regression.");
|
|
}
|
|
|
|
|
|
Real c1 = cov_xy/var_x;
|
|
Real c0 = mu_y - c1*mu_x;
|
|
|
|
Real squared_residuals = 0;
|
|
Real squared_mean_deviation = 0;
|
|
for(decltype(y.size()) i = 0; i < y.size(); ++i) {
|
|
squared_mean_deviation += (y[i] - mu_y)*(y[i]-mu_y);
|
|
Real ei = (c0 + c1*x[i]) - y[i];
|
|
squared_residuals += ei*ei;
|
|
}
|
|
|
|
Real Rsquared;
|
|
if (squared_mean_deviation == 0) {
|
|
// Then y = constant, so the linear regression is perfect.
|
|
Rsquared = 1;
|
|
} else {
|
|
Rsquared = 1 - squared_residuals/squared_mean_deviation;
|
|
}
|
|
|
|
return std::make_tuple(c0, c1, Rsquared);
|
|
}
|
|
} // namespace detail
|
|
|
|
template<typename RandomAccessContainer, typename Real = typename RandomAccessContainer::value_type,
|
|
typename std::enable_if<std::is_integral<Real>::value, bool>::type = true>
|
|
inline auto simple_ordinary_least_squares(RandomAccessContainer const & x, RandomAccessContainer const & y) -> std::pair<double, double>
|
|
{
|
|
return detail::simple_ordinary_least_squares_impl<std::pair<double, double>>(x, y);
|
|
}
|
|
|
|
template<typename RandomAccessContainer, typename Real = typename RandomAccessContainer::value_type,
|
|
typename std::enable_if<!std::is_integral<Real>::value, bool>::type = true>
|
|
inline auto simple_ordinary_least_squares(RandomAccessContainer const & x, RandomAccessContainer const & y) -> std::pair<Real, Real>
|
|
{
|
|
return detail::simple_ordinary_least_squares_impl<std::pair<Real, Real>>(x, y);
|
|
}
|
|
|
|
template<typename RandomAccessContainer, typename Real = typename RandomAccessContainer::value_type,
|
|
typename std::enable_if<std::is_integral<Real>::value, bool>::type = true>
|
|
inline auto simple_ordinary_least_squares_with_R_squared(RandomAccessContainer const & x, RandomAccessContainer const & y) -> std::tuple<double, double, double>
|
|
{
|
|
return detail::simple_ordinary_least_squares_with_R_squared_impl<std::tuple<double, double, double>>(x, y);
|
|
}
|
|
|
|
template<typename RandomAccessContainer, typename Real = typename RandomAccessContainer::value_type,
|
|
typename std::enable_if<!std::is_integral<Real>::value, bool>::type = true>
|
|
inline auto simple_ordinary_least_squares_with_R_squared(RandomAccessContainer const & x, RandomAccessContainer const & y) -> std::tuple<Real, Real, Real>
|
|
{
|
|
return detail::simple_ordinary_least_squares_with_R_squared_impl<std::tuple<Real, Real, Real>>(x, y);
|
|
}
|
|
}}} // namespace boost::math::statistics
|
|
#endif
|