///////////////////////////////////////////////////////////////
// Copyright 2020 Madhur Chauhan.
// Copyright 2020 John Maddock. Distributed under the Boost
// Software License, Version 1.0. (See accompanying file
// LICENSE_1_0.txt or copy at https://www.boost.org/LICENSE_1_0.txt

#ifndef BOOST_MP_ADD_UNSIGNED_HPP
#define BOOST_MP_ADD_UNSIGNED_HPP

#include <boost/multiprecision/cpp_int/intel_intrinsics.hpp>
#include <boost/multiprecision/detail/assert.hpp>

namespace boost { namespace multiprecision { namespace backends {

template <class CppInt1, class CppInt2, class CppInt3>
inline BOOST_MP_CXX14_CONSTEXPR void add_unsigned_constexpr(CppInt1& result, const CppInt2& a, const CppInt3& b) noexcept(is_non_throwing_cpp_int<CppInt1>::value)
{
   using ::boost::multiprecision::std_constexpr::swap;
   //
   // This is the generic, C++ only version of addition.
   // It's also used for all constexpr branches, hence the name.
   // Nothing fancy, just let uintmax_t take the strain:
   //
   double_limb_type carry = 0;
   std::size_t      m(0), x(0);
   std::size_t      as = a.size();
   std::size_t      bs = b.size();
   minmax(as, bs, m, x);
   if (x == 1)
   {
      bool s = a.sign();
      result = static_cast<double_limb_type>(*a.limbs()) + static_cast<double_limb_type>(*b.limbs());
      result.sign(s);
      return;
   }
   result.resize(x, x);
   typename CppInt2::const_limb_pointer pa = a.limbs();
   typename CppInt3::const_limb_pointer pb = b.limbs();
   typename CppInt1::limb_pointer       pr = result.limbs();
   typename CppInt1::limb_pointer       pr_end = pr + m;

   if (as < bs)
      swap(pa, pb);

   // First where a and b overlap:
   while (pr != pr_end)
   {
      carry += static_cast<double_limb_type>(*pa) + static_cast<double_limb_type>(*pb);
#ifdef __MSVC_RUNTIME_CHECKS
      *pr = static_cast<limb_type>(carry & ~static_cast<limb_type>(0));
#else
      *pr = static_cast<limb_type>(carry);
#endif
      carry >>= CppInt1::limb_bits;
      ++pr, ++pa, ++pb;
   }
   pr_end += x - m;
   // Now where only a has digits:
   while (pr != pr_end)
   {
      if (!carry)
      {
         if (pa != pr)
            std_constexpr::copy(pa, pa + (pr_end - pr), pr);
         break;
      }
      carry += static_cast<double_limb_type>(*pa);
#ifdef __MSVC_RUNTIME_CHECKS
      *pr = static_cast<limb_type>(carry & ~static_cast<limb_type>(0));
#else
      *pr = static_cast<limb_type>(carry);
#endif
      carry >>= CppInt1::limb_bits;
      ++pr, ++pa;
   }
   if (carry)
   {
      // We overflowed, need to add one more limb:
      result.resize(x + 1, x + 1);
      if (result.size() > x)
         result.limbs()[x] = static_cast<limb_type>(1u);
   }
   result.normalize();
   result.sign(a.sign());
}
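
//
// Illustration only (not part of the library): specialised by hand for 32-bit
// limbs with a 64-bit double limb, the carry propagation above reduces to the
// sketch below. The names n, a, b and r are hypothetical:
//
//    std::uint64_t carry = 0;
//    for (std::size_t i = 0; i < n; ++i)
//    {
//       carry += std::uint64_t(a[i]) + std::uint64_t(b[i]); // at most 2^33 - 1, cannot overflow
//       r[i]   = static_cast<std::uint32_t>(carry);         // keep the low 32 bits
//       carry >>= 32;                                       // the high bit is the next carry-in
//    }
//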
//
// Core subtraction routine for all non-trivial cpp_ints:
//
template <class CppInt1, class CppInt2, class CppInt3>
inline BOOST_MP_CXX14_CONSTEXPR void subtract_unsigned_constexpr(CppInt1& result, const CppInt2& a, const CppInt3& b) noexcept(is_non_throwing_cpp_int<CppInt1>::value)
{
   using ::boost::multiprecision::std_constexpr::swap;
   //
   // This is the generic, C++ only version of subtraction.
   // It's also used for all constexpr branches, hence the name.
   // Nothing fancy, just let uintmax_t take the strain:
   //
   double_limb_type borrow = 0;
   std::size_t      m(0), x(0);
   minmax(a.size(), b.size(), m, x);
   //
   // special cases for small limb counts:
   //
   if (x == 1)
   {
      bool      s  = a.sign();
      limb_type al = *a.limbs();
      limb_type bl = *b.limbs();
      if (bl > al)
      {
         ::boost::multiprecision::std_constexpr::swap(al, bl);
         s = !s;
      }
      result = al - bl;
      result.sign(s);
      return;
   }
   // This isn't used until later, but the comparison has to occur before we resize the result,
   // as that may also resize a or b if this is an inplace operation:
   int c = a.compare_unsigned(b);
   // Set up the result vector:
   result.resize(x, x);
   // Now that a, b, and result are stable, get pointers to their limbs:
   typename CppInt2::const_limb_pointer pa      = a.limbs();
   typename CppInt3::const_limb_pointer pb      = b.limbs();
   typename CppInt1::limb_pointer       pr      = result.limbs();
   bool                                 swapped = false;
   if (c < 0)
   {
      swap(pa, pb);
      swapped = true;
   }
   else if (c == 0)
   {
      result = static_cast<limb_type>(0);
      return;
   }

   std::size_t i = 0;
   // First where a and b overlap:
   while (i < m)
   {
      borrow = static_cast<double_limb_type>(pa[i]) - static_cast<double_limb_type>(pb[i]) - borrow;
      pr[i]  = static_cast<limb_type>(borrow);
      borrow = (borrow >> CppInt1::limb_bits) & 1u;
      ++i;
   }
   // Now where only a has digits, only as long as we've borrowed:
   while (borrow && (i < x))
   {
      borrow = static_cast<double_limb_type>(pa[i]) - borrow;
      pr[i]  = static_cast<limb_type>(borrow);
      borrow = (borrow >> CppInt1::limb_bits) & 1u;
      ++i;
   }
   // Any remaining digits are the same as those in pa:
   if ((x != i) && (pa != pr))
      std_constexpr::copy(pa + i, pa + x, pr + i);
   BOOST_MP_ASSERT(0 == borrow);

   //
   // We may have lost digits, if so update limb usage count:
   //
   result.normalize();
   result.sign(a.sign());
   if (swapped)
      result.negate();
}

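//
// Illustration only (not part of the library): the borrow extraction above
// relies on unsigned wraparound. With 32-bit limbs and a 64-bit double limb:
//
//    std::uint64_t d      = std::uint64_t(3) - std::uint64_t(5); // wraps to 2^64 - 2
//    std::uint32_t low    = static_cast<std::uint32_t>(d);       // 0xFFFFFFFE, the result limb
//    std::uint64_t borrow = (d >> 32) & 1u;                      // 1: propagate to the next limb
//
// Bit 32 of the wrapped difference is set exactly when the limb subtraction
// went negative, which is what (borrow >> limb_bits) & 1u reads off.
//
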
#ifdef BOOST_MP_HAS_IMMINTRIN_H
//
// This is the key addition routine where all the argument types are non-trivial cpp_ints:
//
//
// This optimization is limited to: GCC, LLVM, ICC (Intel), and MSVC for x86_64 and i386.
// If your architecture and compiler support an ADC intrinsic, please file a bug.
//
// As of May 2020, major compilers don't recognize the carry chain even though the adc
// intrinsics are used to hint compilers to emit ADC, and they still don't
// unroll the loop efficiently (except LLVM), so manual unrolling is done.
//
// Also note that these intrinsics were only introduced by Intel as part of the
// ADX processor extensions, even though the ADC instruction has been available
// on basically all x86 processors. That means gcc-9, clang-9, msvc-14.2 and up
// are required to support these intrinsics.
//
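//
// Illustration only (not part of this header): _addcarry_u64 is a full 64-bit
// add-with-carry; addcarry_limb used below is the library's portable wrapper
// over the limb-sized variant. A minimal sketch of the semantics, assuming
// x86_64 and <immintrin.h>:
//
//    unsigned long long sum = 0;
//    unsigned char      c   = _addcarry_u64(/* carry-in */ 0, ~0ULL, 1ULL, &sum);
//    // sum == 0 and c == 1: the add wrapped, and the returned carry-out can
//    // be chained directly into the next _addcarry_u64 call.
//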
template <class CppInt1, class CppInt2, class CppInt3>
inline BOOST_MP_CXX14_CONSTEXPR void add_unsigned(CppInt1& result, const CppInt2& a, const CppInt3& b) noexcept(is_non_throwing_cpp_int<CppInt1>::value)
{
#ifndef BOOST_MP_NO_CONSTEXPR_DETECTION
   if (BOOST_MP_IS_CONST_EVALUATED(a.size()))
   {
      add_unsigned_constexpr(result, a, b);
   }
   else
#endif
   {
      using std::swap;

      // Nothing fancy, just let uintmax_t take the strain:
      std::size_t m(0), x(0);
      std::size_t as = a.size();
      std::size_t bs = b.size();
      minmax(as, bs, m, x);
      if (x == 1)
      {
         bool s = a.sign();
         result = static_cast<double_limb_type>(*a.limbs()) + static_cast<double_limb_type>(*b.limbs());
         result.sign(s);
         return;
      }
      result.resize(x, x);
      typename CppInt2::const_limb_pointer pa = a.limbs();
      typename CppInt3::const_limb_pointer pb = b.limbs();
      typename CppInt1::limb_pointer       pr = result.limbs();

      if (as < bs)
         swap(pa, pb);
      // First where a and b overlap:
      std::size_t   i     = 0;
      unsigned char carry = 0;
#if defined(BOOST_MSVC) && !defined(BOOST_HAS_INT128) && defined(_M_X64)
      //
      // Special case for 32-bit limbs on 64-bit architecture - we can process
      // 2 limbs with each instruction.
      //
      for (; i + 8 <= m; i += 8)
      {
         carry = _addcarry_u64(carry, *reinterpret_cast<const unsigned long long*>(pa + i + 0), *reinterpret_cast<const unsigned long long*>(pb + i + 0), reinterpret_cast<unsigned long long*>(pr + i));
         carry = _addcarry_u64(carry, *reinterpret_cast<const unsigned long long*>(pa + i + 2), *reinterpret_cast<const unsigned long long*>(pb + i + 2), reinterpret_cast<unsigned long long*>(pr + i + 2));
         carry = _addcarry_u64(carry, *reinterpret_cast<const unsigned long long*>(pa + i + 4), *reinterpret_cast<const unsigned long long*>(pb + i + 4), reinterpret_cast<unsigned long long*>(pr + i + 4));
         carry = _addcarry_u64(carry, *reinterpret_cast<const unsigned long long*>(pa + i + 6), *reinterpret_cast<const unsigned long long*>(pb + i + 6), reinterpret_cast<unsigned long long*>(pr + i + 6));
      }
#else
      for (; i + 4 <= m; i += 4)
      {
         carry = ::boost::multiprecision::detail::addcarry_limb(carry, pa[i + 0], pb[i + 0], pr + i);
         carry = ::boost::multiprecision::detail::addcarry_limb(carry, pa[i + 1], pb[i + 1], pr + i + 1);
         carry = ::boost::multiprecision::detail::addcarry_limb(carry, pa[i + 2], pb[i + 2], pr + i + 2);
         carry = ::boost::multiprecision::detail::addcarry_limb(carry, pa[i + 3], pb[i + 3], pr + i + 3);
      }
#endif
      for (; i < m; ++i)
         carry = ::boost::multiprecision::detail::addcarry_limb(carry, pa[i], pb[i], pr + i);
      for (; i < x && carry; ++i)
         // We know carry is 1, so we just need to increment pa[i] (i.e. add a literal 1) and capture the carry:
         carry = ::boost::multiprecision::detail::addcarry_limb(0, pa[i], 1, pr + i);
      if (i == x && carry)
      {
         // We overflowed, need to add one more limb:
         result.resize(x + 1, x + 1);
         if (result.size() > x)
            result.limbs()[x] = static_cast<limb_type>(1u);
      }
      else if ((x != i) && (pa != pr))
         // Copy remaining digits only if we need to:
         std_constexpr::copy(pa + i, pa + x, pr + i);
      result.normalize();
      result.sign(a.sign());
   }
}
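
//
// Illustration only: the "i == x && carry" branch above is whole-number overflow.
// With 32-bit limbs, adding 1 to the two-limb value 0xFFFFFFFFFFFFFFFF carries
// out of every limb, so the result needs x + 1 = 3 limbs and the new top limb
// is exactly 1.
//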
template <class CppInt1, class CppInt2, class CppInt3>
inline BOOST_MP_CXX14_CONSTEXPR void subtract_unsigned(CppInt1& result, const CppInt2& a, const CppInt3& b) noexcept(is_non_throwing_cpp_int<CppInt1>::value)
{
#ifndef BOOST_MP_NO_CONSTEXPR_DETECTION
   if (BOOST_MP_IS_CONST_EVALUATED(a.size()))
   {
      subtract_unsigned_constexpr(result, a, b);
   }
   else
#endif
   {
      using std::swap;

      // Nothing fancy, just let uintmax_t take the strain:
      std::size_t m(0), x(0);
      minmax(a.size(), b.size(), m, x);
      //
      // special cases for small limb counts:
      //
      if (x == 1)
      {
         bool      s  = a.sign();
         limb_type al = *a.limbs();
         limb_type bl = *b.limbs();
         if (bl > al)
         {
            ::boost::multiprecision::std_constexpr::swap(al, bl);
            s = !s;
         }
         result = al - bl;
         result.sign(s);
         return;
      }
      // This isn't used until later, but the comparison has to occur before we resize the result,
      // as that may also resize a or b if this is an inplace operation:
      int c = a.compare_unsigned(b);
      // Set up the result vector:
      result.resize(x, x);
      // Now that a, b, and result are stable, get pointers to their limbs:
      typename CppInt2::const_limb_pointer pa      = a.limbs();
      typename CppInt3::const_limb_pointer pb      = b.limbs();
      typename CppInt1::limb_pointer       pr      = result.limbs();
      bool                                 swapped = false;
      if (c < 0)
      {
         swap(pa, pb);
         swapped = true;
      }
      else if (c == 0)
      {
         result = static_cast<limb_type>(0);
         return;
      }

      std::size_t   i      = 0;
      unsigned char borrow = 0;
      // First where a and b overlap:
#if defined(BOOST_MSVC) && !defined(BOOST_HAS_INT128) && defined(_M_X64)
      //
      // Special case for 32-bit limbs on 64-bit architecture - we can process
      // 2 limbs with each instruction.
      //
      for (; i + 8 <= m; i += 8)
      {
         borrow = _subborrow_u64(borrow, *reinterpret_cast<const unsigned long long*>(pa + i), *reinterpret_cast<const unsigned long long*>(pb + i), reinterpret_cast<unsigned long long*>(pr + i));
         borrow = _subborrow_u64(borrow, *reinterpret_cast<const unsigned long long*>(pa + i + 2), *reinterpret_cast<const unsigned long long*>(pb + i + 2), reinterpret_cast<unsigned long long*>(pr + i + 2));
         borrow = _subborrow_u64(borrow, *reinterpret_cast<const unsigned long long*>(pa + i + 4), *reinterpret_cast<const unsigned long long*>(pb + i + 4), reinterpret_cast<unsigned long long*>(pr + i + 4));
         borrow = _subborrow_u64(borrow, *reinterpret_cast<const unsigned long long*>(pa + i + 6), *reinterpret_cast<const unsigned long long*>(pb + i + 6), reinterpret_cast<unsigned long long*>(pr + i + 6));
      }
#else
      for (; i + 4 <= m; i += 4)
      {
         borrow = boost::multiprecision::detail::subborrow_limb(borrow, pa[i], pb[i], pr + i);
         borrow = boost::multiprecision::detail::subborrow_limb(borrow, pa[i + 1], pb[i + 1], pr + i + 1);
         borrow = boost::multiprecision::detail::subborrow_limb(borrow, pa[i + 2], pb[i + 2], pr + i + 2);
         borrow = boost::multiprecision::detail::subborrow_limb(borrow, pa[i + 3], pb[i + 3], pr + i + 3);
      }
#endif
      for (; i < m; ++i)
         borrow = boost::multiprecision::detail::subborrow_limb(borrow, pa[i], pb[i], pr + i);

      // Now where only a has digits, only as long as we've borrowed:
      while (borrow && (i < x))
      {
         borrow = boost::multiprecision::detail::subborrow_limb(borrow, pa[i], 0, pr + i);
         ++i;
      }
      // Any remaining digits are the same as those in pa:
      if ((x != i) && (pa != pr))
         std_constexpr::copy(pa + i, pa + x, pr + i);
      BOOST_MP_ASSERT(0 == borrow);

      //
      // We may have lost digits, if so update limb usage count:
      //
      result.normalize();
      result.sign(a.sign());
      if (swapped)
         result.negate();
   } // constexpr.
}
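
//
// Illustration only: the swap/negate logic above always subtracts the smaller
// magnitude from the larger. Conceptually, for a = 3 and b = 5,
// compare_unsigned returns c < 0, the pointers are swapped so the loops
// compute 5 - 3 = 2, and the final negate() flips the sign to give -2.
//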
#else

template <class CppInt1, class CppInt2, class CppInt3>
inline BOOST_MP_CXX14_CONSTEXPR void add_unsigned(CppInt1& result, const CppInt2& a, const CppInt3& b) noexcept(is_non_throwing_cpp_int<CppInt1>::value)
{
   add_unsigned_constexpr(result, a, b);
}

template <class CppInt1, class CppInt2, class CppInt3>
inline BOOST_MP_CXX14_CONSTEXPR void subtract_unsigned(CppInt1& result, const CppInt2& a, const CppInt3& b) noexcept(is_non_throwing_cpp_int<CppInt1>::value)
{
   subtract_unsigned_constexpr(result, a, b);
}

#endif

} } } // namespaces

#endif // BOOST_MP_ADD_UNSIGNED_HPP