From f5a8506e15b95ce4ec912a562b0ea9aba7e02282 Mon Sep 17 00:00:00 2001 From: fbusato Date: Mon, 22 Jun 2026 12:48:46 -0700 Subject: [PATCH 01/14] draft --- docs/libcudacxx/extended_api/numeric.rst | 6 + .../extended_api/numeric/isclose.rst | 105 ++++++++ libcudacxx/include/cuda/__numeric/isclose.h | 237 ++++++++++++++++++ libcudacxx/include/cuda/numeric | 1 + .../cuda/numeric/isclose/isclose.pass.cpp | 181 +++++++++++++ 5 files changed, 530 insertions(+) create mode 100644 docs/libcudacxx/extended_api/numeric/isclose.rst create mode 100644 libcudacxx/include/cuda/__numeric/isclose.h create mode 100644 libcudacxx/test/libcudacxx/cuda/numeric/isclose/isclose.pass.cpp diff --git a/docs/libcudacxx/extended_api/numeric.rst b/docs/libcudacxx/extended_api/numeric.rst index 3a00e877662..7a6957a242c 100644 --- a/docs/libcudacxx/extended_api/numeric.rst +++ b/docs/libcudacxx/extended_api/numeric.rst @@ -11,6 +11,7 @@ Numeric numeric/saturating_add_overflow numeric/div_overflow numeric/saturating_div_overflow + numeric/isclose numeric/mul_overflow numeric/saturating_mul_overflow numeric/narrow @@ -59,6 +60,11 @@ Numeric - CCCL 3.2.0 - CUDA 13.2 + * - :ref:`cuda::isclose <libcudacxx-extended-api-numeric-isclose>` + - Checks whether two values are approximately equal + - CCCL 3.5.0 + - CUDA 13.5 + * - :ref:`cuda::mul_overflow ` - Performs multiplication with overflow checking - CCCL 3.4.0 diff --git a/docs/libcudacxx/extended_api/numeric/isclose.rst b/docs/libcudacxx/extended_api/numeric/isclose.rst new file mode 100644 index 00000000000..2da1f5d8b08 --- /dev/null +++ b/docs/libcudacxx/extended_api/numeric/isclose.rst @@ -0,0 +1,105 @@ +.. _libcudacxx-extended-api-numeric-isclose: + +``cuda::isclose`` +================= + +..
code:: cpp + + template <typename T> + [[nodiscard]] constexpr bool isclose(T lhs, T rhs) noexcept; + + template <typename T> + [[nodiscard]] constexpr bool isclose(T lhs, T rhs, float rel_tol) noexcept; + + template <typename T, typename AbsTol> + [[nodiscard]] constexpr bool isclose(T lhs, T rhs, float rel_tol, AbsTol abs_tol) noexcept; + + template <typename Complex> + [[nodiscard]] bool isclose(const Complex& lhs, const Complex& rhs) noexcept; + + template <typename Complex> + [[nodiscard]] bool isclose(const Complex& lhs, const Complex& rhs, float rel_tol) noexcept; + + template <typename Complex, typename AbsTol> + [[nodiscard]] bool isclose(const Complex& lhs, const Complex& rhs, float rel_tol, AbsTol abs_tol) noexcept; + +``cuda::isclose`` checks whether two values are approximately equal using the weak symmetric comparison described by +`PEP 485 <https://peps.python.org/pep-0485/>`_: + +.. code:: cpp + + abs(lhs - rhs) <= max(abs_tol, rel_tol * max(abs(lhs), abs(rhs))) + +The overloads without ``abs_tol`` use ``abs_tol == 0``. The overloads without ``rel_tol`` use a default relative +tolerance based on the promoted comparison type: + +.. code:: cpp + + pow(10, -ceil_div(cuda::std::numeric_limits<T>::max_digits10, 2)) + +For ``double`` comparisons, this is ``1e-9``, matching the default relative tolerance from PEP 485. Lower- and +higher-precision comparison types use a correspondingly larger or smaller default. + +**Parameters** + +- ``lhs``: The first value to compare. +- ``rhs``: The second value to compare. +- ``rel_tol``: The relative tolerance. Must be finite and non-negative. Passing ``0`` performs a purely absolute + tolerance check when ``abs_tol`` is non-zero. +- ``abs_tol``: The absolute tolerance. Must be finite and non-negative. This is useful for comparisons near zero. The + supplied type may promote to the value comparison type, but may not make the value comparison type wider. + +**Return value** + +- Returns ``true`` if ``lhs`` and ``rhs`` are close to each other, otherwise returns ``false``. + +**Constraints** + +- Scalar overloads require ``lhs`` and ``rhs`` to have the same arithmetic type.
``abs_tol`` must be representable in the + value comparison type after promotion. For example, ``double`` values may be compared with a ``float`` absolute + tolerance, but ``float`` values cannot use a ``double`` absolute tolerance. ``rel_tol`` is always a ``float``. +- Complex overloads accept ``cuda::std::complex`` and ``cuda::complex`` operands. ``lhs`` and ``rhs`` must have the + same complex type, and ``abs_tol`` must be representable in the complex value comparison type after promotion. + ``rel_tol`` is always a ``float``. + +**Special values** + +- NaN is never close to any value, including another NaN. +- Infinity and negative infinity are only close to themselves. +- With the default ``abs_tol == 0``, comparisons near zero generally require an explicitly supplied absolute tolerance. + +For complex values, ``cuda::isclose`` follows the ``cmath.isclose`` model from PEP 485: the difference and scaling values +are computed from complex magnitudes, rather than comparing the real and imaginary components independently. + +Example +------- + +.. 
code:: cuda + + #include + #include + #include + #include + + __global__ void kernel() + { + assert(cuda::isclose(1.0, 1.0 + 5e-10)); + assert(!cuda::isclose(1.0, 1.0 + 5e-8)); + + assert(!cuda::isclose(0.0, 1e-12)); + assert(cuda::isclose(0.0, 1e-12, 0.0, 1e-12)); + + cuda::std::complex z1{1.0, 1.0}; + cuda::std::complex z2{2.0, 0.0}; + assert(cuda::isclose(z1, z2, 0.75)); + + cuda::complex z3{1.0f, 1.0f}; + cuda::complex z4{2.0f, 0.0f}; + assert(cuda::isclose(z3, z4, 0.75f)); + } + + int main() + { + kernel<<<1, 1>>>(); + cudaDeviceSynchronize(); + } diff --git a/libcudacxx/include/cuda/__numeric/isclose.h b/libcudacxx/include/cuda/__numeric/isclose.h new file mode 100644 index 00000000000..76e7d7c91e8 --- /dev/null +++ b/libcudacxx/include/cuda/__numeric/isclose.h @@ -0,0 +1,237 @@ +//===----------------------------------------------------------------------===// +// +// Part of libcu++, the C++ Standard Library for your entire system, +// under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. 
+// +//===----------------------------------------------------------------------===// + +#ifndef _CUDA___NUMERIC_ISCLOSE_H +#define _CUDA___NUMERIC_ISCLOSE_H + +#include + +#if defined(_CCCL_IMPLICIT_SYSTEM_HEADER_GCC) +# pragma GCC system_header +#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_CLANG) +# pragma clang system_header +#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_MSVC) +# pragma system_header +#endif // no system header + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +_CCCL_BEGIN_NAMESPACE_CUDA + +template +using __isclose_comparison_t = ::cuda::std::__promote_t<_Tp>; + +template +using __isclose_comparison2_t = ::cuda::std::__promote_t<__isclose_comparison_t<_Tp>, _Up>; + +template ::value> +inline constexpr bool __isclose_has_comparison_v = false; + +template +inline constexpr bool __isclose_has_comparison_v<_Tp, true> = true; + +template > +inline constexpr bool __isclose_has_comparison2_v = false; + +template +inline constexpr bool __isclose_has_comparison2_v<_Tp, _Up, true> = + ::cuda::std::__promote<__isclose_comparison_t<_Tp>, _Up>::value; + +template > +inline constexpr bool __isclose_has_abs_tol_v = false; + +template +inline constexpr bool __isclose_has_abs_tol_v<_Tp, _AbsTol, true> = + ::cuda::std::is_same_v<__isclose_comparison_t<_Tp>, __isclose_comparison2_t<_Tp, _AbsTol>>; + +template > +inline constexpr bool __isclose_has_complex_comparison_v = false; + +template +inline constexpr bool __isclose_has_complex_comparison_v<_ComplexType, true> = + __isclose_has_comparison_v; + +template > +inline constexpr bool __isclose_has_complex_abs_tol_v = false; + +template +inline constexpr bool __isclose_has_complex_abs_tol_v<_ComplexType, _AbsTol, true> = + __isclose_has_abs_tol_v; + +template +[[nodiscard]] _CCCL_API constexpr float __isclose_default_rel_tol() noexcept +{ + constexpr auto __digits = 
::cuda::ceil_div(::cuda::std::numeric_limits<_Tp>::max_digits10, 2); + auto __tol = 1.0f; + for (int __i = 0; __i < __digits; ++__i) + { + __tol /= 10.0f; + } + return __tol; +} + +template +_CCCL_API constexpr void __isclose_validate_tolerances(const float __rel_tol, const _Tp __abs_tol) noexcept +{ + _CCCL_ASSERT(::cuda::std::isfinite(__rel_tol) && __rel_tol >= 0.0f, + "cuda::isclose: relative tolerance must be finite and non-negative"); + _CCCL_ASSERT(::cuda::std::isfinite(__abs_tol) && __abs_tol >= _Tp{0}, + "cuda::isclose: absolute tolerance must be finite and non-negative"); +} + +template +[[nodiscard]] _CCCL_API constexpr bool __isclose_compare( + const _Tp __diff, const _Tp __lhs_abs, const _Tp __rhs_abs, const float __rel_tol, const _Tp __abs_tol) noexcept +{ + return __diff <= ::cuda::std::max(__abs_tol, static_cast<_Tp>(__rel_tol) * ::cuda::std::max(__lhs_abs, __rhs_abs)); +} + +template +[[nodiscard]] _CCCL_API constexpr bool +__isclose_impl(const _Tp __lhs, const _Tp __rhs, const float __rel_tol, const _Tp __abs_tol) noexcept +{ + ::cuda::__isclose_validate_tolerances(__rel_tol, __abs_tol); + + if (__lhs == __rhs) + { + return true; + } + if (::cuda::std::isnan(__lhs) || ::cuda::std::isnan(__rhs)) + { + return false; + } + if (::cuda::std::isinf(__lhs) || ::cuda::std::isinf(__rhs)) + { + return false; + } + + return ::cuda::__isclose_compare( + ::cuda::std::abs(__lhs - __rhs), ::cuda::std::abs(__lhs), ::cuda::std::abs(__rhs), __rel_tol, __abs_tol); +} + +template +[[nodiscard]] _CCCL_API _Tp __isclose_hypot(const _Tp __real_part, const _Tp __imag_part) noexcept +{ + return ::cuda::std::hypot(__real_part, __imag_part); +} + +template +[[nodiscard]] _CCCL_API bool __isclose_complex_impl( + const _ComplexType& __lhs, const _ComplexType& __rhs, const float __rel_tol, const _AbsTol __abs_tol) noexcept +{ + using _Value = typename _ComplexType::value_type; + using _Comparison = __isclose_comparison_t<_Value>; + + const auto __lhs_real = 
static_cast<_Comparison>(::cuda::__get_real(__lhs)); + const auto __lhs_imag = static_cast<_Comparison>(::cuda::__get_imag(__lhs)); + const auto __rhs_real = static_cast<_Comparison>(::cuda::__get_real(__rhs)); + const auto __rhs_imag = static_cast<_Comparison>(::cuda::__get_imag(__rhs)); + const auto __abs = static_cast<_Comparison>(__abs_tol); + + ::cuda::__isclose_validate_tolerances(__rel_tol, __abs); + + if (__lhs_real == __rhs_real && __lhs_imag == __rhs_imag) + { + return true; + } + if (::cuda::std::isnan(__lhs_real) || ::cuda::std::isnan(__lhs_imag) || ::cuda::std::isnan(__rhs_real) + || ::cuda::std::isnan(__rhs_imag)) + { + return false; + } + if (::cuda::std::isinf(__lhs_real) || ::cuda::std::isinf(__lhs_imag) || ::cuda::std::isinf(__rhs_real) + || ::cuda::std::isinf(__rhs_imag)) + { + return false; + } + + const auto __diff = ::cuda::__isclose_hypot( + static_cast<_Comparison>(__lhs_real - __rhs_real), static_cast<_Comparison>(__lhs_imag - __rhs_imag)); + const auto __lhs_abs = ::cuda::__isclose_hypot(__lhs_real, __lhs_imag); + const auto __rhs_abs = ::cuda::__isclose_hypot(__rhs_real, __rhs_imag); + return ::cuda::__isclose_compare(__diff, __lhs_abs, __rhs_abs, __rel_tol, __abs); +} + +//! @brief Checks whether two arithmetic values are close to each other using a relative and absolute tolerance. +_CCCL_TEMPLATE(class _Tp, class _AbsTol) +_CCCL_REQUIRES(__isclose_has_abs_tol_v<_Tp, _AbsTol>) +[[nodiscard]] _CCCL_API constexpr bool +isclose(const _Tp __lhs, const _Tp __rhs, const float __rel_tol, const _AbsTol __abs_tol) noexcept +{ + using _Comparison = __isclose_comparison_t<_Tp>; + return ::cuda::__isclose_impl( + static_cast<_Comparison>(__lhs), static_cast<_Comparison>(__rhs), __rel_tol, static_cast<_Comparison>(__abs_tol)); +} + +//! @brief Checks whether two arithmetic values are close to each other using a relative tolerance. 
+_CCCL_TEMPLATE(class _Tp) +_CCCL_REQUIRES(__isclose_has_comparison_v<_Tp>) +[[nodiscard]] _CCCL_API constexpr bool isclose(const _Tp __lhs, const _Tp __rhs, const float __rel_tol) noexcept +{ + using _Comparison = __isclose_comparison_t<_Tp>; + return ::cuda::isclose(__lhs, __rhs, __rel_tol, _Comparison{0}); +} + +//! @brief Checks whether two arithmetic values are close to each other using the default relative tolerance. +_CCCL_TEMPLATE(class _Tp) +_CCCL_REQUIRES(__isclose_has_comparison_v<_Tp>) +[[nodiscard]] _CCCL_API constexpr bool isclose(const _Tp __lhs, const _Tp __rhs) noexcept +{ + using _Comparison = __isclose_comparison_t<_Tp>; + return ::cuda::isclose(__lhs, __rhs, ::cuda::__isclose_default_rel_tol<_Comparison>(), _Comparison{0}); +} + +//! @brief Checks whether two complex values are close to each other using a relative and absolute tolerance. +_CCCL_TEMPLATE(class _ComplexType, class _AbsTol) +_CCCL_REQUIRES(__isclose_has_complex_abs_tol_v<_ComplexType, _AbsTol>) +[[nodiscard]] _CCCL_API bool +isclose(const _ComplexType& __lhs, const _ComplexType& __rhs, const float __rel_tol, const _AbsTol __abs_tol) noexcept +{ + return ::cuda::__isclose_complex_impl(__lhs, __rhs, __rel_tol, __abs_tol); +} + +//! @brief Checks whether two complex values are close to each other using a relative tolerance. +_CCCL_TEMPLATE(class _ComplexType) +_CCCL_REQUIRES(__isclose_has_complex_comparison_v<_ComplexType>) +[[nodiscard]] _CCCL_API bool +isclose(const _ComplexType& __lhs, const _ComplexType& __rhs, const float __rel_tol) noexcept +{ + using _Comparison = __isclose_comparison_t; + return ::cuda::isclose(__lhs, __rhs, __rel_tol, _Comparison{0}); +} + +//! @brief Checks whether two complex values are close to each other using the default relative tolerance. 
+_CCCL_TEMPLATE(class _ComplexType) +_CCCL_REQUIRES(__isclose_has_complex_comparison_v<_ComplexType>) +[[nodiscard]] _CCCL_API bool isclose(const _ComplexType& __lhs, const _ComplexType& __rhs) noexcept +{ + using _Comparison = __isclose_comparison_t; + return ::cuda::isclose(__lhs, __rhs, ::cuda::__isclose_default_rel_tol<_Comparison>(), _Comparison{0}); +} + +_CCCL_END_NAMESPACE_CUDA + +#include + +#endif // _CUDA___NUMERIC_ISCLOSE_H diff --git a/libcudacxx/include/cuda/numeric b/libcudacxx/include/cuda/numeric index f9beaa7fa7f..9cf3feaf42f 100644 --- a/libcudacxx/include/cuda/numeric +++ b/libcudacxx/include/cuda/numeric @@ -22,6 +22,7 @@ #include #include +#include #include #include #include diff --git a/libcudacxx/test/libcudacxx/cuda/numeric/isclose/isclose.pass.cpp b/libcudacxx/test/libcudacxx/cuda/numeric/isclose/isclose.pass.cpp new file mode 100644 index 00000000000..25cd3dea157 --- /dev/null +++ b/libcudacxx/test/libcudacxx/cuda/numeric/isclose/isclose.pass.cpp @@ -0,0 +1,181 @@ +//===----------------------------------------------------------------------===// +// +// Part of libcu++, the C++ Standard Library for your entire system, +// under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. 
+// +//===----------------------------------------------------------------------===// + +#include +#include +#include +#include +#include +#include + +#include "test_macros.h" + +template +inline constexpr bool has_isclose_v = false; + +template +inline constexpr bool + has_isclose_v(), cuda::std::declval()))>> = + true; + +template +inline constexpr bool has_isclose_abs_tol_v = false; + +template +inline constexpr bool has_isclose_abs_tol_v< + T, + AbsTol, + cuda::std::void_t(), cuda::std::declval(), 0.0f, cuda::std::declval()))>> = true; + +template +TEST_FUNC constexpr float default_rel_tol() +{ + constexpr auto digits = (cuda::std::numeric_limits::max_digits10 + 1) / 2; + auto tol = 1.0f; + for (int i = 0; i < digits; ++i) + { + tol /= 10.0f; + } + return tol; +} + +template +TEST_FUNC constexpr bool test_floating_point() +{ + static_assert(cuda::std::is_same_v); + static_assert(cuda::std::is_same_v); + static_assert(cuda::std::is_same_v); + static_assert(noexcept(cuda::isclose(T{}, T{}))); + static_assert(noexcept(cuda::isclose(T{}, T{}, 0.0f))); + static_assert(noexcept(cuda::isclose(T{}, T{}, 0.0f, T{}))); + + constexpr auto tol = default_rel_tol(); + assert(cuda::isclose(T{1}, T{1})); + assert(cuda::isclose(T{1}, T{1} + tol / T{2})); + assert(!cuda::isclose(T{1}, T{1} + tol * T{2})); + + assert(cuda::isclose(T{10}, T{11}, 0.1f)); + assert(cuda::isclose(T{11}, T{10}, 0.1f)); + assert(!cuda::isclose(T{10}, T{12}, 0.1f)); + + assert(!cuda::isclose(T{0}, tol / T{2})); + assert(cuda::isclose(T{0}, T{0.5}, 0.0f, T{0.5})); + assert(!cuda::isclose(T{0}, T{0.5}, 0.0f, T{0.25})); + + const auto inf = cuda::std::numeric_limits::infinity(); + const auto nan = cuda::std::numeric_limits::quiet_NaN(); + assert(cuda::isclose(inf, inf)); + assert(cuda::isclose(-inf, -inf)); + assert(!cuda::isclose(inf, -inf)); + assert(!cuda::isclose(inf, T{1}, 10.0f)); + assert(!cuda::isclose(nan, nan)); + assert(!cuda::isclose(nan, T{})); + + return true; +} + +TEST_FUNC constexpr bool 
test_integral() +{ + static_assert(cuda::std::is_same_v); + static_assert(cuda::std::is_same_v); + static_assert(cuda::std::is_same_v); + static_assert(noexcept(cuda::isclose(0, 0))); + static_assert(noexcept(cuda::isclose(0, 0, 0.0f))); + static_assert(noexcept(cuda::isclose(0, 0, 0.0f, 0.0))); + + assert(cuda::isclose(1, 1)); + assert(cuda::isclose(1u, 1u)); + assert(!cuda::isclose(1, 2)); + assert(cuda::isclose(100, 101, 0.02f)); + assert(cuda::isclose(101, 100, 0.02f)); + assert(!cuda::isclose(100, 103, 0.02f)); + assert(cuda::isclose(0, 1, 0.0f, 1.0)); + assert(!cuda::isclose(0, 1, 0.0f, 0.5)); + + static_assert(!has_isclose_v); + static_assert(!has_isclose_v); + static_assert(has_isclose_abs_tol_v); + static_assert(has_isclose_abs_tol_v); + static_assert(!has_isclose_abs_tol_v); + static_assert(has_isclose_abs_tol_v); + static_assert(has_isclose_abs_tol_v); + + return true; +} + +template +TEST_FUNC void test_complex() +{ + using T = typename Complex::value_type; + + static_assert(cuda::std::is_same_v); + static_assert(cuda::std::is_same_v); + static_assert(cuda::std::is_same_v); + static_assert(noexcept(cuda::isclose(Complex{}, Complex{}))); + static_assert(noexcept(cuda::isclose(Complex{}, Complex{}, 0.0f))); + static_assert(noexcept(cuda::isclose(Complex{}, Complex{}, 0.0f, T{}))); + + assert(cuda::isclose(Complex{T{1}, T{2}}, Complex{T{1}, T{2}})); + assert(cuda::isclose(Complex{T{3}, T{4}}, Complex{T{3}, T{4.4}}, 0.1f)); + assert(!cuda::isclose(Complex{T{3}, T{4}}, Complex{T{3}, T{5}}, 0.1f)); + + // PEP 485 uses complex magnitudes, not component-wise scalar comparisons. 
+ assert(cuda::isclose(Complex{T{1}, T{1}}, Complex{T{2}, T{0}}, 0.75f)); + + assert(!cuda::isclose(Complex{T{0}, T{0}}, Complex{T{0.3}, T{0.4}})); + assert(cuda::isclose(Complex{T{0}, T{0}}, Complex{T{0.3}, T{0.4}}, 0.0f, T{0.5})); + assert(!cuda::isclose(Complex{T{0}, T{0}}, Complex{T{0.3}, T{0.4}}, 0.0f, T{0.25})); + + const auto inf = cuda::std::numeric_limits::infinity(); + const auto nan = cuda::std::numeric_limits::quiet_NaN(); + assert(cuda::isclose(Complex{inf, T{1}}, Complex{inf, T{1}})); + assert(!cuda::isclose(Complex{inf, T{1}}, Complex{inf, T{2}}, 10.0f)); + assert(!cuda::isclose(Complex{nan, T{}}, Complex{nan, T{}})); + assert(!cuda::isclose(Complex{nan, T{}}, Complex{})); +} + +TEST_FUNC void test_mixed_complex() +{ + static_assert(!has_isclose_v, cuda::std::complex>); + static_assert(!has_isclose_v, cuda::complex>); + static_assert(!has_isclose_v, cuda::std::complex>); + static_assert(has_isclose_abs_tol_v, float>); + static_assert(has_isclose_abs_tol_v, float>); + static_assert(!has_isclose_abs_tol_v, double>); + static_assert(!has_isclose_abs_tol_v, double>); +} + +TEST_FUNC constexpr bool test() +{ + test_floating_point(); + test_floating_point(); +#if _CCCL_HAS_LONG_DOUBLE() + test_floating_point(); +#endif // _CCCL_HAS_LONG_DOUBLE() + + test_integral(); + + return true; +} + +int main(int, char**) +{ + test(); + static_assert(test()); + + test_complex>(); + test_complex>(); + test_complex>(); + test_complex>(); + test_mixed_complex(); + + return 0; +} From f89bda2ea2a899372537f442992d576094361b04 Mon Sep 17 00:00:00 2001 From: fbusato Date: Tue, 23 Jun 2026 11:13:05 -0700 Subject: [PATCH 02/14] simplifications --- libcudacxx/include/cuda/__numeric/isclose.h | 66 ++++++++------------- 1 file changed, 26 insertions(+), 40 deletions(-) diff --git a/libcudacxx/include/cuda/__numeric/isclose.h b/libcudacxx/include/cuda/__numeric/isclose.h index 76e7d7c91e8..4559c34f7de 100644 --- a/libcudacxx/include/cuda/__numeric/isclose.h +++ 
b/libcudacxx/include/cuda/__numeric/isclose.h @@ -24,6 +24,7 @@ #include #include #include +#include #include #include #include @@ -80,7 +81,7 @@ inline constexpr bool __isclose_has_complex_abs_tol_v<_ComplexType, _AbsTol, tru __isclose_has_abs_tol_v; template -[[nodiscard]] _CCCL_API constexpr float __isclose_default_rel_tol() noexcept +[[nodiscard]] _CCCL_API _CCCL_CONSTEVAL float __isclose_default_relative_tolerance() noexcept { constexpr auto __digits = ::cuda::ceil_div(::cuda::std::numeric_limits<_Tp>::max_digits10, 2); auto __tol = 1.0f; @@ -91,49 +92,28 @@ template return __tol; } -template -_CCCL_API constexpr void __isclose_validate_tolerances(const float __rel_tol, const _Tp __abs_tol) noexcept -{ - _CCCL_ASSERT(::cuda::std::isfinite(__rel_tol) && __rel_tol >= 0.0f, - "cuda::isclose: relative tolerance must be finite and non-negative"); - _CCCL_ASSERT(::cuda::std::isfinite(__abs_tol) && __abs_tol >= _Tp{0}, - "cuda::isclose: absolute tolerance must be finite and non-negative"); -} - -template -[[nodiscard]] _CCCL_API constexpr bool __isclose_compare( - const _Tp __diff, const _Tp __lhs_abs, const _Tp __rhs_abs, const float __rel_tol, const _Tp __abs_tol) noexcept -{ - return __diff <= ::cuda::std::max(__abs_tol, static_cast<_Tp>(__rel_tol) * ::cuda::std::max(__lhs_abs, __rhs_abs)); -} - template [[nodiscard]] _CCCL_API constexpr bool __isclose_impl(const _Tp __lhs, const _Tp __rhs, const float __rel_tol, const _Tp __abs_tol) noexcept { - ::cuda::__isclose_validate_tolerances(__rel_tol, __abs_tol); + _CCCL_ASSERT(::cuda::in_range(__rel_tol, 0.0f, 1.0f), + "cuda::isclose: relative tolerance must be in the range [0.0, 1.0]"); + _CCCL_ASSERT(::cuda::std::isfinite(__abs_tol) && __abs_tol >= _Tp{0}, + "cuda::isclose: absolute tolerance must be finite and non-negative"); if (__lhs == __rhs) { return true; } - if (::cuda::std::isnan(__lhs) || ::cuda::std::isnan(__rhs)) + if (!::cuda::std::isfinite(__lhs) || !::cuda::std::isfinite(__rhs)) { return false; } - if 
(::cuda::std::isinf(__lhs) || ::cuda::std::isinf(__rhs)) - { - return false; - } - - return ::cuda::__isclose_compare( - ::cuda::std::abs(__lhs - __rhs), ::cuda::std::abs(__lhs), ::cuda::std::abs(__rhs), __rel_tol, __abs_tol); -} - -template -[[nodiscard]] _CCCL_API _Tp __isclose_hypot(const _Tp __real_part, const _Tp __imag_part) noexcept -{ - return ::cuda::std::hypot(__real_part, __imag_part); + const auto __diff = ::cuda::std::abs(__lhs - __rhs); + const auto __lhs_abs = ::cuda::std::abs(__lhs); + const auto __rhs_abs = ::cuda::std::abs(__rhs); + const auto __rel_value = static_cast<_Tp>(__rel_tol * ::cuda::std::max(__lhs_abs, __rhs_abs)); + return __diff <= ::cuda::std::max(__abs_tol, __rel_value); } template @@ -143,14 +123,17 @@ template using _Value = typename _ComplexType::value_type; using _Comparison = __isclose_comparison_t<_Value>; + _CCCL_ASSERT(::cuda::std::isfinite(__rel_tol) && __rel_tol >= 0.0f, + "cuda::isclose: relative tolerance must be finite and non-negative"); + _CCCL_ASSERT(::cuda::std::isfinite(__abs_tol) && __abs_tol >= _Value{0}, + "cuda::isclose: absolute tolerance must be finite and non-negative"); + const auto __lhs_real = static_cast<_Comparison>(::cuda::__get_real(__lhs)); const auto __lhs_imag = static_cast<_Comparison>(::cuda::__get_imag(__lhs)); const auto __rhs_real = static_cast<_Comparison>(::cuda::__get_real(__rhs)); const auto __rhs_imag = static_cast<_Comparison>(::cuda::__get_imag(__rhs)); const auto __abs = static_cast<_Comparison>(__abs_tol); - ::cuda::__isclose_validate_tolerances(__rel_tol, __abs); - if (__lhs_real == __rhs_real && __lhs_imag == __rhs_imag) { return true; @@ -166,13 +149,16 @@ template return false; } - const auto __diff = ::cuda::__isclose_hypot( + const auto __diff = ::cuda::std::hypot( static_cast<_Comparison>(__lhs_real - __rhs_real), static_cast<_Comparison>(__lhs_imag - __rhs_imag)); - const auto __lhs_abs = ::cuda::__isclose_hypot(__lhs_real, __lhs_imag); - const auto __rhs_abs = 
::cuda::__isclose_hypot(__rhs_real, __rhs_imag); - return ::cuda::__isclose_compare(__diff, __lhs_abs, __rhs_abs, __rel_tol, __abs); + const auto __lhs_abs = ::cuda::std::hypot(__lhs_real, __lhs_imag); + const auto __rhs_abs = ::cuda::std::hypot(__rhs_real, __rhs_imag); + return __diff + <= ::cuda::std::max(__abs_tol, static_cast<_Comparison>(__rel_tol * ::cuda::std::max(__lhs_abs, __rhs_abs))); } +//---------------------------------------------------------------------------------------------------------------------- + //! @brief Checks whether two arithmetic values are close to each other using a relative and absolute tolerance. _CCCL_TEMPLATE(class _Tp, class _AbsTol) _CCCL_REQUIRES(__isclose_has_abs_tol_v<_Tp, _AbsTol>) @@ -199,7 +185,7 @@ _CCCL_REQUIRES(__isclose_has_comparison_v<_Tp>) [[nodiscard]] _CCCL_API constexpr bool isclose(const _Tp __lhs, const _Tp __rhs) noexcept { using _Comparison = __isclose_comparison_t<_Tp>; - return ::cuda::isclose(__lhs, __rhs, ::cuda::__isclose_default_rel_tol<_Comparison>(), _Comparison{0}); + return ::cuda::isclose(__lhs, __rhs, ::cuda::__isclose_default_relative_tolerance<_Comparison>(), _Comparison{0}); } //! @brief Checks whether two complex values are close to each other using a relative and absolute tolerance. 
@@ -227,7 +213,7 @@ _CCCL_REQUIRES(__isclose_has_complex_comparison_v<_ComplexType>) [[nodiscard]] _CCCL_API bool isclose(const _ComplexType& __lhs, const _ComplexType& __rhs) noexcept { using _Comparison = __isclose_comparison_t; - return ::cuda::isclose(__lhs, __rhs, ::cuda::__isclose_default_rel_tol<_Comparison>(), _Comparison{0}); + return ::cuda::isclose(__lhs, __rhs, ::cuda::__isclose_default_relative_tolerance<_Comparison>(), _Comparison{0}); } _CCCL_END_NAMESPACE_CUDA From 3f07bfda8923054891c212da07fd3ffa153e017f Mon Sep 17 00:00:00 2001 From: fbusato Date: Tue, 23 Jun 2026 18:06:54 -0700 Subject: [PATCH 03/14] unit test --- libcudacxx/include/cuda/__numeric/isclose.h | 222 ++++++++++-------- .../cuda/numeric/isclose/isclose.pass.cpp | 168 +++++++------ 2 files changed, 214 insertions(+), 176 deletions(-) diff --git a/libcudacxx/include/cuda/__numeric/isclose.h b/libcudacxx/include/cuda/__numeric/isclose.h index 4559c34f7de..41a5acf8f8e 100644 --- a/libcudacxx/include/cuda/__numeric/isclose.h +++ b/libcudacxx/include/cuda/__numeric/isclose.h @@ -22,85 +22,52 @@ #endif // no system header #include +#include #include #include +#include #include #include #include #include #include -#include -#include #include #include +#include +#include +#include #include -#include +#include +#include #include _CCCL_BEGIN_NAMESPACE_CUDA -template -using __isclose_comparison_t = ::cuda::std::__promote_t<_Tp>; +template +using __isclose_compare_t = ::cuda::std:: + conditional_t<(::cuda::std::__is_extended_floating_point_v<_Tp> && sizeof(_Tp) <= sizeof(float)), float, _Tp>; -template -using __isclose_comparison2_t = ::cuda::std::__promote_t<__isclose_comparison_t<_Tp>, _Up>; - -template ::value> -inline constexpr bool __isclose_has_comparison_v = false; - -template -inline constexpr bool __isclose_has_comparison_v<_Tp, true> = true; - -template > -inline constexpr bool __isclose_has_comparison2_v = false; - -template -inline constexpr bool __isclose_has_comparison2_v<_Tp, _Up, 
true> = - ::cuda::std::__promote<__isclose_comparison_t<_Tp>, _Up>::value; - -template > -inline constexpr bool __isclose_has_abs_tol_v = false; - -template -inline constexpr bool __isclose_has_abs_tol_v<_Tp, _AbsTol, true> = - ::cuda::std::is_same_v<__isclose_comparison_t<_Tp>, __isclose_comparison2_t<_Tp, _AbsTol>>; - -template > -inline constexpr bool __isclose_has_complex_comparison_v = false; - -template -inline constexpr bool __isclose_has_complex_comparison_v<_ComplexType, true> = - __isclose_has_comparison_v; - -template > -inline constexpr bool __isclose_has_complex_abs_tol_v = false; - -template -inline constexpr bool __isclose_has_complex_abs_tol_v<_ComplexType, _AbsTol, true> = - __isclose_has_abs_tol_v; - -template +template [[nodiscard]] _CCCL_API _CCCL_CONSTEVAL float __isclose_default_relative_tolerance() noexcept { constexpr auto __digits = ::cuda::ceil_div(::cuda::std::numeric_limits<_Tp>::max_digits10, 2); - auto __tol = 1.0f; + auto __exp = 1.0f; for (int __i = 0; __i < __digits; ++__i) { - __tol /= 10.0f; + __exp *= 10.0f; } - return __tol; + return 1.0f / __exp; } -template +template [[nodiscard]] _CCCL_API constexpr bool -__isclose_impl(const _Tp __lhs, const _Tp __rhs, const float __rel_tol, const _Tp __abs_tol) noexcept +__isclose_fp_impl(const _Tp __lhs, const _Tp __rhs, const float __rel_tol, const _Tp __abs_tol) noexcept { _CCCL_ASSERT(::cuda::in_range(__rel_tol, 0.0f, 1.0f), "cuda::isclose: relative tolerance must be in the range [0.0, 1.0]"); _CCCL_ASSERT(::cuda::std::isfinite(__abs_tol) && __abs_tol >= _Tp{0}, "cuda::isclose: absolute tolerance must be finite and non-negative"); - if (__lhs == __rhs) { return true; @@ -116,104 +83,159 @@ __isclose_impl(const _Tp __lhs, const _Tp __rhs, const float __rel_tol, const _T return __diff <= ::cuda::std::max(__abs_tol, __rel_value); } -template -[[nodiscard]] _CCCL_API bool __isclose_complex_impl( +template +[[nodiscard]] _CCCL_HOST_DEVICE_API bool __isclose_complex_impl( const _ComplexType& 
__lhs, const _ComplexType& __rhs, const float __rel_tol, const _AbsTol __abs_tol) noexcept { - using _Value = typename _ComplexType::value_type; - using _Comparison = __isclose_comparison_t<_Value>; - - _CCCL_ASSERT(::cuda::std::isfinite(__rel_tol) && __rel_tol >= 0.0f, - "cuda::isclose: relative tolerance must be finite and non-negative"); - _CCCL_ASSERT(::cuda::std::isfinite(__abs_tol) && __abs_tol >= _Value{0}, + using __scalar_t = typename _ComplexType::value_type; + using __compare_t = __isclose_compare_t<__scalar_t>; + _CCCL_ASSERT(::cuda::in_range(__rel_tol, 0.0f, 1.0f), + "cuda::isclose: relative tolerance must be in the range [0.0, 1.0]"); + _CCCL_ASSERT(::cuda::std::isfinite(__abs_tol) && __abs_tol >= __scalar_t{0}, "cuda::isclose: absolute tolerance must be finite and non-negative"); - const auto __lhs_real = static_cast<_Comparison>(::cuda::__get_real(__lhs)); - const auto __lhs_imag = static_cast<_Comparison>(::cuda::__get_imag(__lhs)); - const auto __rhs_real = static_cast<_Comparison>(::cuda::__get_real(__rhs)); - const auto __rhs_imag = static_cast<_Comparison>(::cuda::__get_imag(__rhs)); - const auto __abs = static_cast<_Comparison>(__abs_tol); + const auto __lhs_real = static_cast<__compare_t>(::cuda::__get_real(__lhs)); + const auto __lhs_imag = static_cast<__compare_t>(::cuda::__get_imag(__lhs)); + const auto __rhs_real = static_cast<__compare_t>(::cuda::__get_real(__rhs)); + const auto __rhs_imag = static_cast<__compare_t>(::cuda::__get_imag(__rhs)); + const auto __abs = static_cast<__compare_t>(__abs_tol); if (__lhs_real == __rhs_real && __lhs_imag == __rhs_imag) { return true; } - if (::cuda::std::isnan(__lhs_real) || ::cuda::std::isnan(__lhs_imag) || ::cuda::std::isnan(__rhs_real) - || ::cuda::std::isnan(__rhs_imag)) + if (!::cuda::std::isfinite(__lhs_real) || !::cuda::std::isfinite(__lhs_imag) || !::cuda::std::isfinite(__rhs_real) + || !::cuda::std::isfinite(__rhs_imag)) { return false; } - if (::cuda::std::isinf(__lhs_real) || 
::cuda::std::isinf(__lhs_imag) || ::cuda::std::isinf(__rhs_real) - || ::cuda::std::isinf(__rhs_imag)) + const auto __diff = ::cuda::std::hypot(__lhs_real - __rhs_real, __lhs_imag - __rhs_imag); + const auto __lhs_abs = ::cuda::std::hypot(__lhs_real, __lhs_imag); + const auto __rhs_abs = ::cuda::std::hypot(__rhs_real, __rhs_imag); + const auto __rel_value = __rel_tol * ::cuda::std::max(__lhs_abs, __rhs_abs); + return __diff <= ::cuda::std::max(__abs, __rel_value); +} + +template +[[nodiscard]] _CCCL_API constexpr ::cuda::std::make_unsigned_t<_Tp> +__safe_diff(const _Tp __lhs, const _Tp __rhs) noexcept +{ + using __unsigned_t = ::cuda::std::make_unsigned_t<_Tp>; + const auto __lhs_abs = ::cuda::uabs(__lhs); + const auto __rhs_abs = ::cuda::uabs(__rhs); + const auto __is_lhs_negative = ::cuda::std::__cccl_is_signed_integer_v<_Tp> && __lhs < _Tp{0}; + const auto __is_rhs_negative = ::cuda::std::__cccl_is_signed_integer_v<_Tp> && __rhs < _Tp{0}; + + if (__is_lhs_negative != __is_rhs_negative) { - return false; + return static_cast<__unsigned_t>(__lhs_abs + __rhs_abs); } + return (__lhs_abs < __rhs_abs) + ? 
static_cast<__unsigned_t>(__rhs_abs - __lhs_abs) + : static_cast<__unsigned_t>(__lhs_abs - __rhs_abs); +} - const auto __diff = ::cuda::std::hypot( - static_cast<_Comparison>(__lhs_real - __rhs_real), static_cast<_Comparison>(__lhs_imag - __rhs_imag)); - const auto __lhs_abs = ::cuda::std::hypot(__lhs_real, __lhs_imag); - const auto __rhs_abs = ::cuda::std::hypot(__rhs_real, __rhs_imag); - return __diff - <= ::cuda::std::max(__abs_tol, static_cast<_Comparison>(__rel_tol * ::cuda::std::max(__lhs_abs, __rhs_abs))); +template +[[nodiscard]] _CCCL_API constexpr bool +__isclose_integer_impl(const _Tp __lhs, const _Tp __rhs, const float __rel_tol, const _Tp __abs_tol) noexcept +{ + _CCCL_ASSERT(::cuda::in_range(__rel_tol, 0.0f, 1.0f), + "cuda::isclose: relative tolerance must be in the range [0.0, 1.0]"); + if constexpr (::cuda::std::__cccl_is_signed_integer_v<_Tp>) + { + _CCCL_ASSERT(__abs_tol >= _Tp{0}, "cuda::isclose: absolute tolerance must be non-negative"); + } + using __unsigned_t = ::cuda::std::make_unsigned_t<_Tp>; + const auto __lhs_abs = ::cuda::uabs(__lhs); + const auto __rhs_abs = ::cuda::uabs(__rhs); + const auto __diff = ::cuda::__safe_diff(__lhs, __rhs); + const auto __abs = static_cast<__unsigned_t>(__abs_tol); + const auto __rel_value = static_cast<__unsigned_t>(__rel_tol * ::cuda::std::max(__lhs_abs, __rhs_abs)); + return __diff <= ::cuda::std::max(__abs, __rel_value); } //---------------------------------------------------------------------------------------------------------------------- +// Public API + +// Scalar overloads //! @brief Checks whether two arithmetic values are close to each other using a relative and absolute tolerance. 
-_CCCL_TEMPLATE(class _Tp, class _AbsTol) -_CCCL_REQUIRES(__isclose_has_abs_tol_v<_Tp, _AbsTol>) +_CCCL_TEMPLATE(typename _Tp) +_CCCL_REQUIRES(::cuda::std::__cccl_is_integer_v<_Tp> || ::cuda::is_floating_point_v<_Tp>) [[nodiscard]] _CCCL_API constexpr bool -isclose(const _Tp __lhs, const _Tp __rhs, const float __rel_tol, const _AbsTol __abs_tol) noexcept +isclose(const _Tp __lhs, const _Tp __rhs, const float __rel_tol, const _Tp __abs_tol) noexcept { - using _Comparison = __isclose_comparison_t<_Tp>; - return ::cuda::__isclose_impl( - static_cast<_Comparison>(__lhs), static_cast<_Comparison>(__rhs), __rel_tol, static_cast<_Comparison>(__abs_tol)); + if constexpr (::cuda::std::__cccl_is_integer_v<_Tp>) + { + return ::cuda::__isclose_integer_impl(__lhs, __rhs, __rel_tol, __abs_tol); + } + else + { + using __value_t = __isclose_compare_t<_Tp>; + return ::cuda::__isclose_fp_impl( + static_cast<__value_t>(__lhs), static_cast<__value_t>(__rhs), __rel_tol, static_cast<__value_t>(__abs_tol)); + } } //! @brief Checks whether two arithmetic values are close to each other using a relative tolerance. -_CCCL_TEMPLATE(class _Tp) -_CCCL_REQUIRES(__isclose_has_comparison_v<_Tp>) +_CCCL_TEMPLATE(typename _Tp) +_CCCL_REQUIRES(::cuda::std::__cccl_is_integer_v<_Tp> || ::cuda::is_floating_point_v<_Tp>) [[nodiscard]] _CCCL_API constexpr bool isclose(const _Tp __lhs, const _Tp __rhs, const float __rel_tol) noexcept { - using _Comparison = __isclose_comparison_t<_Tp>; - return ::cuda::isclose(__lhs, __rhs, __rel_tol, _Comparison{0}); + return ::cuda::isclose(__lhs, __rhs, __rel_tol, _Tp{0}); } //! @brief Checks whether two arithmetic values are close to each other using the default relative tolerance. 
-_CCCL_TEMPLATE(class _Tp) -_CCCL_REQUIRES(__isclose_has_comparison_v<_Tp>) +_CCCL_TEMPLATE(typename _Tp) +_CCCL_REQUIRES(::cuda::std::__cccl_is_integer_v<_Tp> || ::cuda::is_floating_point_v<_Tp>) [[nodiscard]] _CCCL_API constexpr bool isclose(const _Tp __lhs, const _Tp __rhs) noexcept { - using _Comparison = __isclose_comparison_t<_Tp>; - return ::cuda::isclose(__lhs, __rhs, ::cuda::__isclose_default_relative_tolerance<_Comparison>(), _Comparison{0}); + if constexpr (::cuda::std::__cccl_is_integer_v<_Tp>) + { + return __lhs == __rhs; + } + else + { + constexpr auto __rel_tol = ::cuda::__isclose_default_relative_tolerance<_Tp>(); + return ::cuda::isclose(__lhs, __rhs, __rel_tol, _Tp{0}); + } } +// Complex overloads + +template > +inline constexpr bool __isclose_complex_comparison_v = false; + +template +inline constexpr bool __isclose_complex_comparison_v<_Tp, _AbsTol, true> = + ::cuda::std::is_same_v; + //! @brief Checks whether two complex values are close to each other using a relative and absolute tolerance. -_CCCL_TEMPLATE(class _ComplexType, class _AbsTol) -_CCCL_REQUIRES(__isclose_has_complex_abs_tol_v<_ComplexType, _AbsTol>) -[[nodiscard]] _CCCL_API bool +_CCCL_TEMPLATE(typename _ComplexType, typename _AbsTol) +_CCCL_REQUIRES(__isclose_complex_comparison_v<_ComplexType, _AbsTol>) +[[nodiscard]] _CCCL_HOST_DEVICE_API bool isclose(const _ComplexType& __lhs, const _ComplexType& __rhs, const float __rel_tol, const _AbsTol __abs_tol) noexcept { return ::cuda::__isclose_complex_impl(__lhs, __rhs, __rel_tol, __abs_tol); } //! @brief Checks whether two complex values are close to each other using a relative tolerance. 
-_CCCL_TEMPLATE(class _ComplexType) -_CCCL_REQUIRES(__isclose_has_complex_comparison_v<_ComplexType>) -[[nodiscard]] _CCCL_API bool +_CCCL_TEMPLATE(typename _ComplexType) +_CCCL_REQUIRES(__is_any_complex_v<_ComplexType>) +[[nodiscard]] _CCCL_HOST_DEVICE_API bool isclose(const _ComplexType& __lhs, const _ComplexType& __rhs, const float __rel_tol) noexcept { - using _Comparison = __isclose_comparison_t; - return ::cuda::isclose(__lhs, __rhs, __rel_tol, _Comparison{0}); + using __scalar_t = typename _ComplexType::value_type; + return ::cuda::isclose(__lhs, __rhs, __rel_tol, __scalar_t{0}); } //! @brief Checks whether two complex values are close to each other using the default relative tolerance. -_CCCL_TEMPLATE(class _ComplexType) -_CCCL_REQUIRES(__isclose_has_complex_comparison_v<_ComplexType>) -[[nodiscard]] _CCCL_API bool isclose(const _ComplexType& __lhs, const _ComplexType& __rhs) noexcept +_CCCL_TEMPLATE(typename _ComplexType) +_CCCL_REQUIRES(__is_any_complex_v<_ComplexType>) +[[nodiscard]] _CCCL_HOST_DEVICE_API bool isclose(const _ComplexType& __lhs, const _ComplexType& __rhs) noexcept { - using _Comparison = __isclose_comparison_t; - return ::cuda::isclose(__lhs, __rhs, ::cuda::__isclose_default_relative_tolerance<_Comparison>(), _Comparison{0}); + using __scalar_t = typename _ComplexType::value_type; + return ::cuda::isclose(__lhs, __rhs, ::cuda::__isclose_default_relative_tolerance<__scalar_t>(), __scalar_t{0}); } _CCCL_END_NAMESPACE_CUDA diff --git a/libcudacxx/test/libcudacxx/cuda/numeric/isclose/isclose.pass.cpp b/libcudacxx/test/libcudacxx/cuda/numeric/isclose/isclose.pass.cpp index 25cd3dea157..54cef3f3761 100644 --- a/libcudacxx/test/libcudacxx/cuda/numeric/isclose/isclose.pass.cpp +++ b/libcudacxx/test/libcudacxx/cuda/numeric/isclose/isclose.pass.cpp @@ -15,15 +15,11 @@ #include #include -#include "test_macros.h" - -template -inline constexpr bool has_isclose_v = false; +#if _CCCL_HAS_HOST_STD_LIB() +# include +#endif // _CCCL_HAS_HOST_STD_LIB() 
-template -inline constexpr bool - has_isclose_v(), cuda::std::declval()))>> = - true; +#include "test_macros.h" template inline constexpr bool has_isclose_abs_tol_v = false; @@ -35,18 +31,6 @@ inline constexpr bool has_isclose_abs_tol_v< cuda::std::void_t(), cuda::std::declval(), 0.0f, cuda::std::declval()))>> = true; -template -TEST_FUNC constexpr float default_rel_tol() -{ - constexpr auto digits = (cuda::std::numeric_limits::max_digits10 + 1) / 2; - auto tol = 1.0f; - for (int i = 0; i < digits; ++i) - { - tol /= 10.0f; - } - return tol; -} - template TEST_FUNC constexpr bool test_floating_point() { @@ -57,57 +41,40 @@ TEST_FUNC constexpr bool test_floating_point() static_assert(noexcept(cuda::isclose(T{}, T{}, 0.0f))); static_assert(noexcept(cuda::isclose(T{}, T{}, 0.0f, T{}))); - constexpr auto tol = default_rel_tol(); + constexpr auto tol = cuda::__isclose_default_relative_tolerance(); assert(cuda::isclose(T{1}, T{1})); - assert(cuda::isclose(T{1}, T{1} + tol / T{2})); - assert(!cuda::isclose(T{1}, T{1} + tol * T{2})); + assert(cuda::isclose(T{1}, T{1} + static_cast(tol / 2.0f))); + assert(!cuda::isclose(T{1}, T{1} + static_cast(tol * 2.0f))); assert(cuda::isclose(T{10}, T{11}, 0.1f)); assert(cuda::isclose(T{11}, T{10}, 0.1f)); assert(!cuda::isclose(T{10}, T{12}, 0.1f)); - assert(!cuda::isclose(T{0}, tol / T{2})); - assert(cuda::isclose(T{0}, T{0.5}, 0.0f, T{0.5})); - assert(!cuda::isclose(T{0}, T{0.5}, 0.0f, T{0.25})); - const auto inf = cuda::std::numeric_limits::infinity(); const auto nan = cuda::std::numeric_limits::quiet_NaN(); assert(cuda::isclose(inf, inf)); assert(cuda::isclose(-inf, -inf)); assert(!cuda::isclose(inf, -inf)); - assert(!cuda::isclose(inf, T{1}, 10.0f)); assert(!cuda::isclose(nan, nan)); assert(!cuda::isclose(nan, T{})); - return true; } +template TEST_FUNC constexpr bool test_integral() { - static_assert(cuda::std::is_same_v); - static_assert(cuda::std::is_same_v); - static_assert(cuda::std::is_same_v); - 
static_assert(noexcept(cuda::isclose(0, 0))); - static_assert(noexcept(cuda::isclose(0, 0, 0.0f))); - static_assert(noexcept(cuda::isclose(0, 0, 0.0f, 0.0))); - - assert(cuda::isclose(1, 1)); - assert(cuda::isclose(1u, 1u)); - assert(!cuda::isclose(1, 2)); - assert(cuda::isclose(100, 101, 0.02f)); - assert(cuda::isclose(101, 100, 0.02f)); - assert(!cuda::isclose(100, 103, 0.02f)); - assert(cuda::isclose(0, 1, 0.0f, 1.0)); - assert(!cuda::isclose(0, 1, 0.0f, 0.5)); - - static_assert(!has_isclose_v); - static_assert(!has_isclose_v); - static_assert(has_isclose_abs_tol_v); - static_assert(has_isclose_abs_tol_v); - static_assert(!has_isclose_abs_tol_v); - static_assert(has_isclose_abs_tol_v); - static_assert(has_isclose_abs_tol_v); + static_assert(cuda::std::is_same_v); + static_assert(cuda::std::is_same_v); + static_assert(cuda::std::is_same_v); + static_assert(noexcept(cuda::isclose(T{}, T{}))); + static_assert(noexcept(cuda::isclose(T{}, T{}, 0.0f))); + static_assert(noexcept(cuda::isclose(T{}, T{}, 0.0f, T{}))); + assert(cuda::isclose(T{1}, T{1})); + assert(!cuda::isclose(T{1}, T{2})); + assert(cuda::isclose(T{100}, T{101}, 0.02f)); + assert(cuda::isclose(T{101}, T{100}, 0.02f)); + assert(!cuda::isclose(T{100}, T{103}, 0.02f)); return true; } @@ -115,42 +82,47 @@ template TEST_FUNC void test_complex() { using T = typename Complex::value_type; - static_assert(cuda::std::is_same_v); static_assert(cuda::std::is_same_v); static_assert(cuda::std::is_same_v); - static_assert(noexcept(cuda::isclose(Complex{}, Complex{}))); - static_assert(noexcept(cuda::isclose(Complex{}, Complex{}, 0.0f))); - static_assert(noexcept(cuda::isclose(Complex{}, Complex{}, 0.0f, T{}))); + static_assert(noexcept(cuda::isclose(cuda::std::declval(), cuda::std::declval()))); + static_assert(noexcept(cuda::isclose(cuda::std::declval(), cuda::std::declval(), 0.0f))); + static_assert(noexcept( + cuda::isclose(cuda::std::declval(), cuda::std::declval(), 0.0f, cuda::std::declval()))); 
assert(cuda::isclose(Complex{T{1}, T{2}}, Complex{T{1}, T{2}})); assert(cuda::isclose(Complex{T{3}, T{4}}, Complex{T{3}, T{4.4}}, 0.1f)); assert(!cuda::isclose(Complex{T{3}, T{4}}, Complex{T{3}, T{5}}, 0.1f)); - // PEP 485 uses complex magnitudes, not component-wise scalar comparisons. - assert(cuda::isclose(Complex{T{1}, T{1}}, Complex{T{2}, T{0}}, 0.75f)); - assert(!cuda::isclose(Complex{T{0}, T{0}}, Complex{T{0.3}, T{0.4}})); - assert(cuda::isclose(Complex{T{0}, T{0}}, Complex{T{0.3}, T{0.4}}, 0.0f, T{0.5})); + auto abs_tol = T{0.5}; +#if _LIBCUDACXX_HAS_NVBF16() + if constexpr (cuda::std::is_same_v) + { + abs_tol = T{0.51}; + } +#endif // _LIBCUDACXX_HAS_NVBF16() + assert(cuda::isclose(Complex{T{0}, T{0}}, Complex{T{0.3}, T{0.4}}, 0.0f, abs_tol)); assert(!cuda::isclose(Complex{T{0}, T{0}}, Complex{T{0.3}, T{0.4}}, 0.0f, T{0.25})); const auto inf = cuda::std::numeric_limits::infinity(); const auto nan = cuda::std::numeric_limits::quiet_NaN(); assert(cuda::isclose(Complex{inf, T{1}}, Complex{inf, T{1}})); - assert(!cuda::isclose(Complex{inf, T{1}}, Complex{inf, T{2}}, 10.0f)); + assert(!cuda::isclose(Complex{inf, T{1}}, Complex{inf, T{2}}, 1.0f)); assert(!cuda::isclose(Complex{nan, T{}}, Complex{nan, T{}})); assert(!cuda::isclose(Complex{nan, T{}}, Complex{})); } -TEST_FUNC void test_mixed_complex() +TEST_FUNC constexpr void test_invalid_complex_cases() { - static_assert(!has_isclose_v, cuda::std::complex>); - static_assert(!has_isclose_v, cuda::complex>); - static_assert(!has_isclose_v, cuda::std::complex>); - static_assert(has_isclose_abs_tol_v, float>); - static_assert(has_isclose_abs_tol_v, float>); + static_assert(!has_isclose_abs_tol_v, float>); + static_assert(!has_isclose_abs_tol_v, float>); static_assert(!has_isclose_abs_tol_v, double>); static_assert(!has_isclose_abs_tol_v, double>); +#if _CCCL_HAS_HOST_STD_LIB() + static_assert(!has_isclose_abs_tol_v, float>); + static_assert(!has_isclose_abs_tol_v, double>); +#endif // _CCCL_HAS_HOST_STD_LIB() } 
TEST_FUNC constexpr bool test() @@ -160,22 +132,66 @@ TEST_FUNC constexpr bool test() #if _CCCL_HAS_LONG_DOUBLE() test_floating_point(); #endif // _CCCL_HAS_LONG_DOUBLE() +#if _CCCL_HAS_FLOAT128() + test_floating_point<__float128>(); +#endif // _CCCL_HAS_FLOAT128() + test_integral(); + test_integral(); + test_integral(); + test_integral(); + test_integral(); + test_integral(); + test_integral(); + test_integral(); + test_integral(); + test_integral(); +#if _CCCL_HAS_INT128() + test_integral<__int128_t>(); + test_integral<__uint128_t>(); +#endif // _CCCL_HAS_INT128() + + test_invalid_complex_cases(); + return true; +} - test_integral(); +template