From f5a8506e15b95ce4ec912a562b0ea9aba7e02282 Mon Sep 17 00:00:00 2001
From: fbusato <fbusato@nvidia.com>
Date: Mon, 22 Jun 2026 12:48:46 -0700
Subject: [PATCH 01/14] draft

---
 docs/libcudacxx/extended_api/numeric.rst      |   6 +
 .../extended_api/numeric/isclose.rst          | 105 ++++++++
 libcudacxx/include/cuda/__numeric/isclose.h   | 237 ++++++++++++++++++
 libcudacxx/include/cuda/numeric               |   1 +
 .../cuda/numeric/isclose/isclose.pass.cpp     | 181 +++++++++++++
 5 files changed, 530 insertions(+)
 create mode 100644 docs/libcudacxx/extended_api/numeric/isclose.rst
 create mode 100644 libcudacxx/include/cuda/__numeric/isclose.h
 create mode 100644 libcudacxx/test/libcudacxx/cuda/numeric/isclose/isclose.pass.cpp
diff --git a/docs/libcudacxx/extended_api/numeric.rst b/docs/libcudacxx/extended_api/numeric.rst
index 3a00e877662..7a6957a242c 100644
--- a/docs/libcudacxx/extended_api/numeric.rst
+++ b/docs/libcudacxx/extended_api/numeric.rst
@@ -11,6 +11,7 @@ Numeric
    numeric/saturating_add_overflow
    numeric/div_overflow
    numeric/saturating_div_overflow
+   numeric/isclose
    numeric/mul_overflow
    numeric/saturating_mul_overflow
    numeric/narrow
@@ -59,6 +60,11 @@ Numeric
      - CCCL 3.2.0
      - CUDA 13.2
 
+   * - :ref:`cuda::isclose <libcudacxx-extended-api-numeric-isclose>`
+     - Checks whether two values are approximately equal
+     - CCCL 3.5.0
+     - CUDA 13.5
+
    * - :ref:`cuda::mul_overflow <libcudacxx-extended-api-numeric-mul_overflow>`
      - Performs multiplication with overflow checking
      - CCCL 3.4.0
diff --git a/docs/libcudacxx/extended_api/numeric/isclose.rst b/docs/libcudacxx/extended_api/numeric/isclose.rst
new file mode 100644
index 00000000000..2da1f5d8b08
--- /dev/null
+++ b/docs/libcudacxx/extended_api/numeric/isclose.rst
@@ -0,0 +1,105 @@
+.. _libcudacxx-extended-api-numeric-isclose:
+
+``cuda::isclose``
+=================
+
+.. code:: cpp
+
+   template <class T>
+   [[nodiscard]] constexpr bool isclose(T lhs, T rhs) noexcept;
+
+   template <class T>
+   [[nodiscard]] constexpr bool isclose(T lhs, T rhs, float rel_tol) noexcept;
+
+   template <class T, class AbsTol>
+   [[nodiscard]] constexpr bool isclose(T lhs, T rhs, float rel_tol, AbsTol abs_tol) noexcept;
+
+   template <class Complex>
+   [[nodiscard]] bool isclose(const Complex& lhs, const Complex& rhs) noexcept;
+
+   template <class Complex>
+   [[nodiscard]] bool isclose(const Complex& lhs, const Complex& rhs, float rel_tol) noexcept;
+
+   template <class Complex, class AbsTol>
+   [[nodiscard]] bool isclose(const Complex& lhs, const Complex& rhs, float rel_tol, AbsTol abs_tol) noexcept;
+
+``cuda::isclose`` checks whether two values are approximately equal using the weak symmetric comparison described by
+`PEP 485 <https://peps.python.org/pep-0485/>`_:
+
+.. code:: cpp
+
+   abs(lhs - rhs) <= max(abs_tol, rel_tol * max(abs(lhs), abs(rhs)))
+
+The overloads without ``abs_tol`` use ``abs_tol == 0``. The overloads without ``rel_tol`` use a default relative
+tolerance based on the promoted comparison type:
+
+.. code:: cpp
+
+   pow(10, -ceil_div(cuda::std::numeric_limits<Comparison>::max_digits10, 2))
+
+For ``double`` comparisons, this is ``1e-9``, matching the default relative tolerance from PEP 485. Lower-precision
+comparison types use a larger default (e.g. ``1e-5`` for ``float``); higher-precision types use a smaller one.
+
+**Parameters**
+
+- ``lhs``: The first value to compare.
+- ``rhs``: The second value to compare.
+- ``rel_tol``: The relative tolerance. Must be finite and non-negative. Passing ``0`` performs a purely absolute
+  tolerance check when ``abs_tol`` is non-zero.
+- ``abs_tol``: The absolute tolerance. Must be finite and non-negative. This is useful for comparisons near zero. The
+  supplied type may promote to the value comparison type, but may not make the value comparison type wider.
+
+**Return value**
+
+- Returns ``true`` if ``lhs`` and ``rhs`` are close to each other, otherwise returns ``false``.
+
+**Constraints**
+
+- Scalar overloads require ``lhs`` and ``rhs`` to have the same arithmetic type. ``abs_tol`` must be representable in the
+  value comparison type after promotion. For example, ``double`` values may be compared with a ``float`` absolute
+  tolerance, but ``float`` values cannot use a ``double`` absolute tolerance. ``rel_tol`` is always a ``float``.
+- Complex overloads accept ``cuda::std::complex<T>`` and ``cuda::complex<T>`` operands. ``lhs`` and ``rhs`` must have the
+  same complex type, and ``abs_tol`` must be representable in the complex value comparison type after promotion.
+  ``rel_tol`` is always a ``float``.
+
+**Special values**
+
+- NaN is never close to any value, including another NaN.
+- Infinity and negative infinity are only close to themselves.
+- Because ``abs_tol`` defaults to ``0``, comparisons against values near zero generally need an explicit ``abs_tol``.
+
+For complex values, ``cuda::isclose`` follows the ``cmath.isclose`` model from PEP 485: the difference and scaling values
+are computed from complex magnitudes, rather than comparing the real and imaginary components independently.
+
+Example
+-------
+
+.. code:: cuda
+
+    #include <cuda/__complex_>
+    #include <cuda/numeric>
+    #include <cuda/std/cassert>
+    #include <cuda/std/complex>
+
+    __global__ void kernel()
+    {
+        assert(cuda::isclose(1.0, 1.0 + 5e-10));
+        assert(!cuda::isclose(1.0, 1.0 + 5e-8));
+
+        assert(!cuda::isclose(0.0, 1e-12));
+        assert(cuda::isclose(0.0, 1e-12, 0.0, 1e-12));
+
+        cuda::std::complex<double> z1{1.0, 1.0};
+        cuda::std::complex<double> z2{2.0, 0.0};
+        assert(cuda::isclose(z1, z2, 0.75));
+
+        cuda::complex<float> z3{1.0f, 1.0f};
+        cuda::complex<float> z4{2.0f, 0.0f};
+        assert(cuda::isclose(z3, z4, 0.75f));
+    }
+
+    int main()
+    {
+        kernel<<<1, 1>>>();
+        cudaDeviceSynchronize();
+    }
diff --git a/libcudacxx/include/cuda/__numeric/isclose.h b/libcudacxx/include/cuda/__numeric/isclose.h
new file mode 100644
index 00000000000..76e7d7c91e8
--- /dev/null
+++ b/libcudacxx/include/cuda/__numeric/isclose.h
@@ -0,0 +1,237 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of libcu++, the C++ Standard Library for your entire system,
+// under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef _CUDA___NUMERIC_ISCLOSE_H
+#define _CUDA___NUMERIC_ISCLOSE_H
+
+#include <cuda/std/detail/__config>
+
+#if defined(_CCCL_IMPLICIT_SYSTEM_HEADER_GCC)
+#  pragma GCC system_header
+#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_CLANG)
+#  pragma clang system_header
+#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_MSVC)
+#  pragma system_header
+#endif // no system header
+
+#include <cuda/__cmath/ceil_div.h>
+#include <cuda/__complex/get_real_imag.h>
+#include <cuda/__complex/traits.h>
+#include <cuda/std/__algorithm/max.h>
+#include <cuda/std/__cmath/abs.h>
+#include <cuda/std/__cmath/hypot.h>
+#include <cuda/std/__cmath/isfinite.h>
+#include <cuda/std/__cmath/isinf.h>
+#include <cuda/std/__cmath/isnan.h>
+#include <cuda/std/__concepts/concept_macros.h>
+#include <cuda/std/__limits/numeric_limits.h>
+#include <cuda/std/__type_traits/is_same.h>
+#include <cuda/std/__type_traits/promote.h>
+
+#include <cuda/std/__cccl/prologue.h>
+
+_CCCL_BEGIN_NAMESPACE_CUDA
+
+template <class _Tp>
+using __isclose_comparison_t = ::cuda::std::__promote_t<_Tp>;
+
+template <class _Tp, class _Up>
+using __isclose_comparison2_t = ::cuda::std::__promote_t<__isclose_comparison_t<_Tp>, _Up>;
+
+template <class _Tp, bool = ::cuda::std::__promote<_Tp>::value>
+inline constexpr bool __isclose_has_comparison_v = false;
+
+template <class _Tp>
+inline constexpr bool __isclose_has_comparison_v<_Tp, true> = true;
+
+template <class _Tp, class _Up, bool = __isclose_has_comparison_v<_Tp>>
+inline constexpr bool __isclose_has_comparison2_v = false;
+
+template <class _Tp, class _Up>
+inline constexpr bool __isclose_has_comparison2_v<_Tp, _Up, true> =
+  ::cuda::std::__promote<__isclose_comparison_t<_Tp>, _Up>::value;
+
+template <class _Tp, class _AbsTol, bool = __isclose_has_comparison2_v<_Tp, _AbsTol>>
+inline constexpr bool __isclose_has_abs_tol_v = false;
+
+template <class _Tp, class _AbsTol>
+inline constexpr bool __isclose_has_abs_tol_v<_Tp, _AbsTol, true> =
+  ::cuda::std::is_same_v<__isclose_comparison_t<_Tp>, __isclose_comparison2_t<_Tp, _AbsTol>>;
+
+template <class _ComplexType, bool = __is_cccl_complex_v<_ComplexType>>
+inline constexpr bool __isclose_has_complex_comparison_v = false;
+
+template <class _ComplexType>
+inline constexpr bool __isclose_has_complex_comparison_v<_ComplexType, true> =
+  __isclose_has_comparison_v<typename _ComplexType::value_type>;
+
+template <class _ComplexType, class _AbsTol, bool = __is_cccl_complex_v<_ComplexType>>
+inline constexpr bool __isclose_has_complex_abs_tol_v = false;
+
+template <class _ComplexType, class _AbsTol>
+inline constexpr bool __isclose_has_complex_abs_tol_v<_ComplexType, _AbsTol, true> =
+  __isclose_has_abs_tol_v<typename _ComplexType::value_type, _AbsTol>;
+
+template <class _Tp>
+[[nodiscard]] _CCCL_API constexpr float __isclose_default_rel_tol() noexcept
+{
+  constexpr auto __digits = ::cuda::ceil_div(::cuda::std::numeric_limits<_Tp>::max_digits10, 2);
+  auto __tol              = 1.0f;
+  for (int __i = 0; __i < __digits; ++__i)
+  {
+    __tol /= 10.0f;
+  }
+  return __tol;
+}
+
+template <class _Tp>
+_CCCL_API constexpr void __isclose_validate_tolerances(const float __rel_tol, const _Tp __abs_tol) noexcept
+{
+  _CCCL_ASSERT(::cuda::std::isfinite(__rel_tol) && __rel_tol >= 0.0f,
+               "cuda::isclose: relative tolerance must be finite and non-negative");
+  _CCCL_ASSERT(::cuda::std::isfinite(__abs_tol) && __abs_tol >= _Tp{0},
+               "cuda::isclose: absolute tolerance must be finite and non-negative");
+}
+
+template <class _Tp>
+[[nodiscard]] _CCCL_API constexpr bool __isclose_compare(
+  const _Tp __diff, const _Tp __lhs_abs, const _Tp __rhs_abs, const float __rel_tol, const _Tp __abs_tol) noexcept
+{
+  return __diff <= ::cuda::std::max(__abs_tol, static_cast<_Tp>(__rel_tol) * ::cuda::std::max(__lhs_abs, __rhs_abs));
+}
+
+template <class _Tp>
+[[nodiscard]] _CCCL_API constexpr bool
+__isclose_impl(const _Tp __lhs, const _Tp __rhs, const float __rel_tol, const _Tp __abs_tol) noexcept
+{
+  ::cuda::__isclose_validate_tolerances(__rel_tol, __abs_tol);
+
+  if (__lhs == __rhs)
+  {
+    return true;
+  }
+  if (::cuda::std::isnan(__lhs) || ::cuda::std::isnan(__rhs))
+  {
+    return false;
+  }
+  if (::cuda::std::isinf(__lhs) || ::cuda::std::isinf(__rhs))
+  {
+    return false;
+  }
+
+  return ::cuda::__isclose_compare(
+    ::cuda::std::abs(__lhs - __rhs), ::cuda::std::abs(__lhs), ::cuda::std::abs(__rhs), __rel_tol, __abs_tol);
+}
+
+template <class _Tp>
+[[nodiscard]] _CCCL_API _Tp __isclose_hypot(const _Tp __real_part, const _Tp __imag_part) noexcept
+{
+  return ::cuda::std::hypot(__real_part, __imag_part);
+}
+
+template <class _ComplexType, class _AbsTol>
+[[nodiscard]] _CCCL_API bool __isclose_complex_impl(
+  const _ComplexType& __lhs, const _ComplexType& __rhs, const float __rel_tol, const _AbsTol __abs_tol) noexcept
+{
+  using _Value      = typename _ComplexType::value_type;
+  using _Comparison = __isclose_comparison_t<_Value>;
+
+  const auto __lhs_real = static_cast<_Comparison>(::cuda::__get_real(__lhs));
+  const auto __lhs_imag = static_cast<_Comparison>(::cuda::__get_imag(__lhs));
+  const auto __rhs_real = static_cast<_Comparison>(::cuda::__get_real(__rhs));
+  const auto __rhs_imag = static_cast<_Comparison>(::cuda::__get_imag(__rhs));
+  const auto __abs      = static_cast<_Comparison>(__abs_tol);
+
+  ::cuda::__isclose_validate_tolerances(__rel_tol, __abs);
+
+  if (__lhs_real == __rhs_real && __lhs_imag == __rhs_imag)
+  {
+    return true;
+  }
+  if (::cuda::std::isnan(__lhs_real) || ::cuda::std::isnan(__lhs_imag) || ::cuda::std::isnan(__rhs_real)
+      || ::cuda::std::isnan(__rhs_imag))
+  {
+    return false;
+  }
+  if (::cuda::std::isinf(__lhs_real) || ::cuda::std::isinf(__lhs_imag) || ::cuda::std::isinf(__rhs_real)
+      || ::cuda::std::isinf(__rhs_imag))
+  {
+    return false;
+  }
+
+  const auto __diff = ::cuda::__isclose_hypot(
+    static_cast<_Comparison>(__lhs_real - __rhs_real), static_cast<_Comparison>(__lhs_imag - __rhs_imag));
+  const auto __lhs_abs = ::cuda::__isclose_hypot(__lhs_real, __lhs_imag);
+  const auto __rhs_abs = ::cuda::__isclose_hypot(__rhs_real, __rhs_imag);
+  return ::cuda::__isclose_compare(__diff, __lhs_abs, __rhs_abs, __rel_tol, __abs);
+}
+
+//! @brief Checks whether two arithmetic values are close to each other using a relative and absolute tolerance.
+_CCCL_TEMPLATE(class _Tp, class _AbsTol)
+_CCCL_REQUIRES(__isclose_has_abs_tol_v<_Tp, _AbsTol>)
+[[nodiscard]] _CCCL_API constexpr bool
+isclose(const _Tp __lhs, const _Tp __rhs, const float __rel_tol, const _AbsTol __abs_tol) noexcept
+{
+  using _Comparison = __isclose_comparison_t<_Tp>;
+  return ::cuda::__isclose_impl(
+    static_cast<_Comparison>(__lhs), static_cast<_Comparison>(__rhs), __rel_tol, static_cast<_Comparison>(__abs_tol));
+}
+
+//! @brief Checks whether two arithmetic values are close to each other using a relative tolerance.
+_CCCL_TEMPLATE(class _Tp)
+_CCCL_REQUIRES(__isclose_has_comparison_v<_Tp>)
+[[nodiscard]] _CCCL_API constexpr bool isclose(const _Tp __lhs, const _Tp __rhs, const float __rel_tol) noexcept
+{
+  using _Comparison = __isclose_comparison_t<_Tp>;
+  return ::cuda::isclose(__lhs, __rhs, __rel_tol, _Comparison{0});
+}
+
+//! @brief Checks whether two arithmetic values are close to each other using the default relative tolerance.
+_CCCL_TEMPLATE(class _Tp)
+_CCCL_REQUIRES(__isclose_has_comparison_v<_Tp>)
+[[nodiscard]] _CCCL_API constexpr bool isclose(const _Tp __lhs, const _Tp __rhs) noexcept
+{
+  using _Comparison = __isclose_comparison_t<_Tp>;
+  return ::cuda::isclose(__lhs, __rhs, ::cuda::__isclose_default_rel_tol<_Comparison>(), _Comparison{0});
+}
+
+//! @brief Checks whether two complex values are close to each other using a relative and absolute tolerance.
+_CCCL_TEMPLATE(class _ComplexType, class _AbsTol)
+_CCCL_REQUIRES(__isclose_has_complex_abs_tol_v<_ComplexType, _AbsTol>)
+[[nodiscard]] _CCCL_API bool
+isclose(const _ComplexType& __lhs, const _ComplexType& __rhs, const float __rel_tol, const _AbsTol __abs_tol) noexcept
+{
+  return ::cuda::__isclose_complex_impl(__lhs, __rhs, __rel_tol, __abs_tol);
+}
+
+//! @brief Checks whether two complex values are close to each other using a relative tolerance.
+_CCCL_TEMPLATE(class _ComplexType)
+_CCCL_REQUIRES(__isclose_has_complex_comparison_v<_ComplexType>)
+[[nodiscard]] _CCCL_API bool
+isclose(const _ComplexType& __lhs, const _ComplexType& __rhs, const float __rel_tol) noexcept
+{
+  using _Comparison = __isclose_comparison_t<typename _ComplexType::value_type>;
+  return ::cuda::isclose(__lhs, __rhs, __rel_tol, _Comparison{0});
+}
+
+//! @brief Checks whether two complex values are close to each other using the default relative tolerance.
+_CCCL_TEMPLATE(class _ComplexType)
+_CCCL_REQUIRES(__isclose_has_complex_comparison_v<_ComplexType>)
+[[nodiscard]] _CCCL_API bool isclose(const _ComplexType& __lhs, const _ComplexType& __rhs) noexcept
+{
+  using _Comparison = __isclose_comparison_t<typename _ComplexType::value_type>;
+  return ::cuda::isclose(__lhs, __rhs, ::cuda::__isclose_default_rel_tol<_Comparison>(), _Comparison{0});
+}
+
+_CCCL_END_NAMESPACE_CUDA
+
+#include <cuda/std/__cccl/epilogue.h>
+
+#endif // _CUDA___NUMERIC_ISCLOSE_H
diff --git a/libcudacxx/include/cuda/numeric b/libcudacxx/include/cuda/numeric
index f9beaa7fa7f..9cf3feaf42f 100644
--- a/libcudacxx/include/cuda/numeric
+++ b/libcudacxx/include/cuda/numeric
@@ -22,6 +22,7 @@
 
 #include <cuda/__numeric/add_overflow.h>
 #include <cuda/__numeric/div_overflow.h>
+#include <cuda/__numeric/isclose.h>
 #include <cuda/__numeric/mul_overflow.h>
 #include <cuda/__numeric/narrow.h>
 #include <cuda/__numeric/overflow_cast.h>
diff --git a/libcudacxx/test/libcudacxx/cuda/numeric/isclose/isclose.pass.cpp b/libcudacxx/test/libcudacxx/cuda/numeric/isclose/isclose.pass.cpp
new file mode 100644
index 00000000000..25cd3dea157
--- /dev/null
+++ b/libcudacxx/test/libcudacxx/cuda/numeric/isclose/isclose.pass.cpp
@@ -0,0 +1,181 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of libcu++, the C++ Standard Library for your entire system,
+// under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES.
+//
+//===----------------------------------------------------------------------===//
+
+#include <cuda/__complex_>
+#include <cuda/numeric>
+#include <cuda/std/cassert>
+#include <cuda/std/complex>
+#include <cuda/std/limits>
+#include <cuda/std/type_traits>
+
+#include "test_macros.h"
+
+template <class T, class U, class = void>
+inline constexpr bool has_isclose_v = false;
+
+template <class T, class U>
+inline constexpr bool
+  has_isclose_v<T, U, cuda::std::void_t<decltype(cuda::isclose(cuda::std::declval<T>(), cuda::std::declval<U>()))>> =
+    true;
+
+template <class T, class AbsTol, class = void>
+inline constexpr bool has_isclose_abs_tol_v = false;
+
+template <class T, class AbsTol>
+inline constexpr bool has_isclose_abs_tol_v<
+  T,
+  AbsTol,
+  cuda::std::void_t<decltype(cuda::isclose(
+    cuda::std::declval<T>(), cuda::std::declval<T>(), 0.0f, cuda::std::declval<AbsTol>()))>> = true;
+
+template <class T>
+TEST_FUNC constexpr float default_rel_tol()
+{
+  constexpr auto digits = (cuda::std::numeric_limits<T>::max_digits10 + 1) / 2;
+  auto tol              = 1.0f;
+  for (int i = 0; i < digits; ++i)
+  {
+    tol /= 10.0f;
+  }
+  return tol;
+}
+
+template <class T>
+TEST_FUNC constexpr bool test_floating_point()
+{
+  static_assert(cuda::std::is_same_v<bool, decltype(cuda::isclose(T{}, T{}))>);
+  static_assert(cuda::std::is_same_v<bool, decltype(cuda::isclose(T{}, T{}, 0.0f))>);
+  static_assert(cuda::std::is_same_v<bool, decltype(cuda::isclose(T{}, T{}, 0.0f, T{}))>);
+  static_assert(noexcept(cuda::isclose(T{}, T{})));
+  static_assert(noexcept(cuda::isclose(T{}, T{}, 0.0f)));
+  static_assert(noexcept(cuda::isclose(T{}, T{}, 0.0f, T{})));
+
+  constexpr auto tol = default_rel_tol<T>();
+  assert(cuda::isclose(T{1}, T{1}));
+  assert(cuda::isclose(T{1}, T{1} + tol / T{2}));
+  assert(!cuda::isclose(T{1}, T{1} + tol * T{2}));
+
+  assert(cuda::isclose(T{10}, T{11}, 0.1f));
+  assert(cuda::isclose(T{11}, T{10}, 0.1f));
+  assert(!cuda::isclose(T{10}, T{12}, 0.1f));
+
+  assert(!cuda::isclose(T{0}, tol / T{2}));
+  assert(cuda::isclose(T{0}, T{0.5}, 0.0f, T{0.5}));
+  assert(!cuda::isclose(T{0}, T{0.5}, 0.0f, T{0.25}));
+
+  const auto inf = cuda::std::numeric_limits<T>::infinity();
+  const auto nan = cuda::std::numeric_limits<T>::quiet_NaN();
+  assert(cuda::isclose(inf, inf));
+  assert(cuda::isclose(-inf, -inf));
+  assert(!cuda::isclose(inf, -inf));
+  assert(!cuda::isclose(inf, T{1}, 10.0f));
+  assert(!cuda::isclose(nan, nan));
+  assert(!cuda::isclose(nan, T{}));
+
+  return true;
+}
+
+TEST_FUNC constexpr bool test_integral()
+{
+  static_assert(cuda::std::is_same_v<bool, decltype(cuda::isclose(0, 0))>);
+  static_assert(cuda::std::is_same_v<bool, decltype(cuda::isclose(0, 0, 0.0f))>);
+  static_assert(cuda::std::is_same_v<bool, decltype(cuda::isclose(0, 0, 0.0f, 0.0))>);
+  static_assert(noexcept(cuda::isclose(0, 0)));
+  static_assert(noexcept(cuda::isclose(0, 0, 0.0f)));
+  static_assert(noexcept(cuda::isclose(0, 0, 0.0f, 0.0)));
+
+  assert(cuda::isclose(1, 1));
+  assert(cuda::isclose(1u, 1u));
+  assert(!cuda::isclose(1, 2));
+  assert(cuda::isclose(100, 101, 0.02f));
+  assert(cuda::isclose(101, 100, 0.02f));
+  assert(!cuda::isclose(100, 103, 0.02f));
+  assert(cuda::isclose(0, 1, 0.0f, 1.0));
+  assert(!cuda::isclose(0, 1, 0.0f, 0.5));
+
+  static_assert(!has_isclose_v<int, unsigned int>);
+  static_assert(!has_isclose_v<float, double>);
+  static_assert(has_isclose_abs_tol_v<double, float>);
+  static_assert(has_isclose_abs_tol_v<double, double>);
+  static_assert(!has_isclose_abs_tol_v<float, double>);
+  static_assert(has_isclose_abs_tol_v<int, float>);
+  static_assert(has_isclose_abs_tol_v<int, double>);
+
+  return true;
+}
+
+template <class Complex>
+TEST_FUNC void test_complex()
+{
+  using T = typename Complex::value_type;
+
+  static_assert(cuda::std::is_same_v<bool, decltype(cuda::isclose(Complex{}, Complex{}))>);
+  static_assert(cuda::std::is_same_v<bool, decltype(cuda::isclose(Complex{}, Complex{}, 0.0f))>);
+  static_assert(cuda::std::is_same_v<bool, decltype(cuda::isclose(Complex{}, Complex{}, 0.0f, T{}))>);
+  static_assert(noexcept(cuda::isclose(Complex{}, Complex{})));
+  static_assert(noexcept(cuda::isclose(Complex{}, Complex{}, 0.0f)));
+  static_assert(noexcept(cuda::isclose(Complex{}, Complex{}, 0.0f, T{})));
+
+  assert(cuda::isclose(Complex{T{1}, T{2}}, Complex{T{1}, T{2}}));
+  assert(cuda::isclose(Complex{T{3}, T{4}}, Complex{T{3}, T{4.4}}, 0.1f));
+  assert(!cuda::isclose(Complex{T{3}, T{4}}, Complex{T{3}, T{5}}, 0.1f));
+
+  // PEP 485 uses complex magnitudes, not component-wise scalar comparisons.
+  assert(cuda::isclose(Complex{T{1}, T{1}}, Complex{T{2}, T{0}}, 0.75f));
+
+  assert(!cuda::isclose(Complex{T{0}, T{0}}, Complex{T{0.3}, T{0.4}}));
+  assert(cuda::isclose(Complex{T{0}, T{0}}, Complex{T{0.3}, T{0.4}}, 0.0f, T{0.5}));
+  assert(!cuda::isclose(Complex{T{0}, T{0}}, Complex{T{0.3}, T{0.4}}, 0.0f, T{0.25}));
+
+  const auto inf = cuda::std::numeric_limits<T>::infinity();
+  const auto nan = cuda::std::numeric_limits<T>::quiet_NaN();
+  assert(cuda::isclose(Complex{inf, T{1}}, Complex{inf, T{1}}));
+  assert(!cuda::isclose(Complex{inf, T{1}}, Complex{inf, T{2}}, 10.0f));
+  assert(!cuda::isclose(Complex{nan, T{}}, Complex{nan, T{}}));
+  assert(!cuda::isclose(Complex{nan, T{}}, Complex{}));
+}
+
+TEST_FUNC void test_mixed_complex()
+{
+  static_assert(!has_isclose_v<cuda::std::complex<float>, cuda::std::complex<double>>);
+  static_assert(!has_isclose_v<cuda::complex<float>, cuda::complex<double>>);
+  static_assert(!has_isclose_v<cuda::complex<float>, cuda::std::complex<float>>);
+  static_assert(has_isclose_abs_tol_v<cuda::std::complex<double>, float>);
+  static_assert(has_isclose_abs_tol_v<cuda::complex<double>, float>);
+  static_assert(!has_isclose_abs_tol_v<cuda::std::complex<float>, double>);
+  static_assert(!has_isclose_abs_tol_v<cuda::complex<float>, double>);
+}
+
+TEST_FUNC constexpr bool test()
+{
+  test_floating_point<float>();
+  test_floating_point<double>();
+#if _CCCL_HAS_LONG_DOUBLE()
+  test_floating_point<long double>();
+#endif // _CCCL_HAS_LONG_DOUBLE()
+
+  test_integral();
+
+  return true;
+}
+
+int main(int, char**)
+{
+  test();
+  static_assert(test());
+
+  test_complex<cuda::std::complex<float>>();
+  test_complex<cuda::std::complex<double>>();
+  test_complex<cuda::complex<float>>();
+  test_complex<cuda::complex<double>>();
+  test_mixed_complex();
+
+  return 0;
+}

From f89bda2ea2a899372537f442992d576094361b04 Mon Sep 17 00:00:00 2001
From: fbusato <fbusato@nvidia.com>
Date: Tue, 23 Jun 2026 11:13:05 -0700
Subject: [PATCH 02/14] simplifications

---
 libcudacxx/include/cuda/__numeric/isclose.h | 66 ++++++++-------------
 1 file changed, 26 insertions(+), 40 deletions(-)

diff --git a/libcudacxx/include/cuda/__numeric/isclose.h b/libcudacxx/include/cuda/__numeric/isclose.h
index 76e7d7c91e8..4559c34f7de 100644
--- a/libcudacxx/include/cuda/__numeric/isclose.h
+++ b/libcudacxx/include/cuda/__numeric/isclose.h
@@ -24,6 +24,7 @@
 #include <cuda/__cmath/ceil_div.h>
 #include <cuda/__complex/get_real_imag.h>
 #include <cuda/__complex/traits.h>
+#include <cuda/__utility/in_range.h>
 #include <cuda/std/__algorithm/max.h>
 #include <cuda/std/__cmath/abs.h>
 #include <cuda/std/__cmath/hypot.h>
@@ -80,7 +81,7 @@ inline constexpr bool __isclose_has_complex_abs_tol_v<_ComplexType, _AbsTol, tru
   __isclose_has_abs_tol_v<typename _ComplexType::value_type, _AbsTol>;
 
 template <class _Tp>
-[[nodiscard]] _CCCL_API constexpr float __isclose_default_rel_tol() noexcept
+[[nodiscard]] _CCCL_API _CCCL_CONSTEVAL float __isclose_default_relative_tolerance() noexcept
 {
   constexpr auto __digits = ::cuda::ceil_div(::cuda::std::numeric_limits<_Tp>::max_digits10, 2);
   auto __tol              = 1.0f;
@@ -91,49 +92,28 @@ template <class _Tp>
   return __tol;
 }
 
-template <class _Tp>
-_CCCL_API constexpr void __isclose_validate_tolerances(const float __rel_tol, const _Tp __abs_tol) noexcept
-{
-  _CCCL_ASSERT(::cuda::std::isfinite(__rel_tol) && __rel_tol >= 0.0f,
-               "cuda::isclose: relative tolerance must be finite and non-negative");
-  _CCCL_ASSERT(::cuda::std::isfinite(__abs_tol) && __abs_tol >= _Tp{0},
-               "cuda::isclose: absolute tolerance must be finite and non-negative");
-}
-
-template <class _Tp>
-[[nodiscard]] _CCCL_API constexpr bool __isclose_compare(
-  const _Tp __diff, const _Tp __lhs_abs, const _Tp __rhs_abs, const float __rel_tol, const _Tp __abs_tol) noexcept
-{
-  return __diff <= ::cuda::std::max(__abs_tol, static_cast<_Tp>(__rel_tol) * ::cuda::std::max(__lhs_abs, __rhs_abs));
-}
-
 template <class _Tp>
 [[nodiscard]] _CCCL_API constexpr bool
 __isclose_impl(const _Tp __lhs, const _Tp __rhs, const float __rel_tol, const _Tp __abs_tol) noexcept
 {
-  ::cuda::__isclose_validate_tolerances(__rel_tol, __abs_tol);
+  _CCCL_ASSERT(::cuda::in_range(__rel_tol, 0.0f, 1.0f),
+               "cuda::isclose: relative tolerance must be in the range [0.0, 1.0]");
+  _CCCL_ASSERT(::cuda::std::isfinite(__abs_tol) && __abs_tol >= _Tp{0},
+               "cuda::isclose: absolute tolerance must be finite and non-negative");
 
   if (__lhs == __rhs)
   {
     return true;
   }
-  if (::cuda::std::isnan(__lhs) || ::cuda::std::isnan(__rhs))
+  if (!::cuda::std::isfinite(__lhs) || !::cuda::std::isfinite(__rhs))
   {
     return false;
   }
-  if (::cuda::std::isinf(__lhs) || ::cuda::std::isinf(__rhs))
-  {
-    return false;
-  }
-
-  return ::cuda::__isclose_compare(
-    ::cuda::std::abs(__lhs - __rhs), ::cuda::std::abs(__lhs), ::cuda::std::abs(__rhs), __rel_tol, __abs_tol);
-}
-
-template <class _Tp>
-[[nodiscard]] _CCCL_API _Tp __isclose_hypot(const _Tp __real_part, const _Tp __imag_part) noexcept
-{
-  return ::cuda::std::hypot(__real_part, __imag_part);
+  const auto __diff      = ::cuda::std::abs(__lhs - __rhs);
+  const auto __lhs_abs   = ::cuda::std::abs(__lhs);
+  const auto __rhs_abs   = ::cuda::std::abs(__rhs);
+  const auto __rel_value = static_cast<_Tp>(__rel_tol * ::cuda::std::max(__lhs_abs, __rhs_abs));
+  return __diff <= ::cuda::std::max(__abs_tol, __rel_value);
 }
 
 template <class _ComplexType, class _AbsTol>
@@ -143,14 +123,17 @@ template <class _ComplexType, class _AbsTol>
   using _Value      = typename _ComplexType::value_type;
   using _Comparison = __isclose_comparison_t<_Value>;
 
+  _CCCL_ASSERT(::cuda::std::isfinite(__rel_tol) && __rel_tol >= 0.0f,
+               "cuda::isclose: relative tolerance must be finite and non-negative");
+  _CCCL_ASSERT(::cuda::std::isfinite(__abs_tol) && __abs_tol >= _Value{0},
+               "cuda::isclose: absolute tolerance must be finite and non-negative");
+
   const auto __lhs_real = static_cast<_Comparison>(::cuda::__get_real(__lhs));
   const auto __lhs_imag = static_cast<_Comparison>(::cuda::__get_imag(__lhs));
   const auto __rhs_real = static_cast<_Comparison>(::cuda::__get_real(__rhs));
   const auto __rhs_imag = static_cast<_Comparison>(::cuda::__get_imag(__rhs));
   const auto __abs      = static_cast<_Comparison>(__abs_tol);
 
-  ::cuda::__isclose_validate_tolerances(__rel_tol, __abs);
-
   if (__lhs_real == __rhs_real && __lhs_imag == __rhs_imag)
   {
     return true;
@@ -166,13 +149,16 @@ template <class _ComplexType, class _AbsTol>
     return false;
   }
 
-  const auto __diff = ::cuda::__isclose_hypot(
+  const auto __diff = ::cuda::std::hypot(
     static_cast<_Comparison>(__lhs_real - __rhs_real), static_cast<_Comparison>(__lhs_imag - __rhs_imag));
-  const auto __lhs_abs = ::cuda::__isclose_hypot(__lhs_real, __lhs_imag);
-  const auto __rhs_abs = ::cuda::__isclose_hypot(__rhs_real, __rhs_imag);
-  return ::cuda::__isclose_compare(__diff, __lhs_abs, __rhs_abs, __rel_tol, __abs);
+  const auto __lhs_abs = ::cuda::std::hypot(__lhs_real, __lhs_imag);
+  const auto __rhs_abs = ::cuda::std::hypot(__rhs_real, __rhs_imag);
+  return __diff
+      <= ::cuda::std::max(__abs_tol, static_cast<_Comparison>(__rel_tol * ::cuda::std::max(__lhs_abs, __rhs_abs)));
 }
 
+//----------------------------------------------------------------------------------------------------------------------
+
 //! @brief Checks whether two arithmetic values are close to each other using a relative and absolute tolerance.
 _CCCL_TEMPLATE(class _Tp, class _AbsTol)
 _CCCL_REQUIRES(__isclose_has_abs_tol_v<_Tp, _AbsTol>)
@@ -199,7 +185,7 @@ _CCCL_REQUIRES(__isclose_has_comparison_v<_Tp>)
 [[nodiscard]] _CCCL_API constexpr bool isclose(const _Tp __lhs, const _Tp __rhs) noexcept
 {
   using _Comparison = __isclose_comparison_t<_Tp>;
-  return ::cuda::isclose(__lhs, __rhs, ::cuda::__isclose_default_rel_tol<_Comparison>(), _Comparison{0});
+  return ::cuda::isclose(__lhs, __rhs, ::cuda::__isclose_default_relative_tolerance<_Comparison>(), _Comparison{0});
 }
 
 //! @brief Checks whether two complex values are close to each other using a relative and absolute tolerance.
@@ -227,7 +213,7 @@ _CCCL_REQUIRES(__isclose_has_complex_comparison_v<_ComplexType>)
 [[nodiscard]] _CCCL_API bool isclose(const _ComplexType& __lhs, const _ComplexType& __rhs) noexcept
 {
   using _Comparison = __isclose_comparison_t<typename _ComplexType::value_type>;
-  return ::cuda::isclose(__lhs, __rhs, ::cuda::__isclose_default_rel_tol<_Comparison>(), _Comparison{0});
+  return ::cuda::isclose(__lhs, __rhs, ::cuda::__isclose_default_relative_tolerance<_Comparison>(), _Comparison{0});
 }
 
 _CCCL_END_NAMESPACE_CUDA

From 3f07bfda8923054891c212da07fd3ffa153e017f Mon Sep 17 00:00:00 2001
From: fbusato <fbusato@nvidia.com>
Date: Tue, 23 Jun 2026 18:06:54 -0700
Subject: [PATCH 03/14] unit test

---
 libcudacxx/include/cuda/__numeric/isclose.h   | 222 ++++++++++--------
 .../cuda/numeric/isclose/isclose.pass.cpp     | 168 +++++++------
 2 files changed, 214 insertions(+), 176 deletions(-)

diff --git a/libcudacxx/include/cuda/__numeric/isclose.h b/libcudacxx/include/cuda/__numeric/isclose.h
index 4559c34f7de..41a5acf8f8e 100644
--- a/libcudacxx/include/cuda/__numeric/isclose.h
+++ b/libcudacxx/include/cuda/__numeric/isclose.h
@@ -22,85 +22,52 @@
 #endif // no system header
 
 #include <cuda/__cmath/ceil_div.h>
+#include <cuda/__cmath/uabs.h>
 #include <cuda/__complex/get_real_imag.h>
 #include <cuda/__complex/traits.h>
+#include <cuda/__type_traits/is_floating_point.h>
 #include <cuda/__utility/in_range.h>
 #include <cuda/std/__algorithm/max.h>
 #include <cuda/std/__cmath/abs.h>
 #include <cuda/std/__cmath/hypot.h>
 #include <cuda/std/__cmath/isfinite.h>
-#include <cuda/std/__cmath/isinf.h>
-#include <cuda/std/__cmath/isnan.h>
 #include <cuda/std/__concepts/concept_macros.h>
 #include <cuda/std/__limits/numeric_limits.h>
+#include <cuda/std/__type_traits/conditional.h>
+#include <cuda/std/__type_traits/is_extended_floating_point.h>
+#include <cuda/std/__type_traits/is_integer.h>
 #include <cuda/std/__type_traits/is_same.h>
-#include <cuda/std/__type_traits/promote.h>
+#include <cuda/std/__type_traits/is_signed_integer.h>
+#include <cuda/std/__type_traits/make_unsigned.h>
 
 #include <cuda/std/__cccl/prologue.h>
 
 _CCCL_BEGIN_NAMESPACE_CUDA
 
-template <class _Tp>
-using __isclose_comparison_t = ::cuda::std::__promote_t<_Tp>;
+template <typename _Tp>
+using __isclose_compare_t = ::cuda::std::
+  conditional_t<(::cuda::std::__is_extended_floating_point_v<_Tp> && sizeof(_Tp) <= sizeof(float)), float, _Tp>;
 
-template <class _Tp, class _Up>
-using __isclose_comparison2_t = ::cuda::std::__promote_t<__isclose_comparison_t<_Tp>, _Up>;
-
-template <class _Tp, bool = ::cuda::std::__promote<_Tp>::value>
-inline constexpr bool __isclose_has_comparison_v = false;
-
-template <class _Tp>
-inline constexpr bool __isclose_has_comparison_v<_Tp, true> = true;
-
-template <class _Tp, class _Up, bool = __isclose_has_comparison_v<_Tp>>
-inline constexpr bool __isclose_has_comparison2_v = false;
-
-template <class _Tp, class _Up>
-inline constexpr bool __isclose_has_comparison2_v<_Tp, _Up, true> =
-  ::cuda::std::__promote<__isclose_comparison_t<_Tp>, _Up>::value;
-
-template <class _Tp, class _AbsTol, bool = __isclose_has_comparison2_v<_Tp, _AbsTol>>
-inline constexpr bool __isclose_has_abs_tol_v = false;
-
-template <class _Tp, class _AbsTol>
-inline constexpr bool __isclose_has_abs_tol_v<_Tp, _AbsTol, true> =
-  ::cuda::std::is_same_v<__isclose_comparison_t<_Tp>, __isclose_comparison2_t<_Tp, _AbsTol>>;
-
-template <class _ComplexType, bool = __is_cccl_complex_v<_ComplexType>>
-inline constexpr bool __isclose_has_complex_comparison_v = false;
-
-template <class _ComplexType>
-inline constexpr bool __isclose_has_complex_comparison_v<_ComplexType, true> =
-  __isclose_has_comparison_v<typename _ComplexType::value_type>;
-
-template <class _ComplexType, class _AbsTol, bool = __is_cccl_complex_v<_ComplexType>>
-inline constexpr bool __isclose_has_complex_abs_tol_v = false;
-
-template <class _ComplexType, class _AbsTol>
-inline constexpr bool __isclose_has_complex_abs_tol_v<_ComplexType, _AbsTol, true> =
-  __isclose_has_abs_tol_v<typename _ComplexType::value_type, _AbsTol>;
-
-template <class _Tp>
+template <typename _Tp>
 [[nodiscard]] _CCCL_API _CCCL_CONSTEVAL float __isclose_default_relative_tolerance() noexcept
 {
   constexpr auto __digits = ::cuda::ceil_div(::cuda::std::numeric_limits<_Tp>::max_digits10, 2);
-  auto __tol              = 1.0f;
+  auto __exp              = 1.0f;
   for (int __i = 0; __i < __digits; ++__i)
   {
-    __tol /= 10.0f;
+    __exp *= 10.0f;
   }
-  return __tol;
+  return 1.0f / __exp;
 }
 
-template <class _Tp>
+template <typename _Tp>
 [[nodiscard]] _CCCL_API constexpr bool
-__isclose_impl(const _Tp __lhs, const _Tp __rhs, const float __rel_tol, const _Tp __abs_tol) noexcept
+__isclose_fp_impl(const _Tp __lhs, const _Tp __rhs, const float __rel_tol, const _Tp __abs_tol) noexcept
 {
   _CCCL_ASSERT(::cuda::in_range(__rel_tol, 0.0f, 1.0f),
                "cuda::isclose: relative tolerance must be in the range [0.0, 1.0]");
   _CCCL_ASSERT(::cuda::std::isfinite(__abs_tol) && __abs_tol >= _Tp{0},
                "cuda::isclose: absolute tolerance must be finite and non-negative");
-
   if (__lhs == __rhs)
   {
     return true;
@@ -116,104 +83,159 @@ __isclose_impl(const _Tp __lhs, const _Tp __rhs, const float __rel_tol, const _T
   return __diff <= ::cuda::std::max(__abs_tol, __rel_value);
 }
 
-template <class _ComplexType, class _AbsTol>
-[[nodiscard]] _CCCL_API bool __isclose_complex_impl(
+template <typename _ComplexType, typename _AbsTol>
+[[nodiscard]] _CCCL_HOST_DEVICE_API bool __isclose_complex_impl(
   const _ComplexType& __lhs, const _ComplexType& __rhs, const float __rel_tol, const _AbsTol __abs_tol) noexcept
 {
-  using _Value      = typename _ComplexType::value_type;
-  using _Comparison = __isclose_comparison_t<_Value>;
-
-  _CCCL_ASSERT(::cuda::std::isfinite(__rel_tol) && __rel_tol >= 0.0f,
-               "cuda::isclose: relative tolerance must be finite and non-negative");
-  _CCCL_ASSERT(::cuda::std::isfinite(__abs_tol) && __abs_tol >= _Value{0},
+  using __scalar_t  = typename _ComplexType::value_type;
+  using __compare_t = __isclose_compare_t<__scalar_t>;
+  _CCCL_ASSERT(::cuda::in_range(__rel_tol, 0.0f, 1.0f),
+               "cuda::isclose: relative tolerance must be in the range [0.0, 1.0]");
+  _CCCL_ASSERT(::cuda::std::isfinite(__abs_tol) && __abs_tol >= __scalar_t{0},
                "cuda::isclose: absolute tolerance must be finite and non-negative");
 
-  const auto __lhs_real = static_cast<_Comparison>(::cuda::__get_real(__lhs));
-  const auto __lhs_imag = static_cast<_Comparison>(::cuda::__get_imag(__lhs));
-  const auto __rhs_real = static_cast<_Comparison>(::cuda::__get_real(__rhs));
-  const auto __rhs_imag = static_cast<_Comparison>(::cuda::__get_imag(__rhs));
-  const auto __abs      = static_cast<_Comparison>(__abs_tol);
+  const auto __lhs_real = static_cast<__compare_t>(::cuda::__get_real(__lhs));
+  const auto __lhs_imag = static_cast<__compare_t>(::cuda::__get_imag(__lhs));
+  const auto __rhs_real = static_cast<__compare_t>(::cuda::__get_real(__rhs));
+  const auto __rhs_imag = static_cast<__compare_t>(::cuda::__get_imag(__rhs));
+  const auto __abs      = static_cast<__compare_t>(__abs_tol);
 
   if (__lhs_real == __rhs_real && __lhs_imag == __rhs_imag)
   {
     return true;
   }
-  if (::cuda::std::isnan(__lhs_real) || ::cuda::std::isnan(__lhs_imag) || ::cuda::std::isnan(__rhs_real)
-      || ::cuda::std::isnan(__rhs_imag))
+  if (!::cuda::std::isfinite(__lhs_real) || !::cuda::std::isfinite(__lhs_imag) || !::cuda::std::isfinite(__rhs_real)
+      || !::cuda::std::isfinite(__rhs_imag))
   {
     return false;
   }
-  if (::cuda::std::isinf(__lhs_real) || ::cuda::std::isinf(__lhs_imag) || ::cuda::std::isinf(__rhs_real)
-      || ::cuda::std::isinf(__rhs_imag))
+  const auto __diff      = ::cuda::std::hypot(__lhs_real - __rhs_real, __lhs_imag - __rhs_imag);
+  const auto __lhs_abs   = ::cuda::std::hypot(__lhs_real, __lhs_imag);
+  const auto __rhs_abs   = ::cuda::std::hypot(__rhs_real, __rhs_imag);
+  const auto __rel_value = __rel_tol * ::cuda::std::max(__lhs_abs, __rhs_abs);
+  return __diff <= ::cuda::std::max(__abs, __rel_value);
+}
+
+template <typename _Tp>
+[[nodiscard]] _CCCL_API constexpr ::cuda::std::make_unsigned_t<_Tp>
+__safe_diff(const _Tp __lhs, const _Tp __rhs) noexcept
+{
+  using __unsigned_t           = ::cuda::std::make_unsigned_t<_Tp>;
+  const auto __lhs_abs         = ::cuda::uabs(__lhs);
+  const auto __rhs_abs         = ::cuda::uabs(__rhs);
+  const auto __is_lhs_negative = ::cuda::std::__cccl_is_signed_integer_v<_Tp> && __lhs < _Tp{0};
+  const auto __is_rhs_negative = ::cuda::std::__cccl_is_signed_integer_v<_Tp> && __rhs < _Tp{0};
+
+  if (__is_lhs_negative != __is_rhs_negative)
   {
-    return false;
+    return static_cast<__unsigned_t>(__lhs_abs + __rhs_abs);
   }
+  return (__lhs_abs < __rhs_abs)
+         ? static_cast<__unsigned_t>(__rhs_abs - __lhs_abs)
+         : static_cast<__unsigned_t>(__lhs_abs - __rhs_abs);
+}
 
-  const auto __diff = ::cuda::std::hypot(
-    static_cast<_Comparison>(__lhs_real - __rhs_real), static_cast<_Comparison>(__lhs_imag - __rhs_imag));
-  const auto __lhs_abs = ::cuda::std::hypot(__lhs_real, __lhs_imag);
-  const auto __rhs_abs = ::cuda::std::hypot(__rhs_real, __rhs_imag);
-  return __diff
-      <= ::cuda::std::max(__abs_tol, static_cast<_Comparison>(__rel_tol * ::cuda::std::max(__lhs_abs, __rhs_abs)));
+template <typename _Tp>
+[[nodiscard]] _CCCL_API constexpr bool
+__isclose_integer_impl(const _Tp __lhs, const _Tp __rhs, const float __rel_tol, const _Tp __abs_tol) noexcept
+{
+  _CCCL_ASSERT(::cuda::in_range(__rel_tol, 0.0f, 1.0f),
+               "cuda::isclose: relative tolerance must be in the range [0.0, 1.0]");
+  if constexpr (::cuda::std::__cccl_is_signed_integer_v<_Tp>)
+  {
+    _CCCL_ASSERT(__abs_tol >= _Tp{0}, "cuda::isclose: absolute tolerance must be non-negative");
+  }
+  using __unsigned_t     = ::cuda::std::make_unsigned_t<_Tp>;
+  const auto __lhs_abs   = ::cuda::uabs(__lhs);
+  const auto __rhs_abs   = ::cuda::uabs(__rhs);
+  const auto __diff      = ::cuda::__safe_diff(__lhs, __rhs);
+  const auto __abs       = static_cast<__unsigned_t>(__abs_tol);
+  const auto __rel_value = static_cast<__unsigned_t>(__rel_tol * ::cuda::std::max(__lhs_abs, __rhs_abs));
+  return __diff <= ::cuda::std::max(__abs, __rel_value);
 }
 
 //----------------------------------------------------------------------------------------------------------------------
+// Public API
+
+// Scalar overloads
 
 //! @brief Checks whether two arithmetic values are close to each other using a relative and absolute tolerance.
-_CCCL_TEMPLATE(class _Tp, class _AbsTol)
-_CCCL_REQUIRES(__isclose_has_abs_tol_v<_Tp, _AbsTol>)
+_CCCL_TEMPLATE(typename _Tp)
+_CCCL_REQUIRES(::cuda::std::__cccl_is_integer_v<_Tp> || ::cuda::is_floating_point_v<_Tp>)
 [[nodiscard]] _CCCL_API constexpr bool
-isclose(const _Tp __lhs, const _Tp __rhs, const float __rel_tol, const _AbsTol __abs_tol) noexcept
+isclose(const _Tp __lhs, const _Tp __rhs, const float __rel_tol, const _Tp __abs_tol) noexcept
 {
-  using _Comparison = __isclose_comparison_t<_Tp>;
-  return ::cuda::__isclose_impl(
-    static_cast<_Comparison>(__lhs), static_cast<_Comparison>(__rhs), __rel_tol, static_cast<_Comparison>(__abs_tol));
+  if constexpr (::cuda::std::__cccl_is_integer_v<_Tp>)
+  {
+    return ::cuda::__isclose_integer_impl(__lhs, __rhs, __rel_tol, __abs_tol);
+  }
+  else
+  {
+    using __value_t = __isclose_compare_t<_Tp>;
+    return ::cuda::__isclose_fp_impl(
+      static_cast<__value_t>(__lhs), static_cast<__value_t>(__rhs), __rel_tol, static_cast<__value_t>(__abs_tol));
+  }
 }
 
 //! @brief Checks whether two arithmetic values are close to each other using a relative tolerance.
-_CCCL_TEMPLATE(class _Tp)
-_CCCL_REQUIRES(__isclose_has_comparison_v<_Tp>)
+_CCCL_TEMPLATE(typename _Tp)
+_CCCL_REQUIRES(::cuda::std::__cccl_is_integer_v<_Tp> || ::cuda::is_floating_point_v<_Tp>)
 [[nodiscard]] _CCCL_API constexpr bool isclose(const _Tp __lhs, const _Tp __rhs, const float __rel_tol) noexcept
 {
-  using _Comparison = __isclose_comparison_t<_Tp>;
-  return ::cuda::isclose(__lhs, __rhs, __rel_tol, _Comparison{0});
+  return ::cuda::isclose(__lhs, __rhs, __rel_tol, _Tp{0});
 }
 
 //! @brief Checks whether two arithmetic values are close to each other using the default relative tolerance.
-_CCCL_TEMPLATE(class _Tp)
-_CCCL_REQUIRES(__isclose_has_comparison_v<_Tp>)
+_CCCL_TEMPLATE(typename _Tp)
+_CCCL_REQUIRES(::cuda::std::__cccl_is_integer_v<_Tp> || ::cuda::is_floating_point_v<_Tp>)
 [[nodiscard]] _CCCL_API constexpr bool isclose(const _Tp __lhs, const _Tp __rhs) noexcept
 {
-  using _Comparison = __isclose_comparison_t<_Tp>;
-  return ::cuda::isclose(__lhs, __rhs, ::cuda::__isclose_default_relative_tolerance<_Comparison>(), _Comparison{0});
+  if constexpr (::cuda::std::__cccl_is_integer_v<_Tp>)
+  {
+    return __lhs == __rhs;
+  }
+  else
+  {
+    constexpr auto __rel_tol = ::cuda::__isclose_default_relative_tolerance<_Tp>();
+    return ::cuda::isclose(__lhs, __rhs, __rel_tol, _Tp{0});
+  }
 }
 
+// Complex overloads
+
+template <typename _Tp, typename _AbsTol, bool = __is_any_complex_v<_Tp>>
+inline constexpr bool __isclose_complex_comparison_v = false;
+
+template <typename _Tp, typename _AbsTol>
+inline constexpr bool __isclose_complex_comparison_v<_Tp, _AbsTol, true> =
+  ::cuda::std::is_same_v<typename _Tp::value_type, _AbsTol>;
+
 //! @brief Checks whether two complex values are close to each other using a relative and absolute tolerance.
-_CCCL_TEMPLATE(class _ComplexType, class _AbsTol)
-_CCCL_REQUIRES(__isclose_has_complex_abs_tol_v<_ComplexType, _AbsTol>)
-[[nodiscard]] _CCCL_API bool
+_CCCL_TEMPLATE(typename _ComplexType, typename _AbsTol)
+_CCCL_REQUIRES(__isclose_complex_comparison_v<_ComplexType, _AbsTol>)
+[[nodiscard]] _CCCL_HOST_DEVICE_API bool
 isclose(const _ComplexType& __lhs, const _ComplexType& __rhs, const float __rel_tol, const _AbsTol __abs_tol) noexcept
 {
   return ::cuda::__isclose_complex_impl(__lhs, __rhs, __rel_tol, __abs_tol);
 }
 
 //! @brief Checks whether two complex values are close to each other using a relative tolerance.
-_CCCL_TEMPLATE(class _ComplexType)
-_CCCL_REQUIRES(__isclose_has_complex_comparison_v<_ComplexType>)
-[[nodiscard]] _CCCL_API bool
+_CCCL_TEMPLATE(typename _ComplexType)
+_CCCL_REQUIRES(__is_any_complex_v<_ComplexType>)
+[[nodiscard]] _CCCL_HOST_DEVICE_API bool
 isclose(const _ComplexType& __lhs, const _ComplexType& __rhs, const float __rel_tol) noexcept
 {
-  using _Comparison = __isclose_comparison_t<typename _ComplexType::value_type>;
-  return ::cuda::isclose(__lhs, __rhs, __rel_tol, _Comparison{0});
+  using __scalar_t = typename _ComplexType::value_type;
+  return ::cuda::isclose(__lhs, __rhs, __rel_tol, __scalar_t{0});
 }
 
 //! @brief Checks whether two complex values are close to each other using the default relative tolerance.
-_CCCL_TEMPLATE(class _ComplexType)
-_CCCL_REQUIRES(__isclose_has_complex_comparison_v<_ComplexType>)
-[[nodiscard]] _CCCL_API bool isclose(const _ComplexType& __lhs, const _ComplexType& __rhs) noexcept
+_CCCL_TEMPLATE(typename _ComplexType)
+_CCCL_REQUIRES(__is_any_complex_v<_ComplexType>)
+[[nodiscard]] _CCCL_HOST_DEVICE_API bool isclose(const _ComplexType& __lhs, const _ComplexType& __rhs) noexcept
 {
-  using _Comparison = __isclose_comparison_t<typename _ComplexType::value_type>;
-  return ::cuda::isclose(__lhs, __rhs, ::cuda::__isclose_default_relative_tolerance<_Comparison>(), _Comparison{0});
+  using __scalar_t = typename _ComplexType::value_type;
+  return ::cuda::isclose(__lhs, __rhs, ::cuda::__isclose_default_relative_tolerance<__scalar_t>(), __scalar_t{0});
 }
 
 _CCCL_END_NAMESPACE_CUDA
diff --git a/libcudacxx/test/libcudacxx/cuda/numeric/isclose/isclose.pass.cpp b/libcudacxx/test/libcudacxx/cuda/numeric/isclose/isclose.pass.cpp
index 25cd3dea157..54cef3f3761 100644
--- a/libcudacxx/test/libcudacxx/cuda/numeric/isclose/isclose.pass.cpp
+++ b/libcudacxx/test/libcudacxx/cuda/numeric/isclose/isclose.pass.cpp
@@ -15,15 +15,11 @@
 #include <cuda/std/limits>
 #include <cuda/std/type_traits>
 
-#include "test_macros.h"
-
-template <class T, class U, class = void>
-inline constexpr bool has_isclose_v = false;
+#if _CCCL_HAS_HOST_STD_LIB()
+#  include <complex>
+#endif // _CCCL_HAS_HOST_STD_LIB()
 
-template <class T, class U>
-inline constexpr bool
-  has_isclose_v<T, U, cuda::std::void_t<decltype(cuda::isclose(cuda::std::declval<T>(), cuda::std::declval<U>()))>> =
-    true;
+#include "test_macros.h"
 
 template <class T, class AbsTol, class = void>
 inline constexpr bool has_isclose_abs_tol_v = false;
@@ -35,18 +31,6 @@ inline constexpr bool has_isclose_abs_tol_v<
   cuda::std::void_t<decltype(cuda::isclose(
     cuda::std::declval<T>(), cuda::std::declval<T>(), 0.0f, cuda::std::declval<AbsTol>()))>> = true;
 
-template <class T>
-TEST_FUNC constexpr float default_rel_tol()
-{
-  constexpr auto digits = (cuda::std::numeric_limits<T>::max_digits10 + 1) / 2;
-  auto tol              = 1.0f;
-  for (int i = 0; i < digits; ++i)
-  {
-    tol /= 10.0f;
-  }
-  return tol;
-}
-
 template <class T>
 TEST_FUNC constexpr bool test_floating_point()
 {
@@ -57,57 +41,40 @@ TEST_FUNC constexpr bool test_floating_point()
   static_assert(noexcept(cuda::isclose(T{}, T{}, 0.0f)));
   static_assert(noexcept(cuda::isclose(T{}, T{}, 0.0f, T{})));
 
-  constexpr auto tol = default_rel_tol<T>();
+  constexpr auto tol = cuda::__isclose_default_relative_tolerance<T>();
   assert(cuda::isclose(T{1}, T{1}));
-  assert(cuda::isclose(T{1}, T{1} + tol / T{2}));
-  assert(!cuda::isclose(T{1}, T{1} + tol * T{2}));
+  assert(cuda::isclose(T{1}, T{1} + static_cast<T>(tol / 2.0f)));
+  assert(!cuda::isclose(T{1}, T{1} + static_cast<T>(tol * 2.0f)));
 
   assert(cuda::isclose(T{10}, T{11}, 0.1f));
   assert(cuda::isclose(T{11}, T{10}, 0.1f));
   assert(!cuda::isclose(T{10}, T{12}, 0.1f));
 
-  assert(!cuda::isclose(T{0}, tol / T{2}));
-  assert(cuda::isclose(T{0}, T{0.5}, 0.0f, T{0.5}));
-  assert(!cuda::isclose(T{0}, T{0.5}, 0.0f, T{0.25}));
-
   const auto inf = cuda::std::numeric_limits<T>::infinity();
   const auto nan = cuda::std::numeric_limits<T>::quiet_NaN();
   assert(cuda::isclose(inf, inf));
   assert(cuda::isclose(-inf, -inf));
   assert(!cuda::isclose(inf, -inf));
-  assert(!cuda::isclose(inf, T{1}, 10.0f));
   assert(!cuda::isclose(nan, nan));
   assert(!cuda::isclose(nan, T{}));
-
   return true;
 }
 
+template <typename T>
 TEST_FUNC constexpr bool test_integral()
 {
-  static_assert(cuda::std::is_same_v<bool, decltype(cuda::isclose(0, 0))>);
-  static_assert(cuda::std::is_same_v<bool, decltype(cuda::isclose(0, 0, 0.0f))>);
-  static_assert(cuda::std::is_same_v<bool, decltype(cuda::isclose(0, 0, 0.0f, 0.0))>);
-  static_assert(noexcept(cuda::isclose(0, 0)));
-  static_assert(noexcept(cuda::isclose(0, 0, 0.0f)));
-  static_assert(noexcept(cuda::isclose(0, 0, 0.0f, 0.0)));
-
-  assert(cuda::isclose(1, 1));
-  assert(cuda::isclose(1u, 1u));
-  assert(!cuda::isclose(1, 2));
-  assert(cuda::isclose(100, 101, 0.02f));
-  assert(cuda::isclose(101, 100, 0.02f));
-  assert(!cuda::isclose(100, 103, 0.02f));
-  assert(cuda::isclose(0, 1, 0.0f, 1.0));
-  assert(!cuda::isclose(0, 1, 0.0f, 0.5));
-
-  static_assert(!has_isclose_v<int, unsigned int>);
-  static_assert(!has_isclose_v<float, double>);
-  static_assert(has_isclose_abs_tol_v<double, float>);
-  static_assert(has_isclose_abs_tol_v<double, double>);
-  static_assert(!has_isclose_abs_tol_v<float, double>);
-  static_assert(has_isclose_abs_tol_v<int, float>);
-  static_assert(has_isclose_abs_tol_v<int, double>);
+  static_assert(cuda::std::is_same_v<bool, decltype(cuda::isclose(T{}, T{}))>);
+  static_assert(cuda::std::is_same_v<bool, decltype(cuda::isclose(T{}, T{}, 0.0f))>);
+  static_assert(cuda::std::is_same_v<bool, decltype(cuda::isclose(T{}, T{}, 0.0f, T{}))>);
+  static_assert(noexcept(cuda::isclose(T{}, T{})));
+  static_assert(noexcept(cuda::isclose(T{}, T{}, 0.0f)));
+  static_assert(noexcept(cuda::isclose(T{}, T{}, 0.0f, T{})));
 
+  assert(cuda::isclose(T{1}, T{1}));
+  assert(!cuda::isclose(T{1}, T{2}));
+  assert(cuda::isclose(T{100}, T{101}, 0.02f));
+  assert(cuda::isclose(T{101}, T{100}, 0.02f));
+  assert(!cuda::isclose(T{100}, T{103}, 0.02f));
   return true;
 }
 
@@ -115,42 +82,47 @@ template <class Complex>
 TEST_FUNC void test_complex()
 {
   using T = typename Complex::value_type;
-
   static_assert(cuda::std::is_same_v<bool, decltype(cuda::isclose(Complex{}, Complex{}))>);
   static_assert(cuda::std::is_same_v<bool, decltype(cuda::isclose(Complex{}, Complex{}, 0.0f))>);
   static_assert(cuda::std::is_same_v<bool, decltype(cuda::isclose(Complex{}, Complex{}, 0.0f, T{}))>);
-  static_assert(noexcept(cuda::isclose(Complex{}, Complex{})));
-  static_assert(noexcept(cuda::isclose(Complex{}, Complex{}, 0.0f)));
-  static_assert(noexcept(cuda::isclose(Complex{}, Complex{}, 0.0f, T{})));
+  static_assert(noexcept(cuda::isclose(cuda::std::declval<Complex>(), cuda::std::declval<Complex>())));
+  static_assert(noexcept(cuda::isclose(cuda::std::declval<Complex>(), cuda::std::declval<Complex>(), 0.0f)));
+  static_assert(noexcept(
+    cuda::isclose(cuda::std::declval<Complex>(), cuda::std::declval<Complex>(), 0.0f, cuda::std::declval<T>())));
 
   assert(cuda::isclose(Complex{T{1}, T{2}}, Complex{T{1}, T{2}}));
   assert(cuda::isclose(Complex{T{3}, T{4}}, Complex{T{3}, T{4.4}}, 0.1f));
   assert(!cuda::isclose(Complex{T{3}, T{4}}, Complex{T{3}, T{5}}, 0.1f));
 
-  // PEP 485 uses complex magnitudes, not component-wise scalar comparisons.
-  assert(cuda::isclose(Complex{T{1}, T{1}}, Complex{T{2}, T{0}}, 0.75f));
-
   assert(!cuda::isclose(Complex{T{0}, T{0}}, Complex{T{0.3}, T{0.4}}));
-  assert(cuda::isclose(Complex{T{0}, T{0}}, Complex{T{0.3}, T{0.4}}, 0.0f, T{0.5}));
+  auto abs_tol = T{0.5};
+#if _LIBCUDACXX_HAS_NVBF16()
+  if constexpr (cuda::std::is_same_v<T, __nv_bfloat16>)
+  {
+    abs_tol = T{0.51};
+  }
+#endif // _LIBCUDACXX_HAS_NVBF16()
+  assert(cuda::isclose(Complex{T{0}, T{0}}, Complex{T{0.3}, T{0.4}}, 0.0f, abs_tol));
   assert(!cuda::isclose(Complex{T{0}, T{0}}, Complex{T{0.3}, T{0.4}}, 0.0f, T{0.25}));
 
   const auto inf = cuda::std::numeric_limits<T>::infinity();
   const auto nan = cuda::std::numeric_limits<T>::quiet_NaN();
   assert(cuda::isclose(Complex{inf, T{1}}, Complex{inf, T{1}}));
-  assert(!cuda::isclose(Complex{inf, T{1}}, Complex{inf, T{2}}, 10.0f));
+  assert(!cuda::isclose(Complex{inf, T{1}}, Complex{inf, T{2}}, 1.0f));
   assert(!cuda::isclose(Complex{nan, T{}}, Complex{nan, T{}}));
   assert(!cuda::isclose(Complex{nan, T{}}, Complex{}));
 }
 
-TEST_FUNC void test_mixed_complex()
+TEST_FUNC constexpr void test_invalid_complex_cases()
 {
-  static_assert(!has_isclose_v<cuda::std::complex<float>, cuda::std::complex<double>>);
-  static_assert(!has_isclose_v<cuda::complex<float>, cuda::complex<double>>);
-  static_assert(!has_isclose_v<cuda::complex<float>, cuda::std::complex<float>>);
-  static_assert(has_isclose_abs_tol_v<cuda::std::complex<double>, float>);
-  static_assert(has_isclose_abs_tol_v<cuda::complex<double>, float>);
+  static_assert(!has_isclose_abs_tol_v<cuda::std::complex<double>, float>);
+  static_assert(!has_isclose_abs_tol_v<cuda::complex<double>, float>);
   static_assert(!has_isclose_abs_tol_v<cuda::std::complex<float>, double>);
   static_assert(!has_isclose_abs_tol_v<cuda::complex<float>, double>);
+#if _CCCL_HAS_HOST_STD_LIB()
+  static_assert(!has_isclose_abs_tol_v<std::complex<double>, float>);
+  static_assert(!has_isclose_abs_tol_v<std::complex<float>, double>);
+#endif // _CCCL_HAS_HOST_STD_LIB()
 }
 
 TEST_FUNC constexpr bool test()
@@ -160,22 +132,66 @@ TEST_FUNC constexpr bool test()
 #if _CCCL_HAS_LONG_DOUBLE()
   test_floating_point<long double>();
 #endif // _CCCL_HAS_LONG_DOUBLE()
+#if _CCCL_HAS_FLOAT128()
+  test_floating_point<__float128>();
+#endif // _CCCL_HAS_FLOAT128()
+  test_integral<signed char>();
+  test_integral<unsigned char>();
+  test_integral<short>();
+  test_integral<unsigned short>();
+  test_integral<int>();
+  test_integral<unsigned>();
+  test_integral<long>();
+  test_integral<unsigned long>();
+  test_integral<long long>();
+  test_integral<unsigned long long>();
+#if _CCCL_HAS_INT128()
+  test_integral<__int128_t>();
+  test_integral<__uint128_t>();
+#endif // _CCCL_HAS_INT128()
+
+  test_invalid_complex_cases();
+  return true;
+}
 
-  test_integral();
+template <template <typename> class Complex>
+TEST_FUNC void test_complex_types_runtime()
+{
+  test_complex<Complex<float>>();
+  test_complex<Complex<double>>();
+#if _CCCL_HAS_LONG_DOUBLE()
+  test_complex<Complex<long double>>();
+#endif // _CCCL_HAS_LONG_DOUBLE()
+  // complex__float128 support requires std::hypot overload
+}
 
+TEST_FUNC bool test_runtime()
+{
+#if _LIBCUDACXX_HAS_NVFP16()
+  test_floating_point<__half>();
+#endif // _LIBCUDACXX_HAS_NVFP16()
+#if _LIBCUDACXX_HAS_NVBF16()
+  test_floating_point<__nv_bfloat16>();
+#endif // _LIBCUDACXX_HAS_NVBF16()
+
+  test_complex_types_runtime<cuda::std::complex>();
+#if _LIBCUDACXX_HAS_NVFP16()
+  test_complex<cuda::std::complex<__half>>();
+#endif // _LIBCUDACXX_HAS_NVFP16()
+#if _LIBCUDACXX_HAS_NVBF16()
+  test_complex<cuda::std::complex<__nv_bfloat16>>();
+#endif // _LIBCUDACXX_HAS_NVBF16()
+  test_complex_types_runtime<cuda::complex>();
+#if _CCCL_HAS_HOST_STD_LIB()
+  NV_IF_TARGET(NV_IS_HOST, (test_complex_types_runtime<std::complex>();))
+#endif // _CCCL_HAS_HOST_STD_LIB()
   return true;
 }
 
 int main(int, char**)
 {
-  test();
+  assert(test());
+  assert(test_runtime());
   static_assert(test());
-
-  test_complex<cuda::std::complex<float>>();
-  test_complex<cuda::std::complex<double>>();
-  test_complex<cuda::complex<float>>();
-  test_complex<cuda::complex<double>>();
-  test_mixed_complex();
-
   return 0;
 }

From 1b063b9d18ab846bfd72812b88ad5fdea86dba7d Mon Sep 17 00:00:00 2001
From: Federico Busato <50413820+fbusato@users.noreply.github.com>
Date: Wed, 24 Jun 2026 09:54:18 -0700
Subject: [PATCH 04/14] Update libcudacxx/include/cuda/__numeric/isclose.h

Co-authored-by: Jacob Faibussowitsch <jacob.fai@gmail.com>
---
 libcudacxx/include/cuda/__numeric/isclose.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/libcudacxx/include/cuda/__numeric/isclose.h b/libcudacxx/include/cuda/__numeric/isclose.h
index 41a5acf8f8e..49d7ebb1248 100644
--- a/libcudacxx/include/cuda/__numeric/isclose.h
+++ b/libcudacxx/include/cuda/__numeric/isclose.h
@@ -45,7 +45,7 @@
 _CCCL_BEGIN_NAMESPACE_CUDA
 
 template <typename _Tp>
-using __isclose_compare_t = ::cuda::std::
+using __isclose_compare_t _CCCL_NODEBUG_ALIAS = ::cuda::std::
   conditional_t<(::cuda::std::__is_extended_floating_point_v<_Tp> && sizeof(_Tp) <= sizeof(float)), float, _Tp>;
 
 template <typename _Tp>

From d7f286b1fd73ce04b38729fb41362b3d58f4b60c Mon Sep 17 00:00:00 2001
From: fbusato <fbusato@nvidia.com>
Date: Wed, 24 Jun 2026 10:53:40 -0700
Subject: [PATCH 05/14] isclose: use fabs/fmax, reject complex __float128, add docs

---
 libcudacxx/include/cuda/__numeric/isclose.h | 82 +++++++++++++++------
 1 file changed, 58 insertions(+), 24 deletions(-)

diff --git a/libcudacxx/include/cuda/__numeric/isclose.h b/libcudacxx/include/cuda/__numeric/isclose.h
index 49d7ebb1248..ad7efb13948 100644
--- a/libcudacxx/include/cuda/__numeric/isclose.h
+++ b/libcudacxx/include/cuda/__numeric/isclose.h
@@ -31,6 +31,7 @@
 #include <cuda/std/__cmath/abs.h>
 #include <cuda/std/__cmath/hypot.h>
 #include <cuda/std/__cmath/isfinite.h>
+#include <cuda/std/__cmath/min_max.h>
 #include <cuda/std/__concepts/concept_macros.h>
 #include <cuda/std/__limits/numeric_limits.h>
 #include <cuda/std/__type_traits/conditional.h>
@@ -76,19 +77,22 @@ __isclose_fp_impl(const _Tp __lhs, const _Tp __rhs, const float __rel_tol, const
   {
     return false;
   }
-  const auto __diff      = ::cuda::std::abs(__lhs - __rhs);
-  const auto __lhs_abs   = ::cuda::std::abs(__lhs);
-  const auto __rhs_abs   = ::cuda::std::abs(__rhs);
-  const auto __rel_value = static_cast<_Tp>(__rel_tol * ::cuda::std::max(__lhs_abs, __rhs_abs));
-  return __diff <= ::cuda::std::max(__abs_tol, __rel_value);
+  const auto __diff      = ::cuda::std::fabs(__lhs - __rhs);
+  const auto __lhs_abs   = ::cuda::std::fabs(__lhs);
+  const auto __rhs_abs   = ::cuda::std::fabs(__rhs);
+  const auto __rel_value = static_cast<_Tp>(__rel_tol * ::cuda::std::fmax(__lhs_abs, __rhs_abs));
+  return __diff <= ::cuda::std::fmax(__abs_tol, __rel_value);
 }
 
 template <typename _ComplexType, typename _AbsTol>
 [[nodiscard]] _CCCL_HOST_DEVICE_API bool __isclose_complex_impl(
   const _ComplexType& __lhs, const _ComplexType& __rhs, const float __rel_tol, const _AbsTol __abs_tol) noexcept
 {
-  using __scalar_t  = typename _ComplexType::value_type;
-  using __compare_t = __isclose_compare_t<__scalar_t>;
+  using __scalar_t _CCCL_NODEBUG_ALIAS  = typename _ComplexType::value_type;
+  using __compare_t _CCCL_NODEBUG_ALIAS = __isclose_compare_t<__scalar_t>;
+#if _CCCL_HAS_FLOAT128()
+  static_assert(!::cuda::std::is_same_v<__scalar_t, __float128>, "cuda::isclose: __float128 is not supported");
+#endif // _CCCL_HAS_FLOAT128()
   _CCCL_ASSERT(::cuda::in_range(__rel_tol, 0.0f, 1.0f),
                "cuda::isclose: relative tolerance must be in the range [0.0, 1.0]");
   _CCCL_ASSERT(::cuda::std::isfinite(__abs_tol) && __abs_tol >= __scalar_t{0},
@@ -112,19 +116,19 @@ template <typename _ComplexType, typename _AbsTol>
   const auto __diff      = ::cuda::std::hypot(__lhs_real - __rhs_real, __lhs_imag - __rhs_imag);
   const auto __lhs_abs   = ::cuda::std::hypot(__lhs_real, __lhs_imag);
   const auto __rhs_abs   = ::cuda::std::hypot(__rhs_real, __rhs_imag);
-  const auto __rel_value = __rel_tol * ::cuda::std::max(__lhs_abs, __rhs_abs);
-  return __diff <= ::cuda::std::max(__abs, __rel_value);
+  const auto __rel_value = __rel_tol * ::cuda::std::fmax(__lhs_abs, __rhs_abs);
+  return __diff <= ::cuda::std::fmax(__abs, __rel_value);
 }
 
 template <typename _Tp>
 [[nodiscard]] _CCCL_API constexpr ::cuda::std::make_unsigned_t<_Tp>
-__safe_diff(const _Tp __lhs, const _Tp __rhs) noexcept
+__safe_abs_diff(const _Tp __lhs, const _Tp __rhs) noexcept
 {
-  using __unsigned_t           = ::cuda::std::make_unsigned_t<_Tp>;
-  const auto __lhs_abs         = ::cuda::uabs(__lhs);
-  const auto __rhs_abs         = ::cuda::uabs(__rhs);
-  const auto __is_lhs_negative = ::cuda::std::__cccl_is_signed_integer_v<_Tp> && __lhs < _Tp{0};
-  const auto __is_rhs_negative = ::cuda::std::__cccl_is_signed_integer_v<_Tp> && __rhs < _Tp{0};
+  using __unsigned_t _CCCL_NODEBUG_ALIAS = ::cuda::std::make_unsigned_t<_Tp>;
+  const auto __lhs_abs                   = ::cuda::uabs(__lhs);
+  const auto __rhs_abs                   = ::cuda::uabs(__rhs);
+  const auto __is_lhs_negative           = ::cuda::std::is_signed_v<_Tp> && __lhs < _Tp{0};
+  const auto __is_rhs_negative           = ::cuda::std::is_signed_v<_Tp> && __rhs < _Tp{0};
 
   if (__is_lhs_negative != __is_rhs_negative)
   {
@@ -145,11 +149,11 @@ __isclose_integer_impl(const _Tp __lhs, const _Tp __rhs, const float __rel_tol,
   {
     _CCCL_ASSERT(__abs_tol >= _Tp{0}, "cuda::isclose: absolute tolerance must be non-negative");
   }
-  using __unsigned_t     = ::cuda::std::make_unsigned_t<_Tp>;
-  const auto __lhs_abs   = ::cuda::uabs(__lhs);
-  const auto __rhs_abs   = ::cuda::uabs(__rhs);
-  const auto __diff      = ::cuda::__safe_diff(__lhs, __rhs);
-  const auto __abs       = static_cast<__unsigned_t>(__abs_tol);
+  using __unsigned_t _CCCL_NODEBUG_ALIAS = ::cuda::std::make_unsigned_t<_Tp>;
+  const auto __lhs_abs                   = ::cuda::uabs(__lhs);
+  const auto __rhs_abs                   = ::cuda::uabs(__rhs);
+  const auto __diff                      = ::cuda::__safe_abs_diff(__lhs, __rhs);
+  const auto __abs                       = static_cast<__unsigned_t>(__abs_tol);
   const auto __rel_value = static_cast<__unsigned_t>(__rel_tol * ::cuda::std::max(__lhs_abs, __rhs_abs));
   return __diff <= ::cuda::std::max(__abs, __rel_value);
 }
@@ -160,6 +164,12 @@ __isclose_integer_impl(const _Tp __lhs, const _Tp __rhs, const float __rel_tol,
 // Scalar overloads
 
 //! @brief Checks whether two arithmetic values are close to each other using a relative and absolute tolerance.
+//!
+//! @param __lhs The first value to compare.
+//! @param __rhs The second value to compare.
+//! @param __rel_tol The relative tolerance.
+//! @param __abs_tol The absolute tolerance.
+//! @return True if __lhs and __rhs are close to each other, false otherwise.
 _CCCL_TEMPLATE(typename _Tp)
 _CCCL_REQUIRES(::cuda::std::__cccl_is_integer_v<_Tp> || ::cuda::is_floating_point_v<_Tp>)
 [[nodiscard]] _CCCL_API constexpr bool
@@ -167,17 +177,22 @@ isclose(const _Tp __lhs, const _Tp __rhs, const float __rel_tol, const _Tp __abs
 {
   if constexpr (::cuda::std::__cccl_is_integer_v<_Tp>)
   {
-    return ::cuda::__isclose_integer_impl(__lhs, __rhs, __rel_tol, __abs_tol);
+    return ::cuda::__isclose_integer_impl(+__lhs, +__rhs, __rel_tol, +__abs_tol);
   }
   else
   {
-    using __value_t = __isclose_compare_t<_Tp>;
+    using __value_t _CCCL_NODEBUG_ALIAS = __isclose_compare_t<_Tp>;
     return ::cuda::__isclose_fp_impl(
       static_cast<__value_t>(__lhs), static_cast<__value_t>(__rhs), __rel_tol, static_cast<__value_t>(__abs_tol));
   }
 }
 
 //! @brief Checks whether two arithmetic values are close to each other using a relative tolerance.
+//!
+//! @param __lhs The first value to compare.
+//! @param __rhs The second value to compare.
+//! @param __rel_tol The relative tolerance.
+//! @return True if __lhs and __rhs are close to each other, false otherwise.
 _CCCL_TEMPLATE(typename _Tp)
 _CCCL_REQUIRES(::cuda::std::__cccl_is_integer_v<_Tp> || ::cuda::is_floating_point_v<_Tp>)
 [[nodiscard]] _CCCL_API constexpr bool isclose(const _Tp __lhs, const _Tp __rhs, const float __rel_tol) noexcept
@@ -186,6 +201,10 @@ _CCCL_REQUIRES(::cuda::std::__cccl_is_integer_v<_Tp> || ::cuda::is_floating_poin
 }
 
 //! @brief Checks whether two arithmetic values are close to each other using the default relative tolerance.
+//!
+//! @param __lhs The first value to compare.
+//! @param __rhs The second value to compare.
+//! @return True if __lhs and __rhs are close to each other, false otherwise.
 _CCCL_TEMPLATE(typename _Tp)
 _CCCL_REQUIRES(::cuda::std::__cccl_is_integer_v<_Tp> || ::cuda::is_floating_point_v<_Tp>)
 [[nodiscard]] _CCCL_API constexpr bool isclose(const _Tp __lhs, const _Tp __rhs) noexcept
@@ -211,6 +230,12 @@ inline constexpr bool __isclose_complex_comparison_v<_Tp, _AbsTol, true> =
   ::cuda::std::is_same_v<typename _Tp::value_type, _AbsTol>;
 
 //! @brief Checks whether two complex values are close to each other using a relative and absolute tolerance.
+//!
+//! @param __lhs The first value to compare.
+//! @param __rhs The second value to compare.
+//! @param __rel_tol The relative tolerance.
+//! @param __abs_tol The absolute tolerance.
+//! @return True if __lhs and __rhs are close to each other, false otherwise.
 _CCCL_TEMPLATE(typename _ComplexType, typename _AbsTol)
 _CCCL_REQUIRES(__isclose_complex_comparison_v<_ComplexType, _AbsTol>)
 [[nodiscard]] _CCCL_HOST_DEVICE_API bool
@@ -220,21 +245,30 @@ isclose(const _ComplexType& __lhs, const _ComplexType& __rhs, const float __rel_
 }
 
 //! @brief Checks whether two complex values are close to each other using a relative tolerance.
+//!
+//! @param __lhs The first value to compare.
+//! @param __rhs The second value to compare.
+//! @param __rel_tol The relative tolerance.
+//! @return True if __lhs and __rhs are close to each other, false otherwise.
 _CCCL_TEMPLATE(typename _ComplexType)
 _CCCL_REQUIRES(__is_any_complex_v<_ComplexType>)
 [[nodiscard]] _CCCL_HOST_DEVICE_API bool
 isclose(const _ComplexType& __lhs, const _ComplexType& __rhs, const float __rel_tol) noexcept
 {
-  using __scalar_t = typename _ComplexType::value_type;
+  using __scalar_t _CCCL_NODEBUG_ALIAS = typename _ComplexType::value_type;
   return ::cuda::isclose(__lhs, __rhs, __rel_tol, __scalar_t{0});
 }
 
 //! @brief Checks whether two complex values are close to each other using the default relative tolerance.
+//!
+//! @param __lhs The first value to compare.
+//! @param __rhs The second value to compare.
+//! @return True if __lhs and __rhs are close to each other, false otherwise.
 _CCCL_TEMPLATE(typename _ComplexType)
 _CCCL_REQUIRES(__is_any_complex_v<_ComplexType>)
 [[nodiscard]] _CCCL_HOST_DEVICE_API bool isclose(const _ComplexType& __lhs, const _ComplexType& __rhs) noexcept
 {
-  using __scalar_t = typename _ComplexType::value_type;
+  using __scalar_t _CCCL_NODEBUG_ALIAS = typename _ComplexType::value_type;
   return ::cuda::isclose(__lhs, __rhs, ::cuda::__isclose_default_relative_tolerance<__scalar_t>(), __scalar_t{0});
 }
 

From 31935cd964716e0edec46a030bd4756fa084b40c Mon Sep 17 00:00:00 2001
From: fbusato <fbusato@nvidia.com>
Date: Mon, 22 Jun 2026 12:48:46 -0700
Subject: [PATCH 06/14] draft

---
 docs/libcudacxx/extended_api/numeric.rst      |   6 +
 .../extended_api/numeric/isclose.rst          | 105 ++++++++
 libcudacxx/include/cuda/__numeric/isclose.h   | 237 ++++++++++++++++++
 libcudacxx/include/cuda/numeric               |   1 +
 .../cuda/numeric/isclose/isclose.pass.cpp     | 181 +++++++++++++
 5 files changed, 530 insertions(+)
 create mode 100644 docs/libcudacxx/extended_api/numeric/isclose.rst
 create mode 100644 libcudacxx/include/cuda/__numeric/isclose.h
 create mode 100644 libcudacxx/test/libcudacxx/cuda/numeric/isclose/isclose.pass.cpp

diff --git a/docs/libcudacxx/extended_api/numeric.rst b/docs/libcudacxx/extended_api/numeric.rst
index 3a00e877662..7a6957a242c 100644
--- a/docs/libcudacxx/extended_api/numeric.rst
+++ b/docs/libcudacxx/extended_api/numeric.rst
@@ -11,6 +11,7 @@ Numeric
    numeric/saturating_add_overflow
    numeric/div_overflow
    numeric/saturating_div_overflow
+   numeric/isclose
    numeric/mul_overflow
    numeric/saturating_mul_overflow
    numeric/narrow
@@ -59,6 +60,11 @@ Numeric
      - CCCL 3.2.0
      - CUDA 13.2
 
+   * - :ref:`cuda::isclose <libcudacxx-extended-api-numeric-isclose>`
+     - Checks whether two values are approximately equal
+     - CCCL 3.5.0
+     - CUDA 13.5
+
    * - :ref:`cuda::mul_overflow <libcudacxx-extended-api-numeric-mul_overflow>`
      - Performs multiplication with overflow checking
      - CCCL 3.4.0
diff --git a/docs/libcudacxx/extended_api/numeric/isclose.rst b/docs/libcudacxx/extended_api/numeric/isclose.rst
new file mode 100644
index 00000000000..2da1f5d8b08
--- /dev/null
+++ b/docs/libcudacxx/extended_api/numeric/isclose.rst
@@ -0,0 +1,105 @@
+.. _libcudacxx-extended-api-numeric-isclose:
+
+``cuda::isclose``
+=================
+
+.. code:: cpp
+
+   template <class T>
+   [[nodiscard]] constexpr bool isclose(T lhs, T rhs) noexcept;
+
+   template <class T>
+   [[nodiscard]] constexpr bool isclose(T lhs, T rhs, float rel_tol) noexcept;
+
+   template <class T, class AbsTol>
+   [[nodiscard]] constexpr bool isclose(T lhs, T rhs, float rel_tol, AbsTol abs_tol) noexcept;
+
+   template <class Complex>
+   [[nodiscard]] bool isclose(const Complex& lhs, const Complex& rhs) noexcept;
+
+   template <class Complex>
+   [[nodiscard]] bool isclose(const Complex& lhs, const Complex& rhs, float rel_tol) noexcept;
+
+   template <class Complex, class AbsTol>
+   [[nodiscard]] bool isclose(const Complex& lhs, const Complex& rhs, float rel_tol, AbsTol abs_tol) noexcept;
+
+``cuda::isclose`` checks whether two values are approximately equal using the weak symmetric comparison described by
+`PEP 485 <https://peps.python.org/pep-0485/>`_:
+
+.. code:: cpp
+
+   abs(lhs - rhs) <= max(abs_tol, rel_tol * max(abs(lhs), abs(rhs)))
+
+The overloads without ``abs_tol`` use ``abs_tol == 0``. The overloads without ``rel_tol`` use a default relative
+tolerance based on the promoted comparison type:
+
+.. code:: cpp
+
+   pow(10, -ceil_div(cuda::std::numeric_limits<Comparison>::max_digits10, 2))
+
+For ``double`` comparisons, this is ``1e-9``, matching the default relative tolerance from PEP 485. Lower- and
+higher-precision comparison types use a correspondingly larger or smaller default.
+
+**Parameters**
+
+- ``lhs``: The first value to compare.
+- ``rhs``: The second value to compare.
+- ``rel_tol``: The relative tolerance. Must be finite and non-negative. Passing ``0`` performs a purely absolute
+  tolerance check when ``abs_tol`` is non-zero.
+- ``abs_tol``: The absolute tolerance. Must be finite and non-negative. This is useful for comparisons near zero. The
+  supplied type may promote to the value comparison type, but may not make the value comparison type wider.
+
+**Return value**
+
+- Returns ``true`` if ``lhs`` and ``rhs`` are close to each other, otherwise returns ``false``.
+
+**Constraints**
+
+- Scalar overloads require ``lhs`` and ``rhs`` to have the same arithmetic type. ``abs_tol`` must be representable in the
+  value comparison type after promotion. For example, ``double`` values may be compared with a ``float`` absolute
+  tolerance, but ``float`` values cannot use a ``double`` absolute tolerance. ``rel_tol`` is always a ``float``.
+- Complex overloads accept ``cuda::std::complex<T>`` and ``cuda::complex<T>`` operands. ``lhs`` and ``rhs`` must have the
+  same complex type, and ``abs_tol`` must be representable in the complex value comparison type after promotion.
+  ``rel_tol`` is always a ``float``.
+
+**Special values**
+
+- NaN is never close to any value, including another NaN.
+- Infinity and negative infinity are only close to themselves.
+- With the default ``abs_tol == 0``, comparisons near zero generally require an explicitly supplied absolute tolerance.
+
+For complex values, ``cuda::isclose`` follows the ``cmath.isclose`` model from PEP 485: the difference and scaling values
+are computed from complex magnitudes, rather than comparing the real and imaginary components independently.
+
+Example
+-------
+
+.. code:: cuda
+
+    #include <cuda/__complex_>
+    #include <cuda/numeric>
+    #include <cuda/std/cassert>
+    #include <cuda/std/complex>
+
+    __global__ void kernel()
+    {
+        assert(cuda::isclose(1.0, 1.0 + 5e-10));
+        assert(!cuda::isclose(1.0, 1.0 + 5e-8));
+
+        assert(!cuda::isclose(0.0, 1e-12));
+        assert(cuda::isclose(0.0, 1e-12, 0.0, 1e-12));
+
+        cuda::std::complex<double> z1{1.0, 1.0};
+        cuda::std::complex<double> z2{2.0, 0.0};
+        assert(cuda::isclose(z1, z2, 0.75));
+
+        cuda::complex<float> z3{1.0f, 1.0f};
+        cuda::complex<float> z4{2.0f, 0.0f};
+        assert(cuda::isclose(z3, z4, 0.75f));
+    }
+
+    int main()
+    {
+        kernel<<<1, 1>>>();
+        cudaDeviceSynchronize();
+    }
diff --git a/libcudacxx/include/cuda/__numeric/isclose.h b/libcudacxx/include/cuda/__numeric/isclose.h
new file mode 100644
index 00000000000..76e7d7c91e8
--- /dev/null
+++ b/libcudacxx/include/cuda/__numeric/isclose.h
@@ -0,0 +1,237 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of libcu++, the C++ Standard Library for your entire system,
+// under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef _CUDA___NUMERIC_ISCLOSE_H
+#define _CUDA___NUMERIC_ISCLOSE_H
+
+#include <cuda/std/detail/__config>
+
+#if defined(_CCCL_IMPLICIT_SYSTEM_HEADER_GCC)
+#  pragma GCC system_header
+#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_CLANG)
+#  pragma clang system_header
+#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_MSVC)
+#  pragma system_header
+#endif // no system header
+
+#include <cuda/__cmath/ceil_div.h>
+#include <cuda/__complex/get_real_imag.h>
+#include <cuda/__complex/traits.h>
+#include <cuda/std/__algorithm/max.h>
+#include <cuda/std/__cmath/abs.h>
+#include <cuda/std/__cmath/hypot.h>
+#include <cuda/std/__cmath/isfinite.h>
+#include <cuda/std/__cmath/isinf.h>
+#include <cuda/std/__cmath/isnan.h>
+#include <cuda/std/__concepts/concept_macros.h>
+#include <cuda/std/__limits/numeric_limits.h>
+#include <cuda/std/__type_traits/is_same.h>
+#include <cuda/std/__type_traits/promote.h>
+
+#include <cuda/std/__cccl/prologue.h>
+
+_CCCL_BEGIN_NAMESPACE_CUDA
+
+template <class _Tp>
+using __isclose_comparison_t = ::cuda::std::__promote_t<_Tp>;
+
+template <class _Tp, class _Up>
+using __isclose_comparison2_t = ::cuda::std::__promote_t<__isclose_comparison_t<_Tp>, _Up>;
+
+template <class _Tp, bool = ::cuda::std::__promote<_Tp>::value>
+inline constexpr bool __isclose_has_comparison_v = false;
+
+template <class _Tp>
+inline constexpr bool __isclose_has_comparison_v<_Tp, true> = true;
+
+template <class _Tp, class _Up, bool = __isclose_has_comparison_v<_Tp>>
+inline constexpr bool __isclose_has_comparison2_v = false;
+
+template <class _Tp, class _Up>
+inline constexpr bool __isclose_has_comparison2_v<_Tp, _Up, true> =
+  ::cuda::std::__promote<__isclose_comparison_t<_Tp>, _Up>::value;
+
+template <class _Tp, class _AbsTol, bool = __isclose_has_comparison2_v<_Tp, _AbsTol>>
+inline constexpr bool __isclose_has_abs_tol_v = false;
+
+template <class _Tp, class _AbsTol>
+inline constexpr bool __isclose_has_abs_tol_v<_Tp, _AbsTol, true> =
+  ::cuda::std::is_same_v<__isclose_comparison_t<_Tp>, __isclose_comparison2_t<_Tp, _AbsTol>>;
+
+template <class _ComplexType, bool = __is_cccl_complex_v<_ComplexType>>
+inline constexpr bool __isclose_has_complex_comparison_v = false;
+
+template <class _ComplexType>
+inline constexpr bool __isclose_has_complex_comparison_v<_ComplexType, true> =
+  __isclose_has_comparison_v<typename _ComplexType::value_type>;
+
+template <class _ComplexType, class _AbsTol, bool = __is_cccl_complex_v<_ComplexType>>
+inline constexpr bool __isclose_has_complex_abs_tol_v = false;
+
+template <class _ComplexType, class _AbsTol>
+inline constexpr bool __isclose_has_complex_abs_tol_v<_ComplexType, _AbsTol, true> =
+  __isclose_has_abs_tol_v<typename _ComplexType::value_type, _AbsTol>;
+
+template <class _Tp>
+[[nodiscard]] _CCCL_API constexpr float __isclose_default_rel_tol() noexcept
+{
+  constexpr auto __digits = ::cuda::ceil_div(::cuda::std::numeric_limits<_Tp>::max_digits10, 2);
+  auto __tol              = 1.0f;
+  for (int __i = 0; __i < __digits; ++__i)
+  {
+    __tol /= 10.0f;
+  }
+  return __tol;
+}
+
+template <class _Tp>
+_CCCL_API constexpr void __isclose_validate_tolerances(const float __rel_tol, const _Tp __abs_tol) noexcept
+{
+  _CCCL_ASSERT(::cuda::std::isfinite(__rel_tol) && __rel_tol >= 0.0f,
+               "cuda::isclose: relative tolerance must be finite and non-negative");
+  _CCCL_ASSERT(::cuda::std::isfinite(__abs_tol) && __abs_tol >= _Tp{0},
+               "cuda::isclose: absolute tolerance must be finite and non-negative");
+}
+
+template <class _Tp>
+[[nodiscard]] _CCCL_API constexpr bool __isclose_compare(
+  const _Tp __diff, const _Tp __lhs_abs, const _Tp __rhs_abs, const float __rel_tol, const _Tp __abs_tol) noexcept
+{
+  return __diff <= ::cuda::std::max(__abs_tol, static_cast<_Tp>(__rel_tol) * ::cuda::std::max(__lhs_abs, __rhs_abs));
+}
+
+template <class _Tp>
+[[nodiscard]] _CCCL_API constexpr bool
+__isclose_impl(const _Tp __lhs, const _Tp __rhs, const float __rel_tol, const _Tp __abs_tol) noexcept
+{
+  ::cuda::__isclose_validate_tolerances(__rel_tol, __abs_tol);
+
+  if (__lhs == __rhs)
+  {
+    return true;
+  }
+  if (::cuda::std::isnan(__lhs) || ::cuda::std::isnan(__rhs))
+  {
+    return false;
+  }
+  if (::cuda::std::isinf(__lhs) || ::cuda::std::isinf(__rhs))
+  {
+    return false;
+  }
+
+  return ::cuda::__isclose_compare(
+    ::cuda::std::abs(__lhs - __rhs), ::cuda::std::abs(__lhs), ::cuda::std::abs(__rhs), __rel_tol, __abs_tol);
+}
+
+template <class _Tp>
+[[nodiscard]] _CCCL_API _Tp __isclose_hypot(const _Tp __real_part, const _Tp __imag_part) noexcept
+{
+  return ::cuda::std::hypot(__real_part, __imag_part);
+}
+
+template <class _ComplexType, class _AbsTol>
+[[nodiscard]] _CCCL_API bool __isclose_complex_impl(
+  const _ComplexType& __lhs, const _ComplexType& __rhs, const float __rel_tol, const _AbsTol __abs_tol) noexcept
+{
+  using _Value      = typename _ComplexType::value_type;
+  using _Comparison = __isclose_comparison_t<_Value>;
+
+  const auto __lhs_real = static_cast<_Comparison>(::cuda::__get_real(__lhs));
+  const auto __lhs_imag = static_cast<_Comparison>(::cuda::__get_imag(__lhs));
+  const auto __rhs_real = static_cast<_Comparison>(::cuda::__get_real(__rhs));
+  const auto __rhs_imag = static_cast<_Comparison>(::cuda::__get_imag(__rhs));
+  const auto __abs      = static_cast<_Comparison>(__abs_tol);
+
+  ::cuda::__isclose_validate_tolerances(__rel_tol, __abs);
+
+  if (__lhs_real == __rhs_real && __lhs_imag == __rhs_imag)
+  {
+    return true;
+  }
+  if (::cuda::std::isnan(__lhs_real) || ::cuda::std::isnan(__lhs_imag) || ::cuda::std::isnan(__rhs_real)
+      || ::cuda::std::isnan(__rhs_imag))
+  {
+    return false;
+  }
+  if (::cuda::std::isinf(__lhs_real) || ::cuda::std::isinf(__lhs_imag) || ::cuda::std::isinf(__rhs_real)
+      || ::cuda::std::isinf(__rhs_imag))
+  {
+    return false;
+  }
+
+  const auto __diff = ::cuda::__isclose_hypot(
+    static_cast<_Comparison>(__lhs_real - __rhs_real), static_cast<_Comparison>(__lhs_imag - __rhs_imag));
+  const auto __lhs_abs = ::cuda::__isclose_hypot(__lhs_real, __lhs_imag);
+  const auto __rhs_abs = ::cuda::__isclose_hypot(__rhs_real, __rhs_imag);
+  return ::cuda::__isclose_compare(__diff, __lhs_abs, __rhs_abs, __rel_tol, __abs);
+}
+
+//! @brief Checks whether two arithmetic values are close to each other using a relative and absolute tolerance.
+_CCCL_TEMPLATE(class _Tp, class _AbsTol)
+_CCCL_REQUIRES(__isclose_has_abs_tol_v<_Tp, _AbsTol>)
+[[nodiscard]] _CCCL_API constexpr bool
+isclose(const _Tp __lhs, const _Tp __rhs, const float __rel_tol, const _AbsTol __abs_tol) noexcept
+{
+  using _Comparison = __isclose_comparison_t<_Tp>;
+  return ::cuda::__isclose_impl(
+    static_cast<_Comparison>(__lhs), static_cast<_Comparison>(__rhs), __rel_tol, static_cast<_Comparison>(__abs_tol));
+}
+
+//! @brief Checks whether two arithmetic values are close to each other using a relative tolerance.
+_CCCL_TEMPLATE(class _Tp)
+_CCCL_REQUIRES(__isclose_has_comparison_v<_Tp>)
+[[nodiscard]] _CCCL_API constexpr bool isclose(const _Tp __lhs, const _Tp __rhs, const float __rel_tol) noexcept
+{
+  using _Comparison = __isclose_comparison_t<_Tp>;
+  return ::cuda::isclose(__lhs, __rhs, __rel_tol, _Comparison{0});
+}
+
+//! @brief Checks whether two arithmetic values are close to each other using the default relative tolerance.
+_CCCL_TEMPLATE(class _Tp)
+_CCCL_REQUIRES(__isclose_has_comparison_v<_Tp>)
+[[nodiscard]] _CCCL_API constexpr bool isclose(const _Tp __lhs, const _Tp __rhs) noexcept
+{
+  using _Comparison = __isclose_comparison_t<_Tp>;
+  return ::cuda::isclose(__lhs, __rhs, ::cuda::__isclose_default_rel_tol<_Comparison>(), _Comparison{0});
+}
+
+//! @brief Checks whether two complex values are close to each other using a relative and absolute tolerance.
+_CCCL_TEMPLATE(class _ComplexType, class _AbsTol)
+_CCCL_REQUIRES(__isclose_has_complex_abs_tol_v<_ComplexType, _AbsTol>)
+[[nodiscard]] _CCCL_API bool
+isclose(const _ComplexType& __lhs, const _ComplexType& __rhs, const float __rel_tol, const _AbsTol __abs_tol) noexcept
+{
+  return ::cuda::__isclose_complex_impl(__lhs, __rhs, __rel_tol, __abs_tol);
+}
+
+//! @brief Checks whether two complex values are close to each other using a relative tolerance.
+_CCCL_TEMPLATE(class _ComplexType)
+_CCCL_REQUIRES(__isclose_has_complex_comparison_v<_ComplexType>)
+[[nodiscard]] _CCCL_API bool
+isclose(const _ComplexType& __lhs, const _ComplexType& __rhs, const float __rel_tol) noexcept
+{
+  using _Comparison = __isclose_comparison_t<typename _ComplexType::value_type>;
+  return ::cuda::isclose(__lhs, __rhs, __rel_tol, _Comparison{0});
+}
+
+//! @brief Checks whether two complex values are close to each other using the default relative tolerance.
+_CCCL_TEMPLATE(class _ComplexType)
+_CCCL_REQUIRES(__isclose_has_complex_comparison_v<_ComplexType>)
+[[nodiscard]] _CCCL_API bool isclose(const _ComplexType& __lhs, const _ComplexType& __rhs) noexcept
+{
+  using _Comparison = __isclose_comparison_t<typename _ComplexType::value_type>;
+  return ::cuda::isclose(__lhs, __rhs, ::cuda::__isclose_default_rel_tol<_Comparison>(), _Comparison{0});
+}
+
+_CCCL_END_NAMESPACE_CUDA
+
+#include <cuda/std/__cccl/epilogue.h>
+
+#endif // _CUDA___NUMERIC_ISCLOSE_H
diff --git a/libcudacxx/include/cuda/numeric b/libcudacxx/include/cuda/numeric
index f9beaa7fa7f..9cf3feaf42f 100644
--- a/libcudacxx/include/cuda/numeric
+++ b/libcudacxx/include/cuda/numeric
@@ -22,6 +22,7 @@
 
 #include <cuda/__numeric/add_overflow.h>
 #include <cuda/__numeric/div_overflow.h>
+#include <cuda/__numeric/isclose.h>
 #include <cuda/__numeric/mul_overflow.h>
 #include <cuda/__numeric/narrow.h>
 #include <cuda/__numeric/overflow_cast.h>
diff --git a/libcudacxx/test/libcudacxx/cuda/numeric/isclose/isclose.pass.cpp b/libcudacxx/test/libcudacxx/cuda/numeric/isclose/isclose.pass.cpp
new file mode 100644
index 00000000000..25cd3dea157
--- /dev/null
+++ b/libcudacxx/test/libcudacxx/cuda/numeric/isclose/isclose.pass.cpp
@@ -0,0 +1,181 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of libcu++, the C++ Standard Library for your entire system,
+// under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES.
+//
+//===----------------------------------------------------------------------===//
+
+#include <cuda/__complex_>
+#include <cuda/numeric>
+#include <cuda/std/cassert>
+#include <cuda/std/complex>
+#include <cuda/std/limits>
+#include <cuda/std/type_traits>
+
+#include "test_macros.h"
+
+template <class T, class U, class = void>
+inline constexpr bool has_isclose_v = false;
+
+template <class T, class U>
+inline constexpr bool
+  has_isclose_v<T, U, cuda::std::void_t<decltype(cuda::isclose(cuda::std::declval<T>(), cuda::std::declval<U>()))>> =
+    true;
+
+template <class T, class AbsTol, class = void>
+inline constexpr bool has_isclose_abs_tol_v = false;
+
+template <class T, class AbsTol>
+inline constexpr bool has_isclose_abs_tol_v<
+  T,
+  AbsTol,
+  cuda::std::void_t<decltype(cuda::isclose(
+    cuda::std::declval<T>(), cuda::std::declval<T>(), 0.0f, cuda::std::declval<AbsTol>()))>> = true;
+
+template <class T>
+TEST_FUNC constexpr float default_rel_tol()
+{
+  constexpr auto digits = (cuda::std::numeric_limits<T>::max_digits10 + 1) / 2;
+  auto tol              = 1.0f;
+  for (int i = 0; i < digits; ++i)
+  {
+    tol /= 10.0f;
+  }
+  return tol;
+}
+
+template <class T>
+TEST_FUNC constexpr bool test_floating_point()
+{
+  static_assert(cuda::std::is_same_v<bool, decltype(cuda::isclose(T{}, T{}))>);
+  static_assert(cuda::std::is_same_v<bool, decltype(cuda::isclose(T{}, T{}, 0.0f))>);
+  static_assert(cuda::std::is_same_v<bool, decltype(cuda::isclose(T{}, T{}, 0.0f, T{}))>);
+  static_assert(noexcept(cuda::isclose(T{}, T{})));
+  static_assert(noexcept(cuda::isclose(T{}, T{}, 0.0f)));
+  static_assert(noexcept(cuda::isclose(T{}, T{}, 0.0f, T{})));
+
+  constexpr auto tol = default_rel_tol<T>();
+  assert(cuda::isclose(T{1}, T{1}));
+  assert(cuda::isclose(T{1}, T{1} + tol / T{2}));
+  assert(!cuda::isclose(T{1}, T{1} + tol * T{2}));
+
+  assert(cuda::isclose(T{10}, T{11}, 0.1f));
+  assert(cuda::isclose(T{11}, T{10}, 0.1f));
+  assert(!cuda::isclose(T{10}, T{12}, 0.1f));
+
+  assert(!cuda::isclose(T{0}, tol / T{2}));
+  assert(cuda::isclose(T{0}, T{0.5}, 0.0f, T{0.5}));
+  assert(!cuda::isclose(T{0}, T{0.5}, 0.0f, T{0.25}));
+
+  const auto inf = cuda::std::numeric_limits<T>::infinity();
+  const auto nan = cuda::std::numeric_limits<T>::quiet_NaN();
+  assert(cuda::isclose(inf, inf));
+  assert(cuda::isclose(-inf, -inf));
+  assert(!cuda::isclose(inf, -inf));
+  assert(!cuda::isclose(inf, T{1}, 10.0f));
+  assert(!cuda::isclose(nan, nan));
+  assert(!cuda::isclose(nan, T{}));
+
+  return true;
+}
+
+TEST_FUNC constexpr bool test_integral()
+{
+  static_assert(cuda::std::is_same_v<bool, decltype(cuda::isclose(0, 0))>);
+  static_assert(cuda::std::is_same_v<bool, decltype(cuda::isclose(0, 0, 0.0f))>);
+  static_assert(cuda::std::is_same_v<bool, decltype(cuda::isclose(0, 0, 0.0f, 0.0))>);
+  static_assert(noexcept(cuda::isclose(0, 0)));
+  static_assert(noexcept(cuda::isclose(0, 0, 0.0f)));
+  static_assert(noexcept(cuda::isclose(0, 0, 0.0f, 0.0)));
+
+  assert(cuda::isclose(1, 1));
+  assert(cuda::isclose(1u, 1u));
+  assert(!cuda::isclose(1, 2));
+  assert(cuda::isclose(100, 101, 0.02f));
+  assert(cuda::isclose(101, 100, 0.02f));
+  assert(!cuda::isclose(100, 103, 0.02f));
+  assert(cuda::isclose(0, 1, 0.0f, 1.0));
+  assert(!cuda::isclose(0, 1, 0.0f, 0.5));
+
+  static_assert(!has_isclose_v<int, unsigned int>);
+  static_assert(!has_isclose_v<float, double>);
+  static_assert(has_isclose_abs_tol_v<double, float>);
+  static_assert(has_isclose_abs_tol_v<double, double>);
+  static_assert(!has_isclose_abs_tol_v<float, double>);
+  static_assert(has_isclose_abs_tol_v<int, float>);
+  static_assert(has_isclose_abs_tol_v<int, double>);
+
+  return true;
+}
+
+template <class Complex>
+TEST_FUNC void test_complex()
+{
+  using T = typename Complex::value_type;
+
+  static_assert(cuda::std::is_same_v<bool, decltype(cuda::isclose(Complex{}, Complex{}))>);
+  static_assert(cuda::std::is_same_v<bool, decltype(cuda::isclose(Complex{}, Complex{}, 0.0f))>);
+  static_assert(cuda::std::is_same_v<bool, decltype(cuda::isclose(Complex{}, Complex{}, 0.0f, T{}))>);
+  static_assert(noexcept(cuda::isclose(Complex{}, Complex{})));
+  static_assert(noexcept(cuda::isclose(Complex{}, Complex{}, 0.0f)));
+  static_assert(noexcept(cuda::isclose(Complex{}, Complex{}, 0.0f, T{})));
+
+  assert(cuda::isclose(Complex{T{1}, T{2}}, Complex{T{1}, T{2}}));
+  assert(cuda::isclose(Complex{T{3}, T{4}}, Complex{T{3}, T{4.4}}, 0.1f));
+  assert(!cuda::isclose(Complex{T{3}, T{4}}, Complex{T{3}, T{5}}, 0.1f));
+
+  // PEP 485 uses complex magnitudes, not component-wise scalar comparisons.
+  assert(cuda::isclose(Complex{T{1}, T{1}}, Complex{T{2}, T{0}}, 0.75f));
+
+  assert(!cuda::isclose(Complex{T{0}, T{0}}, Complex{T{0.3}, T{0.4}}));
+  assert(cuda::isclose(Complex{T{0}, T{0}}, Complex{T{0.3}, T{0.4}}, 0.0f, T{0.5}));
+  assert(!cuda::isclose(Complex{T{0}, T{0}}, Complex{T{0.3}, T{0.4}}, 0.0f, T{0.25}));
+
+  const auto inf = cuda::std::numeric_limits<T>::infinity();
+  const auto nan = cuda::std::numeric_limits<T>::quiet_NaN();
+  assert(cuda::isclose(Complex{inf, T{1}}, Complex{inf, T{1}}));
+  assert(!cuda::isclose(Complex{inf, T{1}}, Complex{inf, T{2}}, 10.0f));
+  assert(!cuda::isclose(Complex{nan, T{}}, Complex{nan, T{}}));
+  assert(!cuda::isclose(Complex{nan, T{}}, Complex{}));
+}
+
+TEST_FUNC void test_mixed_complex()
+{
+  static_assert(!has_isclose_v<cuda::std::complex<float>, cuda::std::complex<double>>);
+  static_assert(!has_isclose_v<cuda::complex<float>, cuda::complex<double>>);
+  static_assert(!has_isclose_v<cuda::complex<float>, cuda::std::complex<float>>);
+  static_assert(has_isclose_abs_tol_v<cuda::std::complex<double>, float>);
+  static_assert(has_isclose_abs_tol_v<cuda::complex<double>, float>);
+  static_assert(!has_isclose_abs_tol_v<cuda::std::complex<float>, double>);
+  static_assert(!has_isclose_abs_tol_v<cuda::complex<float>, double>);
+}
+
+TEST_FUNC constexpr bool test()
+{
+  test_floating_point<float>();
+  test_floating_point<double>();
+#if _CCCL_HAS_LONG_DOUBLE()
+  test_floating_point<long double>();
+#endif // _CCCL_HAS_LONG_DOUBLE()
+
+  test_integral();
+
+  return true;
+}
+
+int main(int, char**)
+{
+  test();
+  static_assert(test());
+
+  test_complex<cuda::std::complex<float>>();
+  test_complex<cuda::std::complex<double>>();
+  test_complex<cuda::complex<float>>();
+  test_complex<cuda::complex<double>>();
+  test_mixed_complex();
+
+  return 0;
+}

From c8f401a386947fb1fc07bc7128e31bcfcfc5018e Mon Sep 17 00:00:00 2001
From: fbusato <fbusato@nvidia.com>
Date: Tue, 23 Jun 2026 11:13:05 -0700
Subject: [PATCH 07/14] simplifications

---
 libcudacxx/include/cuda/__numeric/isclose.h | 66 ++++++++-------------
 1 file changed, 26 insertions(+), 40 deletions(-)

diff --git a/libcudacxx/include/cuda/__numeric/isclose.h b/libcudacxx/include/cuda/__numeric/isclose.h
index 76e7d7c91e8..4559c34f7de 100644
--- a/libcudacxx/include/cuda/__numeric/isclose.h
+++ b/libcudacxx/include/cuda/__numeric/isclose.h
@@ -24,6 +24,7 @@
 #include <cuda/__cmath/ceil_div.h>
 #include <cuda/__complex/get_real_imag.h>
 #include <cuda/__complex/traits.h>
+#include <cuda/__utility/in_range.h>
 #include <cuda/std/__algorithm/max.h>
 #include <cuda/std/__cmath/abs.h>
 #include <cuda/std/__cmath/hypot.h>
@@ -80,7 +81,7 @@ inline constexpr bool __isclose_has_complex_abs_tol_v<_ComplexType, _AbsTol, tru
   __isclose_has_abs_tol_v<typename _ComplexType::value_type, _AbsTol>;
 
 template <class _Tp>
-[[nodiscard]] _CCCL_API constexpr float __isclose_default_rel_tol() noexcept
+[[nodiscard]] _CCCL_API _CCCL_CONSTEVAL float __isclose_default_relative_tolerance() noexcept
 {
   constexpr auto __digits = ::cuda::ceil_div(::cuda::std::numeric_limits<_Tp>::max_digits10, 2);
   auto __tol              = 1.0f;
@@ -91,49 +92,28 @@ template <class _Tp>
   return __tol;
 }
 
-template <class _Tp>
-_CCCL_API constexpr void __isclose_validate_tolerances(const float __rel_tol, const _Tp __abs_tol) noexcept
-{
-  _CCCL_ASSERT(::cuda::std::isfinite(__rel_tol) && __rel_tol >= 0.0f,
-               "cuda::isclose: relative tolerance must be finite and non-negative");
-  _CCCL_ASSERT(::cuda::std::isfinite(__abs_tol) && __abs_tol >= _Tp{0},
-               "cuda::isclose: absolute tolerance must be finite and non-negative");
-}
-
-template <class _Tp>
-[[nodiscard]] _CCCL_API constexpr bool __isclose_compare(
-  const _Tp __diff, const _Tp __lhs_abs, const _Tp __rhs_abs, const float __rel_tol, const _Tp __abs_tol) noexcept
-{
-  return __diff <= ::cuda::std::max(__abs_tol, static_cast<_Tp>(__rel_tol) * ::cuda::std::max(__lhs_abs, __rhs_abs));
-}
-
 template <class _Tp>
 [[nodiscard]] _CCCL_API constexpr bool
 __isclose_impl(const _Tp __lhs, const _Tp __rhs, const float __rel_tol, const _Tp __abs_tol) noexcept
 {
-  ::cuda::__isclose_validate_tolerances(__rel_tol, __abs_tol);
+  _CCCL_ASSERT(::cuda::in_range(__rel_tol, 0.0f, 1.0f),
+               "cuda::isclose: relative tolerance must be in the range [0.0, 1.0]");
+  _CCCL_ASSERT(::cuda::std::isfinite(__abs_tol) && __abs_tol >= _Tp{0},
+               "cuda::isclose: absolute tolerance must be finite and non-negative");
 
   if (__lhs == __rhs)
   {
     return true;
   }
-  if (::cuda::std::isnan(__lhs) || ::cuda::std::isnan(__rhs))
+  if (!::cuda::std::isfinite(__lhs) || !::cuda::std::isfinite(__rhs))
   {
     return false;
   }
-  if (::cuda::std::isinf(__lhs) || ::cuda::std::isinf(__rhs))
-  {
-    return false;
-  }
-
-  return ::cuda::__isclose_compare(
-    ::cuda::std::abs(__lhs - __rhs), ::cuda::std::abs(__lhs), ::cuda::std::abs(__rhs), __rel_tol, __abs_tol);
-}
-
-template <class _Tp>
-[[nodiscard]] _CCCL_API _Tp __isclose_hypot(const _Tp __real_part, const _Tp __imag_part) noexcept
-{
-  return ::cuda::std::hypot(__real_part, __imag_part);
+  const auto __diff      = ::cuda::std::abs(__lhs - __rhs);
+  const auto __lhs_abs   = ::cuda::std::abs(__lhs);
+  const auto __rhs_abs   = ::cuda::std::abs(__rhs);
+  const auto __rel_value = static_cast<_Tp>(__rel_tol * ::cuda::std::max(__lhs_abs, __rhs_abs));
+  return __diff <= ::cuda::std::max(__abs_tol, __rel_value);
 }
 
 template <class _ComplexType, class _AbsTol>
@@ -143,14 +123,17 @@ template <class _ComplexType, class _AbsTol>
   using _Value      = typename _ComplexType::value_type;
   using _Comparison = __isclose_comparison_t<_Value>;
 
+  _CCCL_ASSERT(::cuda::std::isfinite(__rel_tol) && __rel_tol >= 0.0f,
+               "cuda::isclose: relative tolerance must be finite and non-negative");
+  _CCCL_ASSERT(::cuda::std::isfinite(__abs_tol) && __abs_tol >= _Value{0},
+               "cuda::isclose: absolute tolerance must be finite and non-negative");
+
   const auto __lhs_real = static_cast<_Comparison>(::cuda::__get_real(__lhs));
   const auto __lhs_imag = static_cast<_Comparison>(::cuda::__get_imag(__lhs));
   const auto __rhs_real = static_cast<_Comparison>(::cuda::__get_real(__rhs));
   const auto __rhs_imag = static_cast<_Comparison>(::cuda::__get_imag(__rhs));
   const auto __abs      = static_cast<_Comparison>(__abs_tol);
 
-  ::cuda::__isclose_validate_tolerances(__rel_tol, __abs);
-
   if (__lhs_real == __rhs_real && __lhs_imag == __rhs_imag)
   {
     return true;
@@ -166,13 +149,16 @@ template <class _ComplexType, class _AbsTol>
     return false;
   }
 
-  const auto __diff = ::cuda::__isclose_hypot(
+  const auto __diff = ::cuda::std::hypot(
     static_cast<_Comparison>(__lhs_real - __rhs_real), static_cast<_Comparison>(__lhs_imag - __rhs_imag));
-  const auto __lhs_abs = ::cuda::__isclose_hypot(__lhs_real, __lhs_imag);
-  const auto __rhs_abs = ::cuda::__isclose_hypot(__rhs_real, __rhs_imag);
-  return ::cuda::__isclose_compare(__diff, __lhs_abs, __rhs_abs, __rel_tol, __abs);
+  const auto __lhs_abs = ::cuda::std::hypot(__lhs_real, __lhs_imag);
+  const auto __rhs_abs = ::cuda::std::hypot(__rhs_real, __rhs_imag);
+  return __diff
+      <= ::cuda::std::max(__abs_tol, static_cast<_Comparison>(__rel_tol * ::cuda::std::max(__lhs_abs, __rhs_abs)));
 }
 
+//----------------------------------------------------------------------------------------------------------------------
+
 //! @brief Checks whether two arithmetic values are close to each other using a relative and absolute tolerance.
 _CCCL_TEMPLATE(class _Tp, class _AbsTol)
 _CCCL_REQUIRES(__isclose_has_abs_tol_v<_Tp, _AbsTol>)
@@ -199,7 +185,7 @@ _CCCL_REQUIRES(__isclose_has_comparison_v<_Tp>)
 [[nodiscard]] _CCCL_API constexpr bool isclose(const _Tp __lhs, const _Tp __rhs) noexcept
 {
   using _Comparison = __isclose_comparison_t<_Tp>;
-  return ::cuda::isclose(__lhs, __rhs, ::cuda::__isclose_default_rel_tol<_Comparison>(), _Comparison{0});
+  return ::cuda::isclose(__lhs, __rhs, ::cuda::__isclose_default_relative_tolerance<_Comparison>(), _Comparison{0});
 }
 
 //! @brief Checks whether two complex values are close to each other using a relative and absolute tolerance.
@@ -227,7 +213,7 @@ _CCCL_REQUIRES(__isclose_has_complex_comparison_v<_ComplexType>)
 [[nodiscard]] _CCCL_API bool isclose(const _ComplexType& __lhs, const _ComplexType& __rhs) noexcept
 {
   using _Comparison = __isclose_comparison_t<typename _ComplexType::value_type>;
-  return ::cuda::isclose(__lhs, __rhs, ::cuda::__isclose_default_rel_tol<_Comparison>(), _Comparison{0});
+  return ::cuda::isclose(__lhs, __rhs, ::cuda::__isclose_default_relative_tolerance<_Comparison>(), _Comparison{0});
 }
 
 _CCCL_END_NAMESPACE_CUDA

From 765a138fea48fe8fc14eff077cb907ad5028302a Mon Sep 17 00:00:00 2001
From: fbusato <fbusato@nvidia.com>
Date: Tue, 23 Jun 2026 18:06:54 -0700
Subject: [PATCH 08/14] isclose: add integer support, simplify traits, extend unit tests

---
 libcudacxx/include/cuda/__numeric/isclose.h   | 222 ++++++++++--------
 .../cuda/numeric/isclose/isclose.pass.cpp     | 168 +++++++------
 2 files changed, 214 insertions(+), 176 deletions(-)

diff --git a/libcudacxx/include/cuda/__numeric/isclose.h b/libcudacxx/include/cuda/__numeric/isclose.h
index 4559c34f7de..41a5acf8f8e 100644
--- a/libcudacxx/include/cuda/__numeric/isclose.h
+++ b/libcudacxx/include/cuda/__numeric/isclose.h
@@ -22,85 +22,52 @@
 #endif // no system header
 
 #include <cuda/__cmath/ceil_div.h>
+#include <cuda/__cmath/uabs.h>
 #include <cuda/__complex/get_real_imag.h>
 #include <cuda/__complex/traits.h>
+#include <cuda/__type_traits/is_floating_point.h>
 #include <cuda/__utility/in_range.h>
 #include <cuda/std/__algorithm/max.h>
 #include <cuda/std/__cmath/abs.h>
 #include <cuda/std/__cmath/hypot.h>
 #include <cuda/std/__cmath/isfinite.h>
-#include <cuda/std/__cmath/isinf.h>
-#include <cuda/std/__cmath/isnan.h>
 #include <cuda/std/__concepts/concept_macros.h>
 #include <cuda/std/__limits/numeric_limits.h>
+#include <cuda/std/__type_traits/conditional.h>
+#include <cuda/std/__type_traits/is_extended_floating_point.h>
+#include <cuda/std/__type_traits/is_integer.h>
 #include <cuda/std/__type_traits/is_same.h>
-#include <cuda/std/__type_traits/promote.h>
+#include <cuda/std/__type_traits/is_signed_integer.h>
+#include <cuda/std/__type_traits/make_unsigned.h>
 
 #include <cuda/std/__cccl/prologue.h>
 
 _CCCL_BEGIN_NAMESPACE_CUDA
 
-template <class _Tp>
-using __isclose_comparison_t = ::cuda::std::__promote_t<_Tp>;
+template <typename _Tp>
+using __isclose_compare_t = ::cuda::std::
+  conditional_t<(::cuda::std::__is_extended_floating_point_v<_Tp> && sizeof(_Tp) <= sizeof(float)), float, _Tp>;
 
-template <class _Tp, class _Up>
-using __isclose_comparison2_t = ::cuda::std::__promote_t<__isclose_comparison_t<_Tp>, _Up>;
-
-template <class _Tp, bool = ::cuda::std::__promote<_Tp>::value>
-inline constexpr bool __isclose_has_comparison_v = false;
-
-template <class _Tp>
-inline constexpr bool __isclose_has_comparison_v<_Tp, true> = true;
-
-template <class _Tp, class _Up, bool = __isclose_has_comparison_v<_Tp>>
-inline constexpr bool __isclose_has_comparison2_v = false;
-
-template <class _Tp, class _Up>
-inline constexpr bool __isclose_has_comparison2_v<_Tp, _Up, true> =
-  ::cuda::std::__promote<__isclose_comparison_t<_Tp>, _Up>::value;
-
-template <class _Tp, class _AbsTol, bool = __isclose_has_comparison2_v<_Tp, _AbsTol>>
-inline constexpr bool __isclose_has_abs_tol_v = false;
-
-template <class _Tp, class _AbsTol>
-inline constexpr bool __isclose_has_abs_tol_v<_Tp, _AbsTol, true> =
-  ::cuda::std::is_same_v<__isclose_comparison_t<_Tp>, __isclose_comparison2_t<_Tp, _AbsTol>>;
-
-template <class _ComplexType, bool = __is_cccl_complex_v<_ComplexType>>
-inline constexpr bool __isclose_has_complex_comparison_v = false;
-
-template <class _ComplexType>
-inline constexpr bool __isclose_has_complex_comparison_v<_ComplexType, true> =
-  __isclose_has_comparison_v<typename _ComplexType::value_type>;
-
-template <class _ComplexType, class _AbsTol, bool = __is_cccl_complex_v<_ComplexType>>
-inline constexpr bool __isclose_has_complex_abs_tol_v = false;
-
-template <class _ComplexType, class _AbsTol>
-inline constexpr bool __isclose_has_complex_abs_tol_v<_ComplexType, _AbsTol, true> =
-  __isclose_has_abs_tol_v<typename _ComplexType::value_type, _AbsTol>;
-
-template <class _Tp>
+template <typename _Tp>
 [[nodiscard]] _CCCL_API _CCCL_CONSTEVAL float __isclose_default_relative_tolerance() noexcept
 {
   constexpr auto __digits = ::cuda::ceil_div(::cuda::std::numeric_limits<_Tp>::max_digits10, 2);
-  auto __tol              = 1.0f;
+  auto __exp              = 1.0f;
   for (int __i = 0; __i < __digits; ++__i)
   {
-    __tol /= 10.0f;
+    __exp *= 10.0f;
   }
-  return __tol;
+  return 1.0f / __exp;
 }
 
-template <class _Tp>
+template <typename _Tp>
 [[nodiscard]] _CCCL_API constexpr bool
-__isclose_impl(const _Tp __lhs, const _Tp __rhs, const float __rel_tol, const _Tp __abs_tol) noexcept
+__isclose_fp_impl(const _Tp __lhs, const _Tp __rhs, const float __rel_tol, const _Tp __abs_tol) noexcept
 {
   _CCCL_ASSERT(::cuda::in_range(__rel_tol, 0.0f, 1.0f),
                "cuda::isclose: relative tolerance must be in the range [0.0, 1.0]");
   _CCCL_ASSERT(::cuda::std::isfinite(__abs_tol) && __abs_tol >= _Tp{0},
                "cuda::isclose: absolute tolerance must be finite and non-negative");
-
   if (__lhs == __rhs)
   {
     return true;
@@ -116,104 +83,159 @@ __isclose_impl(const _Tp __lhs, const _Tp __rhs, const float __rel_tol, const _T
   return __diff <= ::cuda::std::max(__abs_tol, __rel_value);
 }
 
-template <class _ComplexType, class _AbsTol>
-[[nodiscard]] _CCCL_API bool __isclose_complex_impl(
+template <typename _ComplexType, typename _AbsTol>
+[[nodiscard]] _CCCL_HOST_DEVICE_API bool __isclose_complex_impl(
   const _ComplexType& __lhs, const _ComplexType& __rhs, const float __rel_tol, const _AbsTol __abs_tol) noexcept
 {
-  using _Value      = typename _ComplexType::value_type;
-  using _Comparison = __isclose_comparison_t<_Value>;
-
-  _CCCL_ASSERT(::cuda::std::isfinite(__rel_tol) && __rel_tol >= 0.0f,
-               "cuda::isclose: relative tolerance must be finite and non-negative");
-  _CCCL_ASSERT(::cuda::std::isfinite(__abs_tol) && __abs_tol >= _Value{0},
+  using __scalar_t  = typename _ComplexType::value_type;
+  using __compare_t = __isclose_compare_t<__scalar_t>;
+  _CCCL_ASSERT(::cuda::in_range(__rel_tol, 0.0f, 1.0f),
+               "cuda::isclose: relative tolerance must be in the range [0.0, 1.0]");
+  _CCCL_ASSERT(::cuda::std::isfinite(__abs_tol) && __abs_tol >= __scalar_t{0},
                "cuda::isclose: absolute tolerance must be finite and non-negative");
 
-  const auto __lhs_real = static_cast<_Comparison>(::cuda::__get_real(__lhs));
-  const auto __lhs_imag = static_cast<_Comparison>(::cuda::__get_imag(__lhs));
-  const auto __rhs_real = static_cast<_Comparison>(::cuda::__get_real(__rhs));
-  const auto __rhs_imag = static_cast<_Comparison>(::cuda::__get_imag(__rhs));
-  const auto __abs      = static_cast<_Comparison>(__abs_tol);
+  const auto __lhs_real = static_cast<__compare_t>(::cuda::__get_real(__lhs));
+  const auto __lhs_imag = static_cast<__compare_t>(::cuda::__get_imag(__lhs));
+  const auto __rhs_real = static_cast<__compare_t>(::cuda::__get_real(__rhs));
+  const auto __rhs_imag = static_cast<__compare_t>(::cuda::__get_imag(__rhs));
+  const auto __abs      = static_cast<__compare_t>(__abs_tol);
 
   if (__lhs_real == __rhs_real && __lhs_imag == __rhs_imag)
   {
     return true;
   }
-  if (::cuda::std::isnan(__lhs_real) || ::cuda::std::isnan(__lhs_imag) || ::cuda::std::isnan(__rhs_real)
-      || ::cuda::std::isnan(__rhs_imag))
+  if (!::cuda::std::isfinite(__lhs_real) || !::cuda::std::isfinite(__lhs_imag) || !::cuda::std::isfinite(__rhs_real)
+      || !::cuda::std::isfinite(__rhs_imag))
   {
     return false;
   }
-  if (::cuda::std::isinf(__lhs_real) || ::cuda::std::isinf(__lhs_imag) || ::cuda::std::isinf(__rhs_real)
-      || ::cuda::std::isinf(__rhs_imag))
+  const auto __diff      = ::cuda::std::hypot(__lhs_real - __rhs_real, __lhs_imag - __rhs_imag);
+  const auto __lhs_abs   = ::cuda::std::hypot(__lhs_real, __lhs_imag);
+  const auto __rhs_abs   = ::cuda::std::hypot(__rhs_real, __rhs_imag);
+  const auto __rel_value = __rel_tol * ::cuda::std::max(__lhs_abs, __rhs_abs);
+  return __diff <= ::cuda::std::max(__abs, __rel_value);
+}
+
+template <typename _Tp>
+[[nodiscard]] _CCCL_API constexpr ::cuda::std::make_unsigned_t<_Tp>
+__safe_diff(const _Tp __lhs, const _Tp __rhs) noexcept
+{
+  using __unsigned_t           = ::cuda::std::make_unsigned_t<_Tp>;
+  const auto __lhs_abs         = ::cuda::uabs(__lhs);
+  const auto __rhs_abs         = ::cuda::uabs(__rhs);
+  const auto __is_lhs_negative = ::cuda::std::__cccl_is_signed_integer_v<_Tp> && __lhs < _Tp{0};
+  const auto __is_rhs_negative = ::cuda::std::__cccl_is_signed_integer_v<_Tp> && __rhs < _Tp{0};
+
+  if (__is_lhs_negative != __is_rhs_negative)
   {
-    return false;
+    return static_cast<__unsigned_t>(__lhs_abs + __rhs_abs);
   }
+  return (__lhs_abs < __rhs_abs)
+         ? static_cast<__unsigned_t>(__rhs_abs - __lhs_abs)
+         : static_cast<__unsigned_t>(__lhs_abs - __rhs_abs);
+}
 
-  const auto __diff = ::cuda::std::hypot(
-    static_cast<_Comparison>(__lhs_real - __rhs_real), static_cast<_Comparison>(__lhs_imag - __rhs_imag));
-  const auto __lhs_abs = ::cuda::std::hypot(__lhs_real, __lhs_imag);
-  const auto __rhs_abs = ::cuda::std::hypot(__rhs_real, __rhs_imag);
-  return __diff
-      <= ::cuda::std::max(__abs_tol, static_cast<_Comparison>(__rel_tol * ::cuda::std::max(__lhs_abs, __rhs_abs)));
+template <typename _Tp>
+[[nodiscard]] _CCCL_API constexpr bool
+__isclose_integer_impl(const _Tp __lhs, const _Tp __rhs, const float __rel_tol, const _Tp __abs_tol) noexcept
+{
+  _CCCL_ASSERT(::cuda::in_range(__rel_tol, 0.0f, 1.0f),
+               "cuda::isclose: relative tolerance must be in the range [0.0, 1.0]");
+  if constexpr (::cuda::std::__cccl_is_signed_integer_v<_Tp>)
+  {
+    _CCCL_ASSERT(__abs_tol >= _Tp{0}, "cuda::isclose: absolute tolerance must be non-negative");
+  }
+  using __unsigned_t     = ::cuda::std::make_unsigned_t<_Tp>;
+  const auto __lhs_abs   = ::cuda::uabs(__lhs);
+  const auto __rhs_abs   = ::cuda::uabs(__rhs);
+  const auto __diff      = ::cuda::__safe_diff(__lhs, __rhs);
+  const auto __abs       = static_cast<__unsigned_t>(__abs_tol);
+  const auto __rel_value = static_cast<__unsigned_t>(__rel_tol * ::cuda::std::max(__lhs_abs, __rhs_abs));
+  return __diff <= ::cuda::std::max(__abs, __rel_value);
 }
 
 //----------------------------------------------------------------------------------------------------------------------
+// Public API
+
+// Scalar overloads
 
 //! @brief Checks whether two arithmetic values are close to each other using a relative and absolute tolerance.
-_CCCL_TEMPLATE(class _Tp, class _AbsTol)
-_CCCL_REQUIRES(__isclose_has_abs_tol_v<_Tp, _AbsTol>)
+_CCCL_TEMPLATE(typename _Tp)
+_CCCL_REQUIRES(::cuda::std::__cccl_is_integer_v<_Tp> || ::cuda::is_floating_point_v<_Tp>)
 [[nodiscard]] _CCCL_API constexpr bool
-isclose(const _Tp __lhs, const _Tp __rhs, const float __rel_tol, const _AbsTol __abs_tol) noexcept
+isclose(const _Tp __lhs, const _Tp __rhs, const float __rel_tol, const _Tp __abs_tol) noexcept
 {
-  using _Comparison = __isclose_comparison_t<_Tp>;
-  return ::cuda::__isclose_impl(
-    static_cast<_Comparison>(__lhs), static_cast<_Comparison>(__rhs), __rel_tol, static_cast<_Comparison>(__abs_tol));
+  if constexpr (::cuda::std::__cccl_is_integer_v<_Tp>)
+  {
+    return ::cuda::__isclose_integer_impl(__lhs, __rhs, __rel_tol, __abs_tol);
+  }
+  else
+  {
+    using __value_t = __isclose_compare_t<_Tp>;
+    return ::cuda::__isclose_fp_impl(
+      static_cast<__value_t>(__lhs), static_cast<__value_t>(__rhs), __rel_tol, static_cast<__value_t>(__abs_tol));
+  }
 }
 
 //! @brief Checks whether two arithmetic values are close to each other using a relative tolerance.
-_CCCL_TEMPLATE(class _Tp)
-_CCCL_REQUIRES(__isclose_has_comparison_v<_Tp>)
+_CCCL_TEMPLATE(typename _Tp)
+_CCCL_REQUIRES(::cuda::std::__cccl_is_integer_v<_Tp> || ::cuda::is_floating_point_v<_Tp>)
 [[nodiscard]] _CCCL_API constexpr bool isclose(const _Tp __lhs, const _Tp __rhs, const float __rel_tol) noexcept
 {
-  using _Comparison = __isclose_comparison_t<_Tp>;
-  return ::cuda::isclose(__lhs, __rhs, __rel_tol, _Comparison{0});
+  return ::cuda::isclose(__lhs, __rhs, __rel_tol, _Tp{0});
 }
 
 //! @brief Checks whether two arithmetic values are close to each other using the default relative tolerance.
-_CCCL_TEMPLATE(class _Tp)
-_CCCL_REQUIRES(__isclose_has_comparison_v<_Tp>)
+_CCCL_TEMPLATE(typename _Tp)
+_CCCL_REQUIRES(::cuda::std::__cccl_is_integer_v<_Tp> || ::cuda::is_floating_point_v<_Tp>)
 [[nodiscard]] _CCCL_API constexpr bool isclose(const _Tp __lhs, const _Tp __rhs) noexcept
 {
-  using _Comparison = __isclose_comparison_t<_Tp>;
-  return ::cuda::isclose(__lhs, __rhs, ::cuda::__isclose_default_relative_tolerance<_Comparison>(), _Comparison{0});
+  if constexpr (::cuda::std::__cccl_is_integer_v<_Tp>)
+  {
+    return __lhs == __rhs;
+  }
+  else
+  {
+    constexpr auto __rel_tol = ::cuda::__isclose_default_relative_tolerance<_Tp>();
+    return ::cuda::isclose(__lhs, __rhs, __rel_tol, _Tp{0});
+  }
 }
 
+// Complex overloads
+
+template <typename _Tp, typename _AbsTol, bool = __is_any_complex_v<_Tp>>
+inline constexpr bool __isclose_complex_comparison_v = false;
+
+template <typename _Tp, typename _AbsTol>
+inline constexpr bool __isclose_complex_comparison_v<_Tp, _AbsTol, true> =
+  ::cuda::std::is_same_v<typename _Tp::value_type, _AbsTol>;
+
 //! @brief Checks whether two complex values are close to each other using a relative and absolute tolerance.
-_CCCL_TEMPLATE(class _ComplexType, class _AbsTol)
-_CCCL_REQUIRES(__isclose_has_complex_abs_tol_v<_ComplexType, _AbsTol>)
-[[nodiscard]] _CCCL_API bool
+_CCCL_TEMPLATE(typename _ComplexType, typename _AbsTol)
+_CCCL_REQUIRES(__isclose_complex_comparison_v<_ComplexType, _AbsTol>)
+[[nodiscard]] _CCCL_HOST_DEVICE_API bool
 isclose(const _ComplexType& __lhs, const _ComplexType& __rhs, const float __rel_tol, const _AbsTol __abs_tol) noexcept
 {
   return ::cuda::__isclose_complex_impl(__lhs, __rhs, __rel_tol, __abs_tol);
 }
 
 //! @brief Checks whether two complex values are close to each other using a relative tolerance.
-_CCCL_TEMPLATE(class _ComplexType)
-_CCCL_REQUIRES(__isclose_has_complex_comparison_v<_ComplexType>)
-[[nodiscard]] _CCCL_API bool
+_CCCL_TEMPLATE(typename _ComplexType)
+_CCCL_REQUIRES(__is_any_complex_v<_ComplexType>)
+[[nodiscard]] _CCCL_HOST_DEVICE_API bool
 isclose(const _ComplexType& __lhs, const _ComplexType& __rhs, const float __rel_tol) noexcept
 {
-  using _Comparison = __isclose_comparison_t<typename _ComplexType::value_type>;
-  return ::cuda::isclose(__lhs, __rhs, __rel_tol, _Comparison{0});
+  using __scalar_t = typename _ComplexType::value_type;
+  return ::cuda::isclose(__lhs, __rhs, __rel_tol, __scalar_t{0});
 }
 
 //! @brief Checks whether two complex values are close to each other using the default relative tolerance.
-_CCCL_TEMPLATE(class _ComplexType)
-_CCCL_REQUIRES(__isclose_has_complex_comparison_v<_ComplexType>)
-[[nodiscard]] _CCCL_API bool isclose(const _ComplexType& __lhs, const _ComplexType& __rhs) noexcept
+_CCCL_TEMPLATE(typename _ComplexType)
+_CCCL_REQUIRES(__is_any_complex_v<_ComplexType>)
+[[nodiscard]] _CCCL_HOST_DEVICE_API bool isclose(const _ComplexType& __lhs, const _ComplexType& __rhs) noexcept
 {
-  using _Comparison = __isclose_comparison_t<typename _ComplexType::value_type>;
-  return ::cuda::isclose(__lhs, __rhs, ::cuda::__isclose_default_relative_tolerance<_Comparison>(), _Comparison{0});
+  using __scalar_t = typename _ComplexType::value_type;
+  return ::cuda::isclose(__lhs, __rhs, ::cuda::__isclose_default_relative_tolerance<__scalar_t>(), __scalar_t{0});
 }
 
 _CCCL_END_NAMESPACE_CUDA
diff --git a/libcudacxx/test/libcudacxx/cuda/numeric/isclose/isclose.pass.cpp b/libcudacxx/test/libcudacxx/cuda/numeric/isclose/isclose.pass.cpp
index 25cd3dea157..54cef3f3761 100644
--- a/libcudacxx/test/libcudacxx/cuda/numeric/isclose/isclose.pass.cpp
+++ b/libcudacxx/test/libcudacxx/cuda/numeric/isclose/isclose.pass.cpp
@@ -15,15 +15,11 @@
 #include <cuda/std/limits>
 #include <cuda/std/type_traits>
 
-#include "test_macros.h"
-
-template <class T, class U, class = void>
-inline constexpr bool has_isclose_v = false;
+#if _CCCL_HAS_HOST_STD_LIB()
+#  include <complex>
+#endif // _CCCL_HAS_HOST_STD_LIB()
 
-template <class T, class U>
-inline constexpr bool
-  has_isclose_v<T, U, cuda::std::void_t<decltype(cuda::isclose(cuda::std::declval<T>(), cuda::std::declval<U>()))>> =
-    true;
+#include "test_macros.h"
 
 template <class T, class AbsTol, class = void>
 inline constexpr bool has_isclose_abs_tol_v = false;
@@ -35,18 +31,6 @@ inline constexpr bool has_isclose_abs_tol_v<
   cuda::std::void_t<decltype(cuda::isclose(
     cuda::std::declval<T>(), cuda::std::declval<T>(), 0.0f, cuda::std::declval<AbsTol>()))>> = true;
 
-template <class T>
-TEST_FUNC constexpr float default_rel_tol()
-{
-  constexpr auto digits = (cuda::std::numeric_limits<T>::max_digits10 + 1) / 2;
-  auto tol              = 1.0f;
-  for (int i = 0; i < digits; ++i)
-  {
-    tol /= 10.0f;
-  }
-  return tol;
-}
-
 template <class T>
 TEST_FUNC constexpr bool test_floating_point()
 {
@@ -57,57 +41,40 @@ TEST_FUNC constexpr bool test_floating_point()
   static_assert(noexcept(cuda::isclose(T{}, T{}, 0.0f)));
   static_assert(noexcept(cuda::isclose(T{}, T{}, 0.0f, T{})));
 
-  constexpr auto tol = default_rel_tol<T>();
+  constexpr auto tol = cuda::__isclose_default_relative_tolerance<T>();
   assert(cuda::isclose(T{1}, T{1}));
-  assert(cuda::isclose(T{1}, T{1} + tol / T{2}));
-  assert(!cuda::isclose(T{1}, T{1} + tol * T{2}));
+  assert(cuda::isclose(T{1}, T{1} + static_cast<T>(tol / 2.0f)));
+  assert(!cuda::isclose(T{1}, T{1} + static_cast<T>(tol * 2.0f)));
 
   assert(cuda::isclose(T{10}, T{11}, 0.1f));
   assert(cuda::isclose(T{11}, T{10}, 0.1f));
   assert(!cuda::isclose(T{10}, T{12}, 0.1f));
 
-  assert(!cuda::isclose(T{0}, tol / T{2}));
-  assert(cuda::isclose(T{0}, T{0.5}, 0.0f, T{0.5}));
-  assert(!cuda::isclose(T{0}, T{0.5}, 0.0f, T{0.25}));
-
   const auto inf = cuda::std::numeric_limits<T>::infinity();
   const auto nan = cuda::std::numeric_limits<T>::quiet_NaN();
   assert(cuda::isclose(inf, inf));
   assert(cuda::isclose(-inf, -inf));
   assert(!cuda::isclose(inf, -inf));
-  assert(!cuda::isclose(inf, T{1}, 10.0f));
   assert(!cuda::isclose(nan, nan));
   assert(!cuda::isclose(nan, T{}));
-
   return true;
 }
 
+template <typename T>
 TEST_FUNC constexpr bool test_integral()
 {
-  static_assert(cuda::std::is_same_v<bool, decltype(cuda::isclose(0, 0))>);
-  static_assert(cuda::std::is_same_v<bool, decltype(cuda::isclose(0, 0, 0.0f))>);
-  static_assert(cuda::std::is_same_v<bool, decltype(cuda::isclose(0, 0, 0.0f, 0.0))>);
-  static_assert(noexcept(cuda::isclose(0, 0)));
-  static_assert(noexcept(cuda::isclose(0, 0, 0.0f)));
-  static_assert(noexcept(cuda::isclose(0, 0, 0.0f, 0.0)));
-
-  assert(cuda::isclose(1, 1));
-  assert(cuda::isclose(1u, 1u));
-  assert(!cuda::isclose(1, 2));
-  assert(cuda::isclose(100, 101, 0.02f));
-  assert(cuda::isclose(101, 100, 0.02f));
-  assert(!cuda::isclose(100, 103, 0.02f));
-  assert(cuda::isclose(0, 1, 0.0f, 1.0));
-  assert(!cuda::isclose(0, 1, 0.0f, 0.5));
-
-  static_assert(!has_isclose_v<int, unsigned int>);
-  static_assert(!has_isclose_v<float, double>);
-  static_assert(has_isclose_abs_tol_v<double, float>);
-  static_assert(has_isclose_abs_tol_v<double, double>);
-  static_assert(!has_isclose_abs_tol_v<float, double>);
-  static_assert(has_isclose_abs_tol_v<int, float>);
-  static_assert(has_isclose_abs_tol_v<int, double>);
+  static_assert(cuda::std::is_same_v<bool, decltype(cuda::isclose(T{}, T{}))>);
+  static_assert(cuda::std::is_same_v<bool, decltype(cuda::isclose(T{}, T{}, 0.0f))>);
+  static_assert(cuda::std::is_same_v<bool, decltype(cuda::isclose(T{}, T{}, 0.0f, T{}))>);
+  static_assert(noexcept(cuda::isclose(T{}, T{})));
+  static_assert(noexcept(cuda::isclose(T{}, T{}, 0.0f)));
+  static_assert(noexcept(cuda::isclose(T{}, T{}, 0.0f, T{})));
 
+  assert(cuda::isclose(T{1}, T{1}));
+  assert(!cuda::isclose(T{1}, T{2}));
+  assert(cuda::isclose(T{100}, T{101}, 0.02f));
+  assert(cuda::isclose(T{101}, T{100}, 0.02f));
+  assert(!cuda::isclose(T{100}, T{103}, 0.02f));
   return true;
 }
 
@@ -115,42 +82,47 @@ template <class Complex>
 TEST_FUNC void test_complex()
 {
   using T = typename Complex::value_type;
-
   static_assert(cuda::std::is_same_v<bool, decltype(cuda::isclose(Complex{}, Complex{}))>);
   static_assert(cuda::std::is_same_v<bool, decltype(cuda::isclose(Complex{}, Complex{}, 0.0f))>);
   static_assert(cuda::std::is_same_v<bool, decltype(cuda::isclose(Complex{}, Complex{}, 0.0f, T{}))>);
-  static_assert(noexcept(cuda::isclose(Complex{}, Complex{})));
-  static_assert(noexcept(cuda::isclose(Complex{}, Complex{}, 0.0f)));
-  static_assert(noexcept(cuda::isclose(Complex{}, Complex{}, 0.0f, T{})));
+  static_assert(noexcept(cuda::isclose(cuda::std::declval<Complex>(), cuda::std::declval<Complex>())));
+  static_assert(noexcept(cuda::isclose(cuda::std::declval<Complex>(), cuda::std::declval<Complex>(), 0.0f)));
+  static_assert(noexcept(
+    cuda::isclose(cuda::std::declval<Complex>(), cuda::std::declval<Complex>(), 0.0f, cuda::std::declval<T>())));
 
   assert(cuda::isclose(Complex{T{1}, T{2}}, Complex{T{1}, T{2}}));
   assert(cuda::isclose(Complex{T{3}, T{4}}, Complex{T{3}, T{4.4}}, 0.1f));
   assert(!cuda::isclose(Complex{T{3}, T{4}}, Complex{T{3}, T{5}}, 0.1f));
 
-  // PEP 485 uses complex magnitudes, not component-wise scalar comparisons.
-  assert(cuda::isclose(Complex{T{1}, T{1}}, Complex{T{2}, T{0}}, 0.75f));
-
   assert(!cuda::isclose(Complex{T{0}, T{0}}, Complex{T{0.3}, T{0.4}}));
-  assert(cuda::isclose(Complex{T{0}, T{0}}, Complex{T{0.3}, T{0.4}}, 0.0f, T{0.5}));
+  auto abs_tol = T{0.5};
+#if _LIBCUDACXX_HAS_NVBF16()
+  if constexpr (cuda::std::is_same_v<T, __nv_bfloat16>)
+  {
+    abs_tol = T{0.51};
+  }
+#endif // _LIBCUDACXX_HAS_NVBF16()
+  assert(cuda::isclose(Complex{T{0}, T{0}}, Complex{T{0.3}, T{0.4}}, 0.0f, abs_tol));
   assert(!cuda::isclose(Complex{T{0}, T{0}}, Complex{T{0.3}, T{0.4}}, 0.0f, T{0.25}));
 
   const auto inf = cuda::std::numeric_limits<T>::infinity();
   const auto nan = cuda::std::numeric_limits<T>::quiet_NaN();
   assert(cuda::isclose(Complex{inf, T{1}}, Complex{inf, T{1}}));
-  assert(!cuda::isclose(Complex{inf, T{1}}, Complex{inf, T{2}}, 10.0f));
+  assert(!cuda::isclose(Complex{inf, T{1}}, Complex{inf, T{2}}, 1.0f));
   assert(!cuda::isclose(Complex{nan, T{}}, Complex{nan, T{}}));
   assert(!cuda::isclose(Complex{nan, T{}}, Complex{}));
 }
 
-TEST_FUNC void test_mixed_complex()
+TEST_FUNC constexpr void test_invalid_complex_cases()
 {
-  static_assert(!has_isclose_v<cuda::std::complex<float>, cuda::std::complex<double>>);
-  static_assert(!has_isclose_v<cuda::complex<float>, cuda::complex<double>>);
-  static_assert(!has_isclose_v<cuda::complex<float>, cuda::std::complex<float>>);
-  static_assert(has_isclose_abs_tol_v<cuda::std::complex<double>, float>);
-  static_assert(has_isclose_abs_tol_v<cuda::complex<double>, float>);
+  static_assert(!has_isclose_abs_tol_v<cuda::std::complex<double>, float>);
+  static_assert(!has_isclose_abs_tol_v<cuda::complex<double>, float>);
   static_assert(!has_isclose_abs_tol_v<cuda::std::complex<float>, double>);
   static_assert(!has_isclose_abs_tol_v<cuda::complex<float>, double>);
+#if _CCCL_HAS_HOST_STD_LIB()
+  static_assert(!has_isclose_abs_tol_v<std::complex<double>, float>);
+  static_assert(!has_isclose_abs_tol_v<std::complex<float>, double>);
+#endif // _CCCL_HAS_HOST_STD_LIB()
 }
 
 TEST_FUNC constexpr bool test()
@@ -160,22 +132,66 @@ TEST_FUNC constexpr bool test()
 #if _CCCL_HAS_LONG_DOUBLE()
   test_floating_point<long double>();
 #endif // _CCCL_HAS_LONG_DOUBLE()
+#if _CCCL_HAS_FLOAT128()
+  test_floating_point<__float128>();
+#endif // _CCCL_HAS_FLOAT128()
+  test_integral<signed char>();
+  test_integral<unsigned char>();
+  test_integral<short>();
+  test_integral<unsigned short>();
+  test_integral<int>();
+  test_integral<unsigned>();
+  test_integral<long>();
+  test_integral<unsigned long>();
+  test_integral<long long>();
+  test_integral<unsigned long long>();
+#if _CCCL_HAS_INT128()
+  test_integral<__int128_t>();
+  test_integral<__uint128_t>();
+#endif // _CCCL_HAS_INT128()
+
+  test_invalid_complex_cases();
+  return true;
+}
 
-  test_integral();
+template <template <typename> class Complex>
+TEST_FUNC void test_complex_types_runtime()
+{
+  test_complex<Complex<float>>();
+  test_complex<Complex<double>>();
+#if _CCCL_HAS_LONG_DOUBLE()
+  test_complex<Complex<long double>>();
+#endif // _CCCL_HAS_LONG_DOUBLE()
+  // complex__float128 support requires std::hypot overload
+}
 
+TEST_FUNC bool test_runtime()
+{
+#if _LIBCUDACXX_HAS_NVFP16()
+  test_floating_point<__half>();
+#endif // _LIBCUDACXX_HAS_NVFP16()
+#if _LIBCUDACXX_HAS_NVBF16()
+  test_floating_point<__nv_bfloat16>();
+#endif // _LIBCUDACXX_HAS_NVBF16()
+
+  test_complex_types_runtime<cuda::std::complex>();
+#if _LIBCUDACXX_HAS_NVFP16()
+  test_complex<cuda::std::complex<__half>>();
+#endif // _LIBCUDACXX_HAS_NVFP16()
+#if _LIBCUDACXX_HAS_NVBF16()
+  test_complex<cuda::std::complex<__nv_bfloat16>>();
+#endif // _LIBCUDACXX_HAS_NVBF16()
+  test_complex_types_runtime<cuda::complex>();
+#if _CCCL_HAS_HOST_STD_LIB()
+  NV_IF_TARGET(NV_IS_HOST, (test_complex_types_runtime<std::complex>();))
+#endif // _CCCL_HAS_HOST_STD_LIB()
   return true;
 }
 
 int main(int, char**)
 {
-  test();
+  assert(test());
+  assert(test_runtime());
   static_assert(test());
-
-  test_complex<cuda::std::complex<float>>();
-  test_complex<cuda::std::complex<double>>();
-  test_complex<cuda::complex<float>>();
-  test_complex<cuda::complex<double>>();
-  test_mixed_complex();
-
   return 0;
 }

From 1d5033228046b6991d88b03eb5540867ddd3f43e Mon Sep 17 00:00:00 2001
From: Federico Busato <50413820+fbusato@users.noreply.github.com>
Date: Wed, 24 Jun 2026 09:54:18 -0700
Subject: [PATCH 09/14] isclose: mark __isclose_compare_t as _CCCL_NODEBUG_ALIAS

Co-authored-by: Jacob Faibussowitsch <jacob.fai@gmail.com>
---
 libcudacxx/include/cuda/__numeric/isclose.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/libcudacxx/include/cuda/__numeric/isclose.h b/libcudacxx/include/cuda/__numeric/isclose.h
index 41a5acf8f8e..49d7ebb1248 100644
--- a/libcudacxx/include/cuda/__numeric/isclose.h
+++ b/libcudacxx/include/cuda/__numeric/isclose.h
@@ -45,7 +45,7 @@
 _CCCL_BEGIN_NAMESPACE_CUDA
 
 template <typename _Tp>
-using __isclose_compare_t = ::cuda::std::
+using __isclose_compare_t _CCCL_NODEBUG_ALIAS = ::cuda::std::
   conditional_t<(::cuda::std::__is_extended_floating_point_v<_Tp> && sizeof(_Tp) <= sizeof(float)), float, _Tp>;
 
 template <typename _Tp>

From c5657f4273dbec5022b2dcf6382020206bc44fb6 Mon Sep 17 00:00:00 2001
From: fbusato <fbusato@nvidia.com>
Date: Wed, 24 Jun 2026 10:53:40 -0700
Subject: [PATCH 10/14] a few improvements

---
 libcudacxx/include/cuda/__numeric/isclose.h | 82 +++++++++++++++------
 1 file changed, 58 insertions(+), 24 deletions(-)

diff --git a/libcudacxx/include/cuda/__numeric/isclose.h b/libcudacxx/include/cuda/__numeric/isclose.h
index 49d7ebb1248..ad7efb13948 100644
--- a/libcudacxx/include/cuda/__numeric/isclose.h
+++ b/libcudacxx/include/cuda/__numeric/isclose.h
@@ -31,6 +31,7 @@
 #include <cuda/std/__cmath/abs.h>
 #include <cuda/std/__cmath/hypot.h>
 #include <cuda/std/__cmath/isfinite.h>
+#include <cuda/std/__cmath/min_max.h>
 #include <cuda/std/__concepts/concept_macros.h>
 #include <cuda/std/__limits/numeric_limits.h>
 #include <cuda/std/__type_traits/conditional.h>
@@ -76,19 +77,22 @@ __isclose_fp_impl(const _Tp __lhs, const _Tp __rhs, const float __rel_tol, const
   {
     return false;
   }
-  const auto __diff      = ::cuda::std::abs(__lhs - __rhs);
-  const auto __lhs_abs   = ::cuda::std::abs(__lhs);
-  const auto __rhs_abs   = ::cuda::std::abs(__rhs);
-  const auto __rel_value = static_cast<_Tp>(__rel_tol * ::cuda::std::max(__lhs_abs, __rhs_abs));
-  return __diff <= ::cuda::std::max(__abs_tol, __rel_value);
+  const auto __diff      = ::cuda::std::fabs(__lhs - __rhs);
+  const auto __lhs_abs   = ::cuda::std::fabs(__lhs);
+  const auto __rhs_abs   = ::cuda::std::fabs(__rhs);
+  const auto __rel_value = static_cast<_Tp>(__rel_tol * ::cuda::std::fmax(__lhs_abs, __rhs_abs));
+  return __diff <= ::cuda::std::fmax(__abs_tol, __rel_value);
 }
 
 template <typename _ComplexType, typename _AbsTol>
 [[nodiscard]] _CCCL_HOST_DEVICE_API bool __isclose_complex_impl(
   const _ComplexType& __lhs, const _ComplexType& __rhs, const float __rel_tol, const _AbsTol __abs_tol) noexcept
 {
-  using __scalar_t  = typename _ComplexType::value_type;
-  using __compare_t = __isclose_compare_t<__scalar_t>;
+  using __scalar_t _CCCL_NODEBUG_ALIAS  = typename _ComplexType::value_type;
+  using __compare_t _CCCL_NODEBUG_ALIAS = __isclose_compare_t<__scalar_t>;
+#if _CCCL_HAS_FLOAT128()
+  static_assert(!::cuda::std::is_same_v<__scalar_t, __float128>, "cuda::isclose: __float128 is not supported");
+#endif // _CCCL_HAS_FLOAT128()
   _CCCL_ASSERT(::cuda::in_range(__rel_tol, 0.0f, 1.0f),
                "cuda::isclose: relative tolerance must be in the range [0.0, 1.0]");
   _CCCL_ASSERT(::cuda::std::isfinite(__abs_tol) && __abs_tol >= __scalar_t{0},
@@ -112,19 +116,19 @@ template <typename _ComplexType, typename _AbsTol>
   const auto __diff      = ::cuda::std::hypot(__lhs_real - __rhs_real, __lhs_imag - __rhs_imag);
   const auto __lhs_abs   = ::cuda::std::hypot(__lhs_real, __lhs_imag);
   const auto __rhs_abs   = ::cuda::std::hypot(__rhs_real, __rhs_imag);
-  const auto __rel_value = __rel_tol * ::cuda::std::max(__lhs_abs, __rhs_abs);
-  return __diff <= ::cuda::std::max(__abs, __rel_value);
+  const auto __rel_value = __rel_tol * ::cuda::std::fmax(__lhs_abs, __rhs_abs);
+  return __diff <= ::cuda::std::fmax(__abs, __rel_value);
 }
 
 template <typename _Tp>
 [[nodiscard]] _CCCL_API constexpr ::cuda::std::make_unsigned_t<_Tp>
-__safe_diff(const _Tp __lhs, const _Tp __rhs) noexcept
+__safe_abs_diff(const _Tp __lhs, const _Tp __rhs) noexcept
 {
-  using __unsigned_t           = ::cuda::std::make_unsigned_t<_Tp>;
-  const auto __lhs_abs         = ::cuda::uabs(__lhs);
-  const auto __rhs_abs         = ::cuda::uabs(__rhs);
-  const auto __is_lhs_negative = ::cuda::std::__cccl_is_signed_integer_v<_Tp> && __lhs < _Tp{0};
-  const auto __is_rhs_negative = ::cuda::std::__cccl_is_signed_integer_v<_Tp> && __rhs < _Tp{0};
+  using __unsigned_t _CCCL_NODEBUG_ALIAS = ::cuda::std::make_unsigned_t<_Tp>;
+  const auto __lhs_abs                   = ::cuda::uabs(__lhs);
+  const auto __rhs_abs                   = ::cuda::uabs(__rhs);
+  const auto __is_lhs_negative           = ::cuda::std::is_signed_v<_Tp> && __lhs < _Tp{0};
+  const auto __is_rhs_negative           = ::cuda::std::is_signed_v<_Tp> && __rhs < _Tp{0};
 
   if (__is_lhs_negative != __is_rhs_negative)
   {
@@ -145,11 +149,11 @@ __isclose_integer_impl(const _Tp __lhs, const _Tp __rhs, const float __rel_tol,
   {
     _CCCL_ASSERT(__abs_tol >= _Tp{0}, "cuda::isclose: absolute tolerance must be non-negative");
   }
-  using __unsigned_t     = ::cuda::std::make_unsigned_t<_Tp>;
-  const auto __lhs_abs   = ::cuda::uabs(__lhs);
-  const auto __rhs_abs   = ::cuda::uabs(__rhs);
-  const auto __diff      = ::cuda::__safe_diff(__lhs, __rhs);
-  const auto __abs       = static_cast<__unsigned_t>(__abs_tol);
+  using __unsigned_t _CCCL_NODEBUG_ALIAS = ::cuda::std::make_unsigned_t<_Tp>;
+  const auto __lhs_abs                   = ::cuda::uabs(__lhs);
+  const auto __rhs_abs                   = ::cuda::uabs(__rhs);
+  const auto __diff                      = ::cuda::__safe_abs_diff(__lhs, __rhs);
+  const auto __abs                       = static_cast<__unsigned_t>(__abs_tol);
   const auto __rel_value = static_cast<__unsigned_t>(__rel_tol * ::cuda::std::max(__lhs_abs, __rhs_abs));
   return __diff <= ::cuda::std::max(__abs, __rel_value);
 }
@@ -160,6 +164,12 @@ __isclose_integer_impl(const _Tp __lhs, const _Tp __rhs, const float __rel_tol,
 // Scalar overloads
 
 //! @brief Checks whether two arithmetic values are close to each other using a relative and absolute tolerance.
+//!
+//! @param __lhs The first value to compare.
+//! @param __rhs The second value to compare.
+//! @param __rel_tol The relative tolerance.
+//! @param __abs_tol The absolute tolerance.
+//! @return True if __lhs and __rhs are close to each other, false otherwise.
 _CCCL_TEMPLATE(typename _Tp)
 _CCCL_REQUIRES(::cuda::std::__cccl_is_integer_v<_Tp> || ::cuda::is_floating_point_v<_Tp>)
 [[nodiscard]] _CCCL_API constexpr bool
@@ -167,17 +177,22 @@ isclose(const _Tp __lhs, const _Tp __rhs, const float __rel_tol, const _Tp __abs
 {
   if constexpr (::cuda::std::__cccl_is_integer_v<_Tp>)
   {
-    return ::cuda::__isclose_integer_impl(__lhs, __rhs, __rel_tol, __abs_tol);
+    return ::cuda::__isclose_integer_impl(+__lhs, +__rhs, __rel_tol, +__abs_tol);
   }
   else
   {
-    using __value_t = __isclose_compare_t<_Tp>;
+    using __value_t _CCCL_NODEBUG_ALIAS = __isclose_compare_t<_Tp>;
     return ::cuda::__isclose_fp_impl(
       static_cast<__value_t>(__lhs), static_cast<__value_t>(__rhs), __rel_tol, static_cast<__value_t>(__abs_tol));
   }
 }
 
 //! @brief Checks whether two arithmetic values are close to each other using a relative tolerance.
+//!
+//! @param __lhs The first value to compare.
+//! @param __rhs The second value to compare.
+//! @param __rel_tol The relative tolerance.
+//! @return True if __lhs and __rhs are close to each other, false otherwise.
 _CCCL_TEMPLATE(typename _Tp)
 _CCCL_REQUIRES(::cuda::std::__cccl_is_integer_v<_Tp> || ::cuda::is_floating_point_v<_Tp>)
 [[nodiscard]] _CCCL_API constexpr bool isclose(const _Tp __lhs, const _Tp __rhs, const float __rel_tol) noexcept
@@ -186,6 +201,10 @@ _CCCL_REQUIRES(::cuda::std::__cccl_is_integer_v<_Tp> || ::cuda::is_floating_poin
 }
 
 //! @brief Checks whether two arithmetic values are close to each other using the default relative tolerance.
+//!
+//! @param __lhs The first value to compare.
+//! @param __rhs The second value to compare.
+//! @return True if __lhs and __rhs are close to each other, false otherwise.
 _CCCL_TEMPLATE(typename _Tp)
 _CCCL_REQUIRES(::cuda::std::__cccl_is_integer_v<_Tp> || ::cuda::is_floating_point_v<_Tp>)
 [[nodiscard]] _CCCL_API constexpr bool isclose(const _Tp __lhs, const _Tp __rhs) noexcept
@@ -211,6 +230,12 @@ inline constexpr bool __isclose_complex_comparison_v<_Tp, _AbsTol, true> =
   ::cuda::std::is_same_v<typename _Tp::value_type, _AbsTol>;
 
 //! @brief Checks whether two complex values are close to each other using a relative and absolute tolerance.
+//!
+//! @param __lhs The first value to compare.
+//! @param __rhs The second value to compare.
+//! @param __rel_tol The relative tolerance.
+//! @param __abs_tol The absolute tolerance.
+//! @return True if __lhs and __rhs are close to each other, false otherwise.
 _CCCL_TEMPLATE(typename _ComplexType, typename _AbsTol)
 _CCCL_REQUIRES(__isclose_complex_comparison_v<_ComplexType, _AbsTol>)
 [[nodiscard]] _CCCL_HOST_DEVICE_API bool
@@ -220,21 +245,30 @@ isclose(const _ComplexType& __lhs, const _ComplexType& __rhs, const float __rel_
 }
 
 //! @brief Checks whether two complex values are close to each other using a relative tolerance.
+//!
+//! @param __lhs The first value to compare.
+//! @param __rhs The second value to compare.
+//! @param __rel_tol The relative tolerance.
+//! @return True if __lhs and __rhs are close to each other, false otherwise.
 _CCCL_TEMPLATE(typename _ComplexType)
 _CCCL_REQUIRES(__is_any_complex_v<_ComplexType>)
 [[nodiscard]] _CCCL_HOST_DEVICE_API bool
 isclose(const _ComplexType& __lhs, const _ComplexType& __rhs, const float __rel_tol) noexcept
 {
-  using __scalar_t = typename _ComplexType::value_type;
+  using __scalar_t _CCCL_NODEBUG_ALIAS = typename _ComplexType::value_type;
   return ::cuda::isclose(__lhs, __rhs, __rel_tol, __scalar_t{0});
 }
 
 //! @brief Checks whether two complex values are close to each other using the default relative tolerance.
+//!
+//! @param __lhs The first value to compare.
+//! @param __rhs The second value to compare.
+//! @return True if __lhs and __rhs are close to each other, false otherwise.
 _CCCL_TEMPLATE(typename _ComplexType)
 _CCCL_REQUIRES(__is_any_complex_v<_ComplexType>)
 [[nodiscard]] _CCCL_HOST_DEVICE_API bool isclose(const _ComplexType& __lhs, const _ComplexType& __rhs) noexcept
 {
-  using __scalar_t = typename _ComplexType::value_type;
+  using __scalar_t _CCCL_NODEBUG_ALIAS = typename _ComplexType::value_type;
   return ::cuda::isclose(__lhs, __rhs, ::cuda::__isclose_default_relative_tolerance<__scalar_t>(), __scalar_t{0});
 }
 

From b22956cca34ec9caeb7833490cb76b20775ea9a7 Mon Sep 17 00:00:00 2001
From: fbusato <fbusato@nvidia.com>
Date: Fri, 26 Jun 2026 12:47:00 -0700
Subject: [PATCH 11/14] improve test, docs

---
 .../extended_api/numeric/isclose.rst          | 62 ++++++++-----------
 libcudacxx/include/cuda/__numeric/isclose.h   |  1 +
 .../cuda/numeric/isclose/isclose.pass.cpp     | 21 +++++++
 3 files changed, 47 insertions(+), 37 deletions(-)

diff --git a/docs/libcudacxx/extended_api/numeric/isclose.rst b/docs/libcudacxx/extended_api/numeric/isclose.rst
index 2da1f5d8b08..694074179a3 100644
--- a/docs/libcudacxx/extended_api/numeric/isclose.rst
+++ b/docs/libcudacxx/extended_api/numeric/isclose.rst
@@ -3,51 +3,52 @@
 ``cuda::isclose``
 =================
 
+Defined in ``<cuda/numeric>`` header.
+
 .. code:: cpp
 
+   namespace cuda {
+
    template <class T>
    [[nodiscard]] constexpr bool isclose(T lhs, T rhs) noexcept;
 
    template <class T>
-   [[nodiscard]] constexpr bool isclose(T lhs, T rhs, float rel_tol) noexcept;
+   [[nodiscard]] constexpr bool isclose(T lhs, T rhs, float relative_tolerance) noexcept;
 
    template <class T, class AbsTol>
-   [[nodiscard]] constexpr bool isclose(T lhs, T rhs, float rel_tol, AbsTol abs_tol) noexcept;
+   [[nodiscard]] constexpr bool isclose(T lhs, T rhs, float relative_tolerance, AbsTol absolute_tolerance) noexcept;
 
    template <class Complex>
    [[nodiscard]] bool isclose(const Complex& lhs, const Complex& rhs) noexcept;
 
    template <class Complex>
-   [[nodiscard]] bool isclose(const Complex& lhs, const Complex& rhs, float rel_tol) noexcept;
+   [[nodiscard]] bool isclose(const Complex& lhs, const Complex& rhs, float relative_tolerance) noexcept;
 
    template <class Complex, class AbsTol>
-   [[nodiscard]] bool isclose(const Complex& lhs, const Complex& rhs, float rel_tol, AbsTol abs_tol) noexcept;
-
-``cuda::isclose`` checks whether two values are approximately equal using the weak symmetric comparison described by
-`PEP 485 <https://peps.python.org/pep-0485/>`_:
-
-.. code:: cpp
+   [[nodiscard]] bool isclose(const Complex& lhs, const Complex& rhs, float relative_tolerance, AbsTol absolute_tolerance) noexcept;
 
-   abs(lhs - rhs) <= max(abs_tol, rel_tol * max(abs(lhs), abs(rhs)))
+   } // namespace cuda
 
-The overloads without ``abs_tol`` use ``abs_tol == 0``. The overloads without ``rel_tol`` use a default relative
-tolerance based on the promoted comparison type:
+``cuda::isclose`` checks whether two values are approximately equal using a weak symmetric comparison similar to the one described in `PEP 485 <https://peps.python.org/pep-0485/>`_:
 
 .. code:: cpp
 
-   pow(10, -ceil_div(cuda::std::numeric_limits<Comparison>::max_digits10, 2))
+   abs(lhs - rhs) <= max(absolute_tolerance, relative_tolerance * max(abs(lhs), abs(rhs)))
 
-For ``double`` comparisons, this is ``1e-9``, matching the default relative tolerance from PEP 485. Lower- and
-higher-precision comparison types use a correspondingly smaller or larger default.
+- The overloads without ``absolute_tolerance`` use ``absolute_tolerance == 0``.
+- The overloads without ``relative_tolerance`` use a default relative tolerance based on half of available digits of accuracy.
 
 **Parameters**
 
 - ``lhs``: The first value to compare.
 - ``rhs``: The second value to compare.
-- ``rel_tol``: The relative tolerance. Must be finite and non-negative. Passing ``0`` performs a purely absolute
-  tolerance check when ``abs_tol`` is non-zero.
-- ``abs_tol``: The absolute tolerance. Must be finite and non-negative. This is useful for comparisons near zero. The
-  supplied type may promote to the value comparison type, but may not make the value comparison type wider.
+- ``relative_tolerance``: The relative tolerance. Passing ``0`` performs a purely absolute tolerance check when ``absolute_tolerance`` is non-zero.
+- ``absolute_tolerance``: The absolute tolerance. This is useful for comparisons near zero. 
+
+**Precision**
+
+- ``relative_tolerance``:Must be in the range [0.0, 1.0].
+- ``absolute_tolerance``: Must be finite and non-negative.
 
 **Return value**
 
@@ -55,47 +56,34 @@ higher-precision comparison types use a correspondingly smaller or larger defaul
 
 **Constraints**
 
-- Scalar overloads require ``lhs`` and ``rhs`` to have the same arithmetic type. ``abs_tol`` must be representable in the
-  value comparison type after promotion. For example, ``double`` values may be compared with a ``float`` absolute
-  tolerance, but ``float`` values cannot use a ``double`` absolute tolerance. ``rel_tol`` is always a ``float``.
-- Complex overloads accept ``cuda::std::complex<T>`` and ``cuda::complex<T>`` operands. ``lhs`` and ``rhs`` must have the
-  same complex type, and ``abs_tol`` must be representable in the complex value comparison type after promotion.
-  ``rel_tol`` is always a ``float``.
+- Scalar overloads require ``lhs``, ``rhs``, ``absolute_tolerance`` to have the same arithmetic type (interger or floating point).
+- Complex overloads accept ``cuda::std::complex<T>``, ``cuda::complex<T>``, ``std::complex<T>`` operands.
 
 **Special values**
 
-- NaN is never close to any value, including another NaN.
+- ``NaN`` is never close to any value, including another ``NaN``.
 - Infinity and negative infinity are only close to themselves.
-- With the default ``abs_tol == 0``, comparisons near zero generally require an explicitly supplied absolute tolerance.
-
-For complex values, ``cuda::isclose`` follows the ``cmath.isclose`` model from PEP 485: the difference and scaling values
-are computed from complex magnitudes, rather than comparing the real and imaginary components independently.
 
 Example
 -------
 
 .. code:: cuda
 
-    #include <cuda/__complex_>
     #include <cuda/numeric>
     #include <cuda/std/cassert>
     #include <cuda/std/complex>
 
     __global__ void kernel()
     {
-        assert(cuda::isclose(1.0, 1.0 + 5e-10));
+        assert(cuda::isclose( 1.0, 1.0 + 5e-10));
         assert(!cuda::isclose(1.0, 1.0 + 5e-8));
 
         assert(!cuda::isclose(0.0, 1e-12));
-        assert(cuda::isclose(0.0, 1e-12, 0.0, 1e-12));
+        assert(cuda::isclose( 0.0, 1e-12, 0.0, 1e-12));
 
         cuda::std::complex<double> z1{1.0, 1.0};
         cuda::std::complex<double> z2{2.0, 0.0};
         assert(cuda::isclose(z1, z2, 0.75));
-
-        cuda::complex<float> z3{1.0f, 1.0f};
-        cuda::complex<float> z4{2.0f, 0.0f};
-        assert(cuda::isclose(z3, z4, 0.75f));
     }
 
     int main()
diff --git a/libcudacxx/include/cuda/__numeric/isclose.h b/libcudacxx/include/cuda/__numeric/isclose.h
index ad7efb13948..d05f6fc1e6c 100644
--- a/libcudacxx/include/cuda/__numeric/isclose.h
+++ b/libcudacxx/include/cuda/__numeric/isclose.h
@@ -49,6 +49,7 @@ template <typename _Tp>
 using __isclose_compare_t _CCCL_NODEBUG_ALIAS = ::cuda::std::
   conditional_t<(::cuda::std::__is_extended_floating_point_v<_Tp> && sizeof(_Tp) <= sizeof(float)), float, _Tp>;
 
+// compute 10^-(digits10 / 2)
 template <typename _Tp>
 [[nodiscard]] _CCCL_API _CCCL_CONSTEVAL float __isclose_default_relative_tolerance() noexcept
 {
diff --git a/libcudacxx/test/libcudacxx/cuda/numeric/isclose/isclose.pass.cpp b/libcudacxx/test/libcudacxx/cuda/numeric/isclose/isclose.pass.cpp
index 54cef3f3761..5b329f6e9ba 100644
--- a/libcudacxx/test/libcudacxx/cuda/numeric/isclose/isclose.pass.cpp
+++ b/libcudacxx/test/libcudacxx/cuda/numeric/isclose/isclose.pass.cpp
@@ -70,11 +70,32 @@ TEST_FUNC constexpr bool test_integral()
   static_assert(noexcept(cuda::isclose(T{}, T{}, 0.0f)));
   static_assert(noexcept(cuda::isclose(T{}, T{}, 0.0f, T{})));
 
+  // equal values
   assert(cuda::isclose(T{1}, T{1}));
   assert(!cuda::isclose(T{1}, T{2}));
+
+  // relative tolerance values
   assert(cuda::isclose(T{100}, T{101}, 0.02f));
   assert(cuda::isclose(T{101}, T{100}, 0.02f));
   assert(!cuda::isclose(T{100}, T{103}, 0.02f));
+
+  // absolute tolerance values, positive/negative values
+  assert(cuda::isclose(-3, 4, 0.0f, 7));
+  assert(!cuda::isclose(-3, 4, 0.0f, 6));
+  assert(cuda::isclose(3, -4, 0.0f, 7));
+  assert(!cuda::isclose(3, -4, 0.0f, 6));
+
+  // absolute tolerance values, negative values
+  assert(cuda::isclose(-3, -5, 0.0f, 2));
+  assert(!cuda::isclose(-3, -5, 0.0f, 1));
+  assert(cuda::isclose(-5, -3, 0.0f, 2));
+  assert(!cuda::isclose(-5, -3, 0.0f, 1));
+
+  // absolute tolerance values, positive values
+  assert(cuda::isclose(3u, 5u, 0.0f, 2u));
+  assert(!cuda::isclose(3u, 5u, 0.0f, 1u));
+  assert(cuda::isclose(5u, 3u, 0.0f, 2u));
+  assert(!cuda::isclose(5u, 3u, 0.0f, 1u));
   return true;
 }
 

From 0ba51438005920c7366eae92a2e8491a84ce8328 Mon Sep 17 00:00:00 2001
From: fbusato <fbusato@nvidia.com>
Date: Fri, 26 Jun 2026 12:48:56 -0700
Subject: [PATCH 12/14] delete temp

---
 temp.build.yml | 3 ---
 1 file changed, 3 deletions(-)
 delete mode 100644 temp.build.yml

diff --git a/temp.build.yml b/temp.build.yml
deleted file mode 100644
index 556595e7630..00000000000
--- a/temp.build.yml
+++ /dev/null
@@ -1,3 +0,0 @@
-cuda_thread_count: 1
-cuda_block_shmem_size: 0
-builds:

From 1aa44855ea660b678eaa427d7b666b21508216ee Mon Sep 17 00:00:00 2001
From: fbusato <fbusato@nvidia.com>
Date: Fri, 26 Jun 2026 13:25:36 -0700
Subject: [PATCH 13/14] documentation fixes

---
 .../extended_api/numeric/isclose.rst          | 21 ++++++++++++-------
 1 file changed, 14 insertions(+), 7 deletions(-)

diff --git a/docs/libcudacxx/extended_api/numeric/isclose.rst b/docs/libcudacxx/extended_api/numeric/isclose.rst
index 694074179a3..880ad4e2f77 100644
--- a/docs/libcudacxx/extended_api/numeric/isclose.rst
+++ b/docs/libcudacxx/extended_api/numeric/isclose.rst
@@ -13,19 +13,25 @@ Defined in ``<cuda/numeric>`` header.
    [[nodiscard]] constexpr bool isclose(T lhs, T rhs) noexcept;
 
    template <class T>
-   [[nodiscard]] constexpr bool isclose(T lhs, T rhs, float relative_tolerance) noexcept;
+   [[nodiscard]] constexpr bool isclose(T lhs, T rhs,
+                                        float relative_tolerance) noexcept;
 
-   template <class T, class AbsTol>
-   [[nodiscard]] constexpr bool isclose(T lhs, T rhs, float relative_tolerance, AbsTol absolute_tolerance) noexcept;
+   template <class T>
+   [[nodiscard]] constexpr bool isclose(T lhs, T rhs,
+                                        float relative_tolerance,
+                                        T     absolute_tolerance) noexcept;
 
    template <class Complex>
    [[nodiscard]] bool isclose(const Complex& lhs, const Complex& rhs) noexcept;
 
    template <class Complex>
-   [[nodiscard]] bool isclose(const Complex& lhs, const Complex& rhs, float relative_tolerance) noexcept;
+   [[nodiscard]] bool isclose(const Complex& lhs, const Complex& rhs,
+                              float relative_tolerance) noexcept;
 
    template <class Complex, class AbsTol>
-   [[nodiscard]] bool isclose(const Complex& lhs, const Complex& rhs, float relative_tolerance, AbsTol absolute_tolerance) noexcept;
+   [[nodiscard]] bool isclose(const Complex& lhs, const Complex& rhs,
+                              float  relative_tolerance,
+                              AbsTol absolute_tolerance) noexcept;
 
    } // namespace cuda
 
@@ -43,7 +49,7 @@ Defined in ``<cuda/numeric>`` header.
 - ``lhs``: The first value to compare.
 - ``rhs``: The second value to compare.
 - ``relative_tolerance``: The relative tolerance. Passing ``0`` performs a purely absolute tolerance check when ``absolute_tolerance`` is non-zero.
-- ``absolute_tolerance``: The absolute tolerance. This is useful for comparisons near zero. 
+- ``absolute_tolerance``: The absolute tolerance. This is useful for comparisons near zero.
 
 **Precision**
 
@@ -56,8 +62,9 @@ Defined in ``<cuda/numeric>`` header.
 
 **Constraints**
 
-- Scalar overloads require ``lhs``, ``rhs``, ``absolute_tolerance`` to have the same arithmetic type (interger or floating point).
+- Scalar overloads require ``lhs``, ``rhs``, ``absolute_tolerance`` to have the same arithmetic type (integer or floating point).
 - Complex overloads accept ``cuda::std::complex<T>``, ``cuda::complex<T>``, ``std::complex<T>`` operands.
+- ``AbsTol`` must be the same type as the complex value type.
 
 **Special values**
 

From 5ed596566e24e542820abf6404ad99a72de45e6b Mon Sep 17 00:00:00 2001
From: fbusato <fbusato@nvidia.com>
Date: Fri, 26 Jun 2026 15:01:14 -0700
Subject: [PATCH 14/14] doc: annotate exec space

---
 .../extended_api/numeric/isclose.rst          | 42 ++++++++++---------
 1 file changed, 22 insertions(+), 20 deletions(-)

diff --git a/docs/libcudacxx/extended_api/numeric/isclose.rst b/docs/libcudacxx/extended_api/numeric/isclose.rst
index 880ad4e2f77..d385533454e 100644
--- a/docs/libcudacxx/extended_api/numeric/isclose.rst
+++ b/docs/libcudacxx/extended_api/numeric/isclose.rst
@@ -10,28 +10,30 @@ Defined in ``<cuda/numeric>`` header.
    namespace cuda {
 
    template <class T>
-   [[nodiscard]] constexpr bool isclose(T lhs, T rhs) noexcept;
+   [[nodiscard]] __host__ __device__ __tile__ constexpr
+   bool isclose(T lhs, T rhs) noexcept;
 
    template <class T>
-   [[nodiscard]] constexpr bool isclose(T lhs, T rhs,
-                                        float relative_tolerance) noexcept;
+   [[nodiscard]] __host__ __device__ __tile__ constexpr
+   bool isclose(T lhs, T rhs, float relative_tol) noexcept;
 
    template <class T>
-   [[nodiscard]] constexpr bool isclose(T lhs, T rhs,
-                                        float relative_tolerance,
-                                        T     absolute_tolerance) noexcept;
+   [[nodiscard]] __host__ __device__ __tile__ constexpr
+   bool isclose(T lhs, T rhs, float relative_tol, T absolute_tol) noexcept;
 
    template <class Complex>
-   [[nodiscard]] bool isclose(const Complex& lhs, const Complex& rhs) noexcept;
+   [[nodiscard]] __host__ __device__
+   bool isclose(const Complex& lhs, const Complex& rhs) noexcept;
 
    template <class Complex>
-   [[nodiscard]] bool isclose(const Complex& lhs, const Complex& rhs,
-                              float relative_tolerance) noexcept;
+   [[nodiscard]] __host__ __device__
+   bool isclose(const Complex& lhs, const Complex& rhs, float relative_tol) noexcept;
 
    template <class Complex, class AbsTol>
-   [[nodiscard]] bool isclose(const Complex& lhs, const Complex& rhs,
-                              float  relative_tolerance,
-                              AbsTol absolute_tolerance) noexcept;
+   [[nodiscard]] __host__ __device__
+   bool isclose(const Complex& lhs, const Complex& rhs,
+                float  relative_tol,
+                AbsTol absolute_tol) noexcept;
 
    } // namespace cuda
 
@@ -39,22 +41,22 @@ Defined in ``<cuda/numeric>`` header.
 
 .. code:: cpp
 
-   abs(lhs - rhs) <= max(absolute_tolerance, relative_tolerance * max(abs(lhs), abs(rhs)))
+   abs(lhs - rhs) <= max(absolute_tol, relative_tol * max(abs(lhs), abs(rhs)))
 
-- The overloads without ``absolute_tolerance`` use ``absolute_tolerance == 0``.
-- The overloads without ``relative_tolerance`` use a default relative tolerance based on half of available digits of accuracy.
+- The overloads without ``absolute_tol`` use ``absolute_tol == 0``.
+- The overloads without ``relative_tol`` use a default relative tolerance based on half of the available decimal digits of precision.
 
 **Parameters**
 
 - ``lhs``: The first value to compare.
 - ``rhs``: The second value to compare.
-- ``relative_tolerance``: The relative tolerance. Passing ``0`` performs a purely absolute tolerance check when ``absolute_tolerance`` is non-zero.
-- ``absolute_tolerance``: The absolute tolerance. This is useful for comparisons near zero.
+- ``relative_tol``: The relative tolerance. Passing ``0`` performs a purely absolute tolerance check when ``absolute_tol`` is non-zero.
+- ``absolute_tol``: The absolute tolerance. This is useful for comparisons near zero.
 
 **Precision**
 
-- ``relative_tolerance``:Must be in the range [0.0, 1.0].
-- ``absolute_tolerance``: Must be finite and non-negative.
+- ``relative_tol``: Must be in the range [0.0, 1.0].
+- ``absolute_tol``: Must be finite and non-negative.
 
 **Return value**
 
@@ -62,7 +64,7 @@ Defined in ``<cuda/numeric>`` header.
 
 **Constraints**
 
-- Scalar overloads require ``lhs``, ``rhs``, ``absolute_tolerance`` to have the same arithmetic type (integer or floating point).
+- Scalar overloads require ``lhs``, ``rhs``, and ``absolute_tol`` to have the same arithmetic type (integer or floating point).
 - Complex overloads accept ``cuda::std::complex<T>``, ``cuda::complex<T>``, ``std::complex<T>`` operands.
 - ``AbsTol`` must be the same type as the complex value type.