diff --git a/libcudacxx/include/cuda/__argument/argument.h b/libcudacxx/include/cuda/__argument/argument.h index bc581b6674b..81f1d180589 100644 --- a/libcudacxx/include/cuda/__argument/argument.h +++ b/libcudacxx/include/cuda/__argument/argument.h @@ -28,8 +28,10 @@ #include #include #include +#include #include #include +#include #include #include #include @@ -40,6 +42,7 @@ #include #include #include +#include #include #include @@ -111,16 +114,60 @@ class constant }; //! @brief Wraps a compile-time constant argument sequence. -template +template class __constant_sequence { public: - using value_type = ::cuda::std::remove_cvref_t; - using __element_type = __element_type_of_t; - - static_assert(__is_sequence_v, "The value type of __constant_sequence must be a sequence"); + using __element_type = ::cuda::std::remove_cvref_t<_Tp>; + using value_type = ::cuda::std::array<__element_type, sizeof...(_Vs)>; + static constexpr ::cuda::std::size_t size = sizeof...(_Vs); }; +template +_CCCL_API constexpr auto __make_constant_sequence_impl(::cuda::std::index_sequence<_Is...>) +{ + using __raw_array = ::cuda::std::remove_cvref_t; + + if constexpr (::cuda::std::is_bounded_array_v<__raw_array>) + { + using _Tp = ::cuda::std::remove_cv_t<::cuda::std::remove_extent_t<__raw_array>>; + return __constant_sequence<_Tp, _Arr[_Is]...>{}; + } + else if constexpr (::cuda::std::__is_cuda_std_array_v<__raw_array>) + { + using _Tp = typename __raw_array::value_type; + return __constant_sequence<_Tp, _Arr[_Is]...>{}; + } + else + { + static_assert(::cuda::std::__always_false_v<__raw_array>, "unsupported array type"); + } +} + +//! @brief Makes a compile-time constant argument sequence. +//! In C++17, Arr must have static storage duration. +template +_CCCL_API constexpr auto __make_constant_sequence() +{ + using __raw_array = ::cuda::std::remove_cv_t<::cuda::std::remove_reference_t>; + + static_assert(::cuda::std::is_bounded_array_v<__raw_array> || ::cuda::std::__is_cuda_std_array_v<__raw_array>, + "make_constant_sequence requires a cuda::std::array or non-empty C-style array"); + + constexpr ::cuda::std::size_t N = []() constexpr { + if constexpr (::cuda::std::is_bounded_array_v<__raw_array>) + { + return ::cuda::std::extent_v<__raw_array>; + } + else + { + return ::cuda::std::tuple_size_v<__raw_array>; + } + }(); + + return __make_constant_sequence_impl(::cuda::std::make_index_sequence{}); +} + // __assert_in_range // ===================================================================== @@ -621,8 +668,8 @@ template inline constexpr bool __is_wrapper_v> = true; template inline constexpr bool __is_wrapper_v> = true; -template -inline constexpr bool __is_wrapper_v<__constant_sequence<_Value>> = true; +template +inline constexpr bool __is_wrapper_v<__constant_sequence<_Tp, _Vs...>> = true; template inline constexpr bool __is_wrapper_v<__immediate_sequence<_Arg, _StaticBounds>> = true; template @@ -662,11 +709,11 @@ __unwrap(const constant<_Value, _Tp>&) noexcept return constant<_Value, _Tp>::__get_value(); } -template -[[nodiscard]] _CCCL_API constexpr ::cuda::std::remove_cvref_t -__unwrap(const __constant_sequence<_Value>&) noexcept +//! Unwraps a compile-time constant argument sequence into a canonical cuda::std::array value. +template +[[nodiscard]] _CCCL_API constexpr auto __unwrap(const __constant_sequence<_Tp, _Vs...>&) noexcept { - return _Value; + return ::cuda::std::array<::cuda::std::remove_cvref_t<_Tp>, sizeof...(_Vs)>{_Vs...}; } template @@ -735,32 +782,32 @@ _CCCL_API constexpr auto __constant_compute_highest() noexcept return constant<_Value, _Tp>::__get_value(); } -template -_CCCL_API constexpr auto __constant_sequence_compute_lowest() noexcept +template +_CCCL_API constexpr _Tp __constant_sequence_compute_lowest() noexcept { - using _ElementType = __element_type_of_t<::cuda::std::remove_cvref_t>; - auto __first = _Value.begin(); - auto __last = _Value.end(); - - if (__first == __last) + if constexpr (sizeof...(_Vs) == 0) { - return __type_lowest<_ElementType>(); + return __type_lowest<_Tp>(); + } + else + { + constexpr _Tp __values[] = {_Vs...}; + return static_cast<_Tp>(*::cuda::std::min_element(__values, __values + sizeof...(_Vs))); } - return static_cast<_ElementType>(*::cuda::std::min_element(__first, __last)); } -template -_CCCL_API constexpr auto __constant_sequence_compute_highest() noexcept +template +_CCCL_API constexpr _Tp __constant_sequence_compute_highest() noexcept { - using _ElementType = __element_type_of_t<::cuda::std::remove_cvref_t>; - auto __first = _Value.begin(); - auto __last = _Value.end(); - - if (__first == __last) + if constexpr (sizeof...(_Vs) == 0) { - return __type_highest<_ElementType>(); + return __type_highest<_Tp>(); + } + else + { + constexpr _Tp __values[] = {_Vs...}; + return static_cast<_Tp>(*::cuda::std::max_element(__values, __values + sizeof...(_Vs))); } - return static_cast<_ElementType>(*::cuda::std::max_element(__first, __last)); } // ===================================================================== @@ -811,17 +858,16 @@ struct __traits_impl> static constexpr element_type highest = __wrapper_static_highest(); }; -template -struct __traits_impl<__constant_sequence<_Value>> +template +struct __traits_impl<__constant_sequence<_Tp, _Vs...>> { - using value_type = ::cuda::std::remove_cvref_t; - using element_type = __element_type_of_t; - static_assert(__is_sequence_v, "The value type of __constant_sequence must be a sequence"); + using element_type = ::cuda::std::remove_cvref_t<_Tp>; + using value_type = ::cuda::std::array; static constexpr bool is_constant = true; static constexpr bool is_deferred = false; static constexpr bool is_single_value = false; - static constexpr element_type lowest = __constant_sequence_compute_lowest<_Value>(); - static constexpr element_type highest = __constant_sequence_compute_highest<_Value>(); + static constexpr element_type lowest = __constant_sequence_compute_lowest<_Tp, _Vs...>(); + static constexpr element_type highest = __constant_sequence_compute_highest<_Tp, _Vs...>(); }; template @@ -896,10 +942,10 @@ template return __constant_compute_lowest<_Value, _Tp>(); } -template -[[nodiscard]] _CCCL_API constexpr auto __lowest_(__constant_sequence<_Value>) noexcept +template +[[nodiscard]] _CCCL_API constexpr auto __lowest_(__constant_sequence<_Tp, _Vs...>) noexcept { - return __constant_sequence_compute_lowest<_Value>(); + return __constant_sequence_compute_lowest<_Tp, _Vs...>(); } template @@ -949,10 +995,10 @@ template return __constant_compute_highest<_Value, _Tp>(); } -template -[[nodiscard]] _CCCL_API constexpr auto __highest_(__constant_sequence<_Value>) noexcept +template +[[nodiscard]] _CCCL_API constexpr auto __highest_(__constant_sequence<_Tp, _Vs...>) noexcept { - return __constant_sequence_compute_highest<_Value>(); + return __constant_sequence_compute_highest<_Tp, _Vs...>(); } template diff --git a/libcudacxx/test/libcudacxx/cuda/argument/argument_traits.pass.cpp b/libcudacxx/test/libcudacxx/cuda/argument/argument_traits.pass.cpp index aaf57291e23..e3fef4affb8 100644 --- a/libcudacxx/test/libcudacxx/cuda/argument/argument_traits.pass.cpp +++ b/libcudacxx/test/libcudacxx/cuda/argument/argument_traits.pass.cpp @@ -103,9 +103,13 @@ TEST_FUNC void test() static_assert(!cuda::args::__traits>::is_deferred); static_assert(!cuda::args::__traits>>::is_deferred); static_assert(!cuda::args::__traits>::is_deferred); -#if TEST_HAS_CLASS_NTTP - static_assert(!cuda::args::__traits{1, 2, 3}>>::is_deferred); -#endif // TEST_HAS_CLASS_NTTP + + static_assert(!cuda::args::__traits>::is_deferred); + static constexpr int carr[] = {1, 2, 3}; + static constexpr ::cuda::std::array cudaarr = {1, 2, 3}; + static_assert(!cuda::args::__traits())>::is_deferred); + static_assert(!cuda::args::__traits())>::is_deferred); + static_assert(cuda::args::__traits>>::is_deferred); static_assert(cuda::args::__traits>>::is_deferred); @@ -118,10 +122,11 @@ TEST_FUNC void test() static_assert(cuda::args::__traits>>::is_single_value); static_assert(!cuda::args::__traits>>::is_single_value); static_assert(cuda::args::__traits>::is_single_value); -#if TEST_HAS_CLASS_NTTP - static_assert( - !cuda::args::__traits{1, 2, 3}>>::is_single_value); -#endif // TEST_HAS_CLASS_NTTP + + static_assert(!cuda::args::__traits>::is_single_value); + static_assert(!cuda::args::__traits())>::is_single_value); + static_assert(!cuda::args::__traits())>::is_single_value); + static_assert(cuda::args::__traits>::is_single_value); static_assert(!cuda::args::__traits>>::is_single_value); @@ -134,11 +139,15 @@ TEST_FUNC void test() cuda::std::span>); static_assert(cuda::std::is_same_v>::value_type, int>); static_assert(cuda::std::is_same_v>::value_type, float>); -#if TEST_HAS_CLASS_NTTP - static_assert(cuda::std::is_same_v< - cuda::args::__traits{1, 2, 3}>>::value_type, - cuda::std::array>); -#endif // TEST_HAS_CLASS_NTTP + + static_assert(cuda::std::is_same_v>::value_type, + cuda::std::array>); + static_assert( + cuda::std::is_same_v())>::value_type, + cuda::std::array>); + static_assert( + cuda::std::is_same_v())>::value_type, + cuda::std::array>); // --- argument_traits: lowest / highest --- @@ -155,10 +164,13 @@ TEST_FUNC void test() == 8); static_assert(cuda::args::__traits>::lowest == 10.0f); static_assert(cuda::args::__traits>::highest == 10.0f); -#if TEST_HAS_CLASS_NTTP - static_assert(cuda::args::__traits{3, 1, 2}>>::lowest == 1); - static_assert(cuda::args::__traits{3, 1, 2}>>::highest == 3); -#endif // TEST_HAS_CLASS_NTTP + + static_assert(cuda::args::__traits>::lowest == 1); + static_assert(cuda::args::__traits>::highest == 3); + static_assert(cuda::args::__traits())>::lowest == 1); + static_assert(cuda::args::__traits())>::highest == 3); + static_assert(cuda::args::__traits())>::lowest == 1); + static_assert(cuda::args::__traits())>::highest == 3); // --- Free function bounds on plain values --- @@ -166,16 +178,6 @@ TEST_FUNC void test() static_assert(cuda::args::__highest_(42) == (cuda::std::numeric_limits::max)()); static_assert(cuda::args::__lowest_(1.0f) == cuda::std::numeric_limits::lowest()); static_assert(cuda::args::__highest_(1.0f) == (cuda::std::numeric_limits::max)()); - - // --- Scalar and sequence wrappers expose distinct single-value traits --- - - static_assert(cuda::args::__traits>::is_single_value); - static_assert(cuda::args::__traits>::is_single_value); - static_assert(!cuda::args::__traits>>::is_single_value); -#if TEST_HAS_CLASS_NTTP - static_assert( - !cuda::args::__traits{1, 2, 3}>>::is_single_value); -#endif // TEST_HAS_CLASS_NTTP } int main(int, char**) diff --git a/libcudacxx/test/libcudacxx/cuda/argument/static_argument.pass.cpp b/libcudacxx/test/libcudacxx/cuda/argument/static_argument.pass.cpp index 92a7678482a..8fb56f27d47 100644 --- a/libcudacxx/test/libcudacxx/cuda/argument/static_argument.pass.cpp +++ b/libcudacxx/test/libcudacxx/cuda/argument/static_argument.pass.cpp @@ -84,16 +84,24 @@ TEST_FUNC void test() } #endif // TEST_HAS_CLASS_NTTP -#if TEST_HAS_CLASS_NTTP // Array sequence { - constexpr auto sa_arr = cuda::args::__constant_sequence{128, 256, 512}>{}; + static constexpr int carr[] = {128, 256, 512}; + constexpr auto sa_arr = cuda::args::__make_constant_sequence(); + static_assert(cuda::args::__unwrap(sa_arr)[0] == 128); + static_assert(cuda::args::__unwrap(sa_arr)[1] == 256); + static_assert(cuda::args::__unwrap(sa_arr)[2] == 512); + static_assert(cuda::std::is_same_v>); + } + + { + static constexpr ::cuda::std::array cudaarr = {128, 256, 512}; + constexpr auto sa_arr = cuda::args::__make_constant_sequence(); static_assert(cuda::args::__unwrap(sa_arr)[0] == 128); static_assert(cuda::args::__unwrap(sa_arr)[1] == 256); static_assert(cuda::args::__unwrap(sa_arr)[2] == 512); static_assert(cuda::std::is_same_v>); } -#endif // TEST_HAS_CLASS_NTTP // Bounds: scalar { @@ -102,23 +110,27 @@ TEST_FUNC void test() static_assert(cuda::args::__highest_(sa) == 42); } -#if TEST_HAS_CLASS_NTTP // Bounds: array sequence computes lowest/highest of elements { - constexpr auto sa = cuda::args::__constant_sequence{128, 256, 512}>{}; + static constexpr int carr[] = {128, 256, 512}; + constexpr auto sa = cuda::args::__make_constant_sequence(); + static_assert(cuda::args::__lowest_(sa) == 128); + static_assert(cuda::args::__highest_(sa) == 512); + } + { + static constexpr ::cuda::std::array cudaarr = {128, 256, 512}; + constexpr auto sa = cuda::args::__make_constant_sequence(); static_assert(cuda::args::__lowest_(sa) == 128); static_assert(cuda::args::__highest_(sa) == 512); } -#endif // TEST_HAS_CLASS_NTTP -#if TEST_HAS_CLASS_NTTP // Bounds: empty array sequence has unconstrained element bounds { - constexpr auto sa = cuda::args::__constant_sequence{}>{}; + static constexpr ::cuda::std::array cudaarr = {}; + constexpr auto sa = cuda::args::__make_constant_sequence(); static_assert(cuda::args::__lowest_(sa) == cuda::std::numeric_limits::lowest()); static_assert(cuda::args::__highest_(sa) == (cuda::std::numeric_limits::max)()); } -#endif // TEST_HAS_CLASS_NTTP // Traits { @@ -143,25 +155,29 @@ TEST_FUNC void test() static_assert(traits::highest == 10.0f); } -#if TEST_HAS_CLASS_NTTP // Sequence traits { - using traits = cuda::args::__traits{1, 2, 3}>>; + static constexpr int carr[] = {1, 2, 3}; + using traits = cuda::args::__traits())>; + static_assert(traits::is_constant); + static_assert(!traits::is_deferred); + static_assert(!traits::is_single_value); + static_assert(cuda::std::is_same_v>); + static_assert(cuda::std::is_same_v); + } + { + static constexpr ::cuda::std::array cudaarr = {1, 2, 3}; + using traits = cuda::args::__traits())>; static_assert(traits::is_constant); static_assert(!traits::is_deferred); static_assert(!traits::is_single_value); static_assert(cuda::std::is_same_v>); static_assert(cuda::std::is_same_v); } -#endif // TEST_HAS_CLASS_NTTP - // Single value: scalar is single, sequence is not + // Single value: scalar is single { static_assert(!cuda::args::__is_sequence_v>::value_type>); -#if TEST_HAS_CLASS_NTTP - static_assert( - !cuda::args::__traits{1, 2, 3}>>::is_single_value); -#endif // TEST_HAS_CLASS_NTTP } // Unwrap: scalar @@ -178,16 +194,6 @@ TEST_FUNC void test() static_assert(val == 10.0f); static_assert(cuda::std::is_same_v); } - -#if TEST_HAS_CLASS_NTTP - // Unwrap: sequence - { - constexpr auto sa = cuda::args::__constant_sequence{10, 20, 30}>{}; - constexpr auto val = cuda::args::__unwrap(sa); - static_assert(val[0] == 10); - static_assert(val[2] == 30); - } -#endif // TEST_HAS_CLASS_NTTP } int main(int, char**) diff --git a/libcudacxx/test/libcudacxx/cuda/argument/usage_example.pass.cpp b/libcudacxx/test/libcudacxx/cuda/argument/usage_example.pass.cpp index eada29e23de..c8801a56472 100644 --- a/libcudacxx/test/libcudacxx/cuda/argument/usage_example.pass.cpp +++ b/libcudacxx/test/libcudacxx/cuda/argument/usage_example.pass.cpp @@ -73,6 +73,11 @@ TEST_FUNC constexpr int process_segments(_SegSizeArg __seg_size) } } +constexpr ::cuda::std::array cudaarr_shared = {64, 128, 256}; +constexpr int carr_shared[] = {64, 128, 256}; +constexpr ::cuda::std::array cudaarr_global = {64, 128, 512}; +constexpr int carr_global[] = {64, 128, 512}; + TEST_FUNC constexpr bool test() { // Plain scalar: no bounds, global memory, buffer clamped to default @@ -101,10 +106,15 @@ TEST_FUNC constexpr bool test() assert(process_segments(seg_size) == 128); } -#if TEST_HAS_CLASS_NTTP // __constant_sequence: array sequence, highest fits in shared memory { - constexpr auto seg_sizes = cuda::args::__constant_sequence{}; + constexpr auto seg_sizes = cuda::args::__make_constant_sequence(); + static_assert(select_variant(seg_sizes) == algorithm_variant::shared_memory); + assert(compute_buffer_size(seg_sizes, 3) == 256 * 3); + assert(process_segments(seg_sizes) == 64 + 128 + 256); + } + { + constexpr auto seg_sizes = cuda::args::__make_constant_sequence(); static_assert(select_variant(seg_sizes) == algorithm_variant::shared_memory); assert(compute_buffer_size(seg_sizes, 3) == 256 * 3); assert(process_segments(seg_sizes) == 64 + 128 + 256); @@ -112,12 +122,17 @@ TEST_FUNC constexpr bool test() // __constant_sequence: array sequence, highest exceeds shared memory, buffer clamped { - constexpr auto seg_sizes = cuda::args::__constant_sequence{}; + constexpr auto seg_sizes = cuda::args::__make_constant_sequence(); + static_assert(select_variant(seg_sizes) == algorithm_variant::global_memory); + assert(compute_buffer_size(seg_sizes, 3) == 512 * 3); + assert(process_segments(seg_sizes) == 64 + 128 + 512); + } + { + constexpr auto seg_sizes = cuda::args::__make_constant_sequence(); static_assert(select_variant(seg_sizes) == algorithm_variant::global_memory); assert(compute_buffer_size(seg_sizes, 3) == 512 * 3); assert(process_segments(seg_sizes) == 64 + 128 + 512); } -#endif // TEST_HAS_CLASS_NTTP // immediate: tight static bounds, shared memory, buffer = value { @@ -145,8 +160,9 @@ TEST_FUNC constexpr bool test() // __immediate_sequence: per-segment span with runtime bounds only { - int sizes[3] = {64, 128, 96}; - auto seg_sizes = cuda::args::__immediate_sequence{cuda::std::span{sizes, 3}, cuda::args::bounds(1, 200)}; + int sizes[3] = {64, 128, 96}; + [[maybe_unused]] auto seg_sizes = + cuda::args::__immediate_sequence{cuda::std::span{sizes, 3}, cuda::args::bounds(1, 200)}; assert(select_variant(seg_sizes) == algorithm_variant::global_memory); assert(compute_buffer_size(seg_sizes, 3) == 200 * 3); assert(process_segments(seg_sizes) == 64 + 128 + 96);