-
Notifications
You must be signed in to change notification settings - Fork 419
Add cuda::bit_msb to <cuda/bit> #9624
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,56 @@ | ||
| .. _libcudacxx-extended-api-bit-bit_msb: | ||
|
|
||
| ``cuda::bit_msb`` | ||
| ================= | ||
|
|
||
| Defined in the ``<cuda/bit>`` header. | ||
|
|
||
| .. code:: cuda | ||
|
|
||
| namespace cuda { | ||
|
|
||
| template <typename T> | ||
| [[nodiscard]] __host__ __device__ constexpr | ||
| int bit_msb(T value) noexcept; | ||
|
|
||
| } // namespace cuda | ||
|
|
||
| The function returns the zero-based index of the most significant set bit of ``value`` (that is, ``floor(log2(value))``), or ``-1`` if ``value`` is zero. It is the most-significant counterpart to :ref:`bit_ffs <libcudacxx-extended-api-bit-bit_ffs>` (find first set). | ||
|
|
||
| **Parameters** | ||
|
|
||
| - ``value``: the unsigned integer value to scan. | ||
|
|
||
| **Return value** | ||
|
|
||
| - ``-1`` if ``value`` is zero, otherwise the zero-based position of the most significant set bit. | ||
|
|
||
| **Constraints** | ||
|
|
||
| - ``T`` is an unsigned integral type. | ||
|
|
||
| .. note:: | ||
|
|
||
| For a non-zero ``value``, ``bit_msb(value)`` equals ``cuda::std::bit_width(value) - 1``. It is provided as a safe, type-generic way to get the most significant bit index on all supported integer types. | ||
|
|
||
| Example | ||
| ------- | ||
|
|
||
| .. code:: cuda | ||
|
|
||
| #include <cuda/bit> | ||
| #include <cuda/std/cassert> | ||
| #include <cuda/std/cstdint> | ||
|
|
||
| __global__ void bit_msb_kernel() { | ||
| assert(cuda::bit_msb(uint32_t{0}) == -1); | ||
| assert(cuda::bit_msb(uint32_t{1}) == 0); | ||
| assert(cuda::bit_msb(uint32_t{0b10101000}) == 7); | ||
| assert(cuda::bit_msb(~uint32_t{0}) == 31); | ||
| } | ||
|
|
||
| int main() { | ||
| bit_msb_kernel<<<1, 1>>>(); | ||
| cudaDeviceSynchronize(); | ||
| return 0; | ||
| } | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,46 @@ | ||
| //===----------------------------------------------------------------------===// | ||
| // | ||
| // Part of libcu++, the C++ Standard Library for your entire system, | ||
| // under the Apache License v2.0 with LLVM Exceptions. | ||
| // See https://llvm.org/LICENSE.txt for license information. | ||
| // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | ||
| // SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. | ||
| // | ||
| //===----------------------------------------------------------------------===// | ||
|
|
||
| #ifndef _CUDA___BIT_BIT_MSB_H | ||
| #define _CUDA___BIT_BIT_MSB_H | ||
|
|
||
| #include <cuda/std/detail/__config> | ||
|
|
||
| #if defined(_CCCL_IMPLICIT_SYSTEM_HEADER_GCC) | ||
| # pragma GCC system_header | ||
| #elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_CLANG) | ||
| # pragma clang system_header | ||
| #elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_MSVC) | ||
| # pragma system_header | ||
| #endif // no system header | ||
|
|
||
| #include <cuda/std/__bit/integral.h> | ||
| #include <cuda/std/__type_traits/is_unsigned_integer.h> | ||
|
|
||
| #include <cuda/std/__cccl/prologue.h> | ||
|
|
||
| _CCCL_BEGIN_NAMESPACE_CUDA | ||
|
|
||
| // Returns the zero-based index of the most significant set bit of __value, or -1 if __value is zero, | ||
| // i.e. floor(log2(__value)). This is the most-significant counterpart to bit_ffs (find first set). | ||
| // It forwards to cuda::std::__bit_log2, which already lowers to the optimal find-leading-bit code | ||
| // (ptx::bfind on device, a countl_zero based path on host), and exposes it type safely. | ||
| _CCCL_TEMPLATE(class _Tp) | ||
| _CCCL_REQUIRES(::cuda::std::__cccl_is_unsigned_integer_v<_Tp>) | ||
| [[nodiscard]] _CCCL_API constexpr int bit_msb(_Tp __value) noexcept | ||
| { | ||
| return static_cast<int>(::cuda::std::__bit_log2(__value)); | ||
|
Comment on lines
+37
to
+39
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 🎯 Functional Correctness | 🟡 Minor | ⚡ Quick win 🧩 Analysis chain🏁 Script executed: #!/bin/bash
set -euo pipefail
printf '%s\n' "== file outline =="
ast-grep outline libcudacxx/include/cuda/__bit/bit_msb.h --view expanded || true
printf '\n%s\n' "== relevant file excerpt =="
sed -n '1,120p' libcudacxx/include/cuda/__bit/bit_msb.h | cat -n
printf '\n%s\n' "== related integral helper excerpt =="
sed -n '1,140p' libcudacxx/include/cuda/std/__bit/integral.h | cat -nRepository: NVIDIA/cccl Length of output: 8310 🏁 Script executed: #!/bin/bash
set -euo pipefail
python3 - <<'PY'
# Read-only semantic probe for the reviewed cast.
# This does not execute repository code; it just models the relevant C++ rule.
import ctypes
u32 = 0xFFFFFFFF
# On common platforms, casting to a signed 32-bit int yields -1.
i32 = ctypes.c_int(u32).value
# On a hypothetical 64-bit int, the same source value would remain positive.
i64 = ctypes.c_longlong(u32).value
print("c_int(0xFFFFFFFF) =", i32)
print("c_longlong(0xFFFFFFFF) =", i64)
print("portable_negative_one_via_cast =", i32 == -1 and i64 == -1)
PYRepository: NVIDIA/cccl Length of output: 247 🏁 Script executed: #!/bin/bash
set -euo pipefail
python3 - <<'PY'
# Demonstrate the portability issue in standard C++ terms.
# If an unsigned value is not representable in the target signed type,
# the result of the conversion is implementation-defined.
print("static_cast<int>(0xFFFFFFFFu) is not guaranteed to produce -1 on every C++ implementation.")
PYRepository: NVIDIA/cccl Length of output: 240 important: avoid relying on the implementation-defined cast for the zero case. Source: Path instructions |
||
| } | ||
|
|
||
| _CCCL_END_NAMESPACE_CUDA | ||
|
|
||
| #include <cuda/std/__cccl/epilogue.h> | ||
|
|
||
| #endif // _CUDA___BIT_BIT_MSB_H | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,81 @@ | ||
| //===----------------------------------------------------------------------===// | ||
| // | ||
| // Part of the libcu++ Project, under the Apache License v2.0 with LLVM Exceptions. | ||
| // See https://llvm.org/LICENSE.txt for license information. | ||
| // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | ||
| // SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. | ||
| // | ||
| //===----------------------------------------------------------------------===// | ||
|
|
||
| #include <cuda/bit> | ||
| #include <cuda/std/cassert> | ||
| #include <cuda/std/cstddef> | ||
| #include <cuda/std/cstdint> | ||
| #include <cuda/std/limits> | ||
| #include <cuda/std/type_traits> | ||
|
|
||
| #include "test_macros.h" | ||
|
|
||
| // bit_msb must only accept unsigned integer types, not bool, signed integers, floating point, or enums. | ||
| enum class Enum | ||
| { | ||
| value | ||
| }; | ||
|
|
||
| template <class T> | ||
| _CCCL_CONCEPT can_bit_msb = _CCCL_REQUIRES_EXPR((T), T value)(cuda::bit_msb(value)); | ||
|
|
||
| static_assert(can_bit_msb<unsigned char>); | ||
| static_assert(can_bit_msb<unsigned int>); | ||
| static_assert(can_bit_msb<unsigned long long>); | ||
| static_assert(!can_bit_msb<bool>); | ||
| static_assert(!can_bit_msb<int>); | ||
| static_assert(!can_bit_msb<float>); | ||
| static_assert(!can_bit_msb<Enum>); | ||
|
|
||
| template <typename T> | ||
| TEST_FUNC constexpr void test() | ||
| { | ||
| using nl = cuda::std::numeric_limits<T>; | ||
| [[maybe_unused]] constexpr T all_ones = static_cast<T>(~T{0}); | ||
|
|
||
| // a zero input has no set bit and returns -1 | ||
| assert(cuda::bit_msb(T{0}) == -1); | ||
| // the least significant bit sits at index 0 | ||
| assert(cuda::bit_msb(T{1}) == 0); | ||
| // a single bit set at position k returns k | ||
| assert(cuda::bit_msb(static_cast<T>(T{1} << 3)) == 3); | ||
| assert(cuda::bit_msb(static_cast<T>(T{1} << (nl::digits - 1))) == nl::digits - 1); | ||
| // the highest set bit wins when several bits are set | ||
| assert(cuda::bit_msb(static_cast<T>(0b10101000)) == 7); | ||
| assert(cuda::bit_msb(all_ones) == nl::digits - 1); | ||
| } | ||
|
|
||
| TEST_FUNC constexpr bool test() | ||
| { | ||
| test<unsigned char>(); | ||
| test<unsigned short>(); | ||
| test<unsigned>(); | ||
| test<unsigned long>(); | ||
| test<unsigned long long>(); | ||
|
|
||
| test<uint8_t>(); | ||
| test<uint16_t>(); | ||
| test<uint32_t>(); | ||
| test<uint64_t>(); | ||
| test<size_t>(); | ||
| test<uintmax_t>(); | ||
| test<uintptr_t>(); | ||
|
|
||
| #if _CCCL_HAS_INT128() | ||
| test<__uint128_t>(); | ||
| #endif // _CCCL_HAS_INT128() | ||
| return true; | ||
| } | ||
|
|
||
| int main(int, char**) | ||
| { | ||
| assert(test()); | ||
| static_assert(test()); | ||
| return 0; | ||
| } |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
🎯 Functional Correctness | 🟡 Minor | ⚡ Quick win
important: Make the example self-contained.
The snippet calls
cudaDeviceSynchronize()but never includes a CUDA runtime header, so it is not buildable as written. Add the appropriate runtime include (for example, the project-preferred CUDA runtime API header) or drop the synchronization call from the example. As per path instructions, documentation changes should prioritize buildable examples and technical accuracy.Source: Path instructions