diff --git a/docs/libcudacxx/extended_api/bit.rst b/docs/libcudacxx/extended_api/bit.rst index 521a663c1fc..b9d31d38131 100644 --- a/docs/libcudacxx/extended_api/bit.rst +++ b/docs/libcudacxx/extended_api/bit.rst @@ -8,6 +8,7 @@ Bit :maxdepth: 1 bit/bitmask + bit/bit_msb bit/bit_reverse bit/bitfield_insert bit/bitfield_extract @@ -26,6 +27,11 @@ Bit - CCCL 3.0.0 - CUDA 13.0 + * - :ref:`bit_msb ` + - Find the most significant set bit + - CCCL 3.5.0 + - CUDA 13.x + * - :ref:`bit_reverse ` - Reverse the order of bits - CCCL 3.0.0 diff --git a/docs/libcudacxx/extended_api/bit/bit_msb.rst b/docs/libcudacxx/extended_api/bit/bit_msb.rst new file mode 100644 index 00000000000..6e0270f0ba7 --- /dev/null +++ b/docs/libcudacxx/extended_api/bit/bit_msb.rst @@ -0,0 +1,56 @@ +.. _libcudacxx-extended-api-bit-bit_msb: + +``cuda::bit_msb`` +================= + +Defined in the ```` header. + +.. code:: cuda + + namespace cuda { + + template + [[nodiscard]] __host__ __device__ constexpr + int bit_msb(T value) noexcept; + + } // namespace cuda + +The function returns the zero-based index of the most significant set bit of ``value`` (that is, ``floor(log2(value))``), or ``-1`` if ``value`` is zero. It is the most-significant counterpart to :ref:`bit_ffs ` (find first set). + +**Parameters** + +- ``value``: the unsigned integer value to scan. + +**Return value** + +- ``-1`` if ``value`` is zero, otherwise the zero-based position of the most significant set bit. + +**Constraints** + +- ``T`` is an unsigned integral type. + +.. note:: + + For a non-zero ``value``, ``bit_msb(value)`` equals ``cuda::std::bit_width(value) - 1``. It is provided as a safe, type-generic way to get the most significant bit index on all supported integer types. + +Example +------- + +.. code:: cuda + + #include + #include + #include + + __global__ void bit_msb_kernel() { + assert(cuda::bit_msb(uint32_t{0}) == -1); + assert(cuda::bit_msb(uint32_t{1}) == 0); + assert(cuda::bit_msb(uint32_t{0b10101000}) == 7); + assert(cuda::bit_msb(~uint32_t{0}) == 31); + } + + int main() { + bit_msb_kernel<<<1, 1>>>(); + cudaDeviceSynchronize(); + return 0; + } diff --git a/libcudacxx/include/cuda/__bit/bit_msb.h b/libcudacxx/include/cuda/__bit/bit_msb.h new file mode 100644 index 00000000000..23696ba4f4a --- /dev/null +++ b/libcudacxx/include/cuda/__bit/bit_msb.h @@ -0,0 +1,46 @@ +//===----------------------------------------------------------------------===// +// +// Part of libcu++, the C++ Standard Library for your entire system, +// under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. +// +//===----------------------------------------------------------------------===// + +#ifndef _CUDA___BIT_BIT_MSB_H +#define _CUDA___BIT_BIT_MSB_H + +#include + +#if defined(_CCCL_IMPLICIT_SYSTEM_HEADER_GCC) +# pragma GCC system_header +#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_CLANG) +# pragma clang system_header +#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_MSVC) +# pragma system_header +#endif // no system header + +#include +#include + +#include + +_CCCL_BEGIN_NAMESPACE_CUDA + +// Returns the zero-based index of the most significant set bit of __value, or -1 if __value is zero, +// i.e. floor(log2(__value)). This is the most-significant counterpart to bit_ffs (find first set). +// It forwards to cuda::std::__bit_log2, which already lowers to the optimal find-leading-bit code +// (ptx::bfind on device, a countl_zero based path on host), and exposes it type safely. +_CCCL_TEMPLATE(class _Tp) +_CCCL_REQUIRES(::cuda::std::__cccl_is_unsigned_integer_v<_Tp>) +[[nodiscard]] _CCCL_API constexpr int bit_msb(_Tp __value) noexcept +{ + return static_cast(::cuda::std::__bit_log2(__value)); +} + +_CCCL_END_NAMESPACE_CUDA + +#include + +#endif // _CUDA___BIT_BIT_MSB_H diff --git a/libcudacxx/include/cuda/bit b/libcudacxx/include/cuda/bit index 6e7325fef87..362e12c1cf9 100644 --- a/libcudacxx/include/cuda/bit +++ b/libcudacxx/include/cuda/bit @@ -21,6 +21,7 @@ # pragma system_header #endif // no system header +#include #include #include #include diff --git a/libcudacxx/test/libcudacxx/cuda/bit/bit_msb.pass.cpp b/libcudacxx/test/libcudacxx/cuda/bit/bit_msb.pass.cpp new file mode 100644 index 00000000000..457cf0498db --- /dev/null +++ b/libcudacxx/test/libcudacxx/cuda/bit/bit_msb.pass.cpp @@ -0,0 +1,81 @@ +//===----------------------------------------------------------------------===// +// +// Part of the libcu++ Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. +// +//===----------------------------------------------------------------------===// + +#include +#include +#include +#include +#include +#include + +#include "test_macros.h" + +// bit_msb must only accept unsigned integer types, not bool, signed integers, floating point, or enums. +enum class Enum +{ + value +}; + +template +_CCCL_CONCEPT can_bit_msb = _CCCL_REQUIRES_EXPR((T), T value)(cuda::bit_msb(value)); + +static_assert(can_bit_msb); +static_assert(can_bit_msb); +static_assert(can_bit_msb); +static_assert(!can_bit_msb); +static_assert(!can_bit_msb); +static_assert(!can_bit_msb); +static_assert(!can_bit_msb); + +template +TEST_FUNC constexpr void test() +{ + using nl = cuda::std::numeric_limits; + [[maybe_unused]] constexpr T all_ones = static_cast(~T{0}); + + // a zero input has no set bit and returns -1 + assert(cuda::bit_msb(T{0}) == -1); + // the least significant bit sits at index 0 + assert(cuda::bit_msb(T{1}) == 0); + // a single bit set at position k returns k + assert(cuda::bit_msb(static_cast(T{1} << 3)) == 3); + assert(cuda::bit_msb(static_cast(T{1} << (nl::digits - 1))) == nl::digits - 1); + // the highest set bit wins when several bits are set + assert(cuda::bit_msb(static_cast(0b10101000)) == 7); + assert(cuda::bit_msb(all_ones) == nl::digits - 1); +} + +TEST_FUNC constexpr bool test() +{ + test(); + test(); + test(); + test(); + test(); + + test(); + test(); + test(); + test(); + test(); + test(); + test(); + +#if _CCCL_HAS_INT128() + test<__uint128_t>(); +#endif // _CCCL_HAS_INT128() + return true; +} + +int main(int, char**) +{ + assert(test()); + static_assert(test()); + return 0; +}