NVIDIA · temujinkz · Jun 27, 2026 · coderabbitai · Jun 29, 2026 · coderabbitai
@@ -8,6 +8,7 @@ Bit
    :maxdepth: 1
 
    bit/bitmask
+   bit/bit_msb
    bit/bit_reverse
    bit/bitfield_insert
    bit/bitfield_extract
@@ -26,6 +27,11 @@ Bit
      - CCCL 3.0.0
      - CUDA 13.0
 
+   * - :ref:`bit_msb <libcudacxx-extended-api-bit-bit_msb>`
+     - Find the most significant set bit
+     - CCCL 3.5.0
+     - CUDA 13.x
+
    * - :ref:`bit_reverse <libcudacxx-extended-api-bit-bit_reverse>`
      - Reverse the order of bits
      - CCCL 3.0.0

@@ -0,0 +1,56 @@
+.. _libcudacxx-extended-api-bit-bit_msb:
+
+``cuda::bit_msb``
+=================
+
+Defined in the ``<cuda/bit>`` header.
+
+.. code:: cuda
+
+   namespace cuda {
+
+   template <typename T>
+   [[nodiscard]] __host__ __device__ constexpr
+   int bit_msb(T value) noexcept;
+
+   } // namespace cuda
+
+The function returns the zero-based index of the most significant set bit of ``value`` (that is, ``floor(log2(value))``), or ``-1`` if ``value`` is zero. It is the most-significant counterpart to :ref:`bit_ffs <libcudacxx-extended-api-bit-bit_ffs>` (find first set).
+
+**Parameters**
+
+- ``value``: the unsigned integer value to scan.
+
+**Return value**
+
+- ``-1`` if ``value`` is zero, otherwise the zero-based position of the most significant set bit.
+
+**Constraints**
+
+- ``T`` is an unsigned integral type.
+
+.. note::
+
+    For a non-zero ``value``, ``bit_msb(value)`` equals ``cuda::std::bit_width(value) - 1``. It is provided as a safe, type-generic way to get the most significant bit index on all supported integer types.
+
+Example
+-------
+
+.. code:: cuda
+
+    #include <cuda/bit>
+    #include <cuda/std/cassert>
+    #include <cuda/std/cstdint>
+
+    __global__ void bit_msb_kernel() {
+        assert(cuda::bit_msb(uint32_t{0}) == -1);
+        assert(cuda::bit_msb(uint32_t{1}) == 0);
+        assert(cuda::bit_msb(uint32_t{0b10101000}) == 7);
+        assert(cuda::bit_msb(~uint32_t{0}) == 31);
+    }
+
+    int main() {
+        bit_msb_kernel<<<1, 1>>>();
+        cudaDeviceSynchronize();
+        return 0;
+    }
@@ -0,0 +1,46 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of libcu++, the C++ Standard Library for your entire system,
+// under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef _CUDA___BIT_BIT_MSB_H
+#define _CUDA___BIT_BIT_MSB_H
+
+#include <cuda/std/detail/__config>
+
+#if defined(_CCCL_IMPLICIT_SYSTEM_HEADER_GCC)
+#  pragma GCC system_header
+#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_CLANG)
+#  pragma clang system_header
+#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_MSVC)
+#  pragma system_header
+#endif // no system header
+
+#include <cuda/std/__bit/integral.h>
+#include <cuda/std/__type_traits/is_unsigned_integer.h>
+
+#include <cuda/std/__cccl/prologue.h>
+
+_CCCL_BEGIN_NAMESPACE_CUDA
+
+// Returns the zero-based index of the most significant set bit of __value, or -1 if __value is zero,
+// i.e. floor(log2(__value)). This is the most-significant counterpart to bit_ffs (find first set).
+// It forwards to cuda::std::__bit_log2, which already lowers to the optimal find-leading-bit code
+// (ptx::bfind on device, a countl_zero based path on host), and exposes it type safely.
+_CCCL_TEMPLATE(class _Tp)
+_CCCL_REQUIRES(::cuda::std::__cccl_is_unsigned_integer_v<_Tp>)
+[[nodiscard]] _CCCL_API constexpr int bit_msb(_Tp __value) noexcept
+{
+  return static_cast<int>(::cuda::std::__bit_log2(__value));
+}
+
+_CCCL_END_NAMESPACE_CUDA
+
+#include <cuda/std/__cccl/epilogue.h>
+
+#endif // _CUDA___BIT_BIT_MSB_H
@@ -21,6 +21,7 @@
 #  pragma system_header
 #endif // no system header
 
+#include <cuda/__bit/bit_msb.h>
 #include <cuda/__bit/bit_reverse.h>
 #include <cuda/__bit/bitfield.h>
 #include <cuda/__bit/bitmask.h>

@@ -0,0 +1,81 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the libcu++ Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES.
+//
+//===----------------------------------------------------------------------===//
+
+#include <cuda/bit>
+#include <cuda/std/cassert>
+#include <cuda/std/cstddef>
+#include <cuda/std/cstdint>
+#include <cuda/std/limits>
+#include <cuda/std/type_traits>
+
+#include "test_macros.h"
+
+// bit_msb must only accept unsigned integer types, not bool, signed integers, floating point, or enums.
+enum class Enum
+{
+  value
+};
+
+template <class T>
+_CCCL_CONCEPT can_bit_msb = _CCCL_REQUIRES_EXPR((T), T value)(cuda::bit_msb(value));
+
+static_assert(can_bit_msb<unsigned char>);
+static_assert(can_bit_msb<unsigned int>);
+static_assert(can_bit_msb<unsigned long long>);
+static_assert(!can_bit_msb<bool>);
+static_assert(!can_bit_msb<int>);
+static_assert(!can_bit_msb<float>);
+static_assert(!can_bit_msb<Enum>);
+
+template <typename T>
+TEST_FUNC constexpr void test()
+{
+  using nl                              = cuda::std::numeric_limits<T>;
+  [[maybe_unused]] constexpr T all_ones = static_cast<T>(~T{0});
+
+  // a zero input has no set bit and returns -1
+  assert(cuda::bit_msb(T{0}) == -1);
+  // the least significant bit sits at index 0
+  assert(cuda::bit_msb(T{1}) == 0);
+  // a single bit set at position k returns k
+  assert(cuda::bit_msb(static_cast<T>(T{1} << 3)) == 3);
+  assert(cuda::bit_msb(static_cast<T>(T{1} << (nl::digits - 1))) == nl::digits - 1);
+  // the highest set bit wins when several bits are set
+  assert(cuda::bit_msb(static_cast<T>(0b10101000)) == 7);
+  assert(cuda::bit_msb(all_ones) == nl::digits - 1);
+}
+
+TEST_FUNC constexpr bool test()
+{
+  test<unsigned char>();
+  test<unsigned short>();
+  test<unsigned>();
+  test<unsigned long>();
+  test<unsigned long long>();
+
+  test<uint8_t>();
+  test<uint16_t>();
+  test<uint32_t>();
+  test<uint64_t>();
+  test<size_t>();
+  test<uintmax_t>();
+  test<uintptr_t>();
+
+#if _CCCL_HAS_INT128()
+  test<__uint128_t>();
+#endif // _CCCL_HAS_INT128()
+  return true;
+}
+
+int main(int, char**)
+{
+  assert(test());
+  static_assert(test());
+  return 0;
+}