From 49e37ade1ad618b96ae324497a18dcff70748a27 Mon Sep 17 00:00:00 2001 From: Michael Schellenberger Costa Date: Fri, 26 Jun 2026 08:49:21 +0200 Subject: [PATCH] [CUB] Cleanup some dispatch arguments --- .../device/dispatch/dispatch_histogram.cuh | 291 +++++++++--------- 1 file changed, 150 insertions(+), 141 deletions(-) diff --git a/cub/cub/device/dispatch/dispatch_histogram.cuh b/cub/cub/device/dispatch/dispatch_histogram.cuh index d3bd269a7e0..10e35eb91c1 100644 --- a/cub/cub/device/dispatch/dispatch_histogram.cuh +++ b/cub/cub/device/dispatch/dispatch_histogram.cuh @@ -518,29 +518,28 @@ CUB_RUNTIME_FUNCTION _CCCL_FORCEINLINE static cudaError_t __dispatch_even_device constexpr int PRIVATIZED_SMEM_BINS = 0; if (const auto error = CubDebug( - (detail::histogram::dispatch< - NUM_CHANNELS, - NUM_ACTIVE_CHANNELS, - PRIVATIZED_SMEM_BINS, - true, // IsDeviceInit - true, // IsEven - false // IsByteSample - >(d_temp_storage, - temp_storage_bytes, - d_samples, - d_output_histograms, - num_output_levels, - num_output_levels, - upper_level, - lower_level, - max_num_output_bins, - num_row_pixels, - num_rows, - row_stride_samples, - stream, - policy_selector, - kernel_source, - launcher_factory)))) + (detail::histogram::dispatch( + d_temp_storage, + temp_storage_bytes, + d_samples, + d_output_histograms, + num_output_levels, + num_output_levels, + upper_level, + lower_level, + max_num_output_bins, + num_row_pixels, + num_rows, + row_stride_samples, + stream, + policy_selector, + kernel_source, + launcher_factory)))) { return error; } @@ -551,29 +550,28 @@ CUB_RUNTIME_FUNCTION _CCCL_FORCEINLINE static cudaError_t __dispatch_even_device constexpr int PRIVATIZED_SMEM_BINS = detail::histogram::max_privatized_smem_bins; if (const auto error = CubDebug( - (detail::histogram::dispatch< - NUM_CHANNELS, - NUM_ACTIVE_CHANNELS, - PRIVATIZED_SMEM_BINS, - true, // IsDeviceInit - true, // IsEven - false // IsByteSample - >(d_temp_storage, - temp_storage_bytes, - d_samples, - d_output_histograms, - num_output_levels, - num_output_levels, - upper_level, - lower_level, - max_num_output_bins, - num_row_pixels, - num_rows, - row_stride_samples, - stream, - policy_selector, - kernel_source, - launcher_factory)))) + (detail::histogram::dispatch( + d_temp_storage, + temp_storage_bytes, + d_samples, + d_output_histograms, + num_output_levels, + num_output_levels, + upper_level, + lower_level, + max_num_output_bins, + num_row_pixels, + num_rows, + row_stride_samples, + stream, + policy_selector, + kernel_source, + launcher_factory)))) { return error; } @@ -692,29 +690,28 @@ CUB_RUNTIME_FUNCTION _CCCL_FORCEINLINE static cudaError_t __dispatch_even_device constexpr int PRIVATIZED_SMEM_BINS = 256; if (const auto error = CubDebug( - (detail::histogram::dispatch< - NUM_CHANNELS, - NUM_ACTIVE_CHANNELS, - PRIVATIZED_SMEM_BINS, - true, // IsDeviceInit - true, // IsEven - true // IsByteSample - >(d_temp_storage, - temp_storage_bytes, - d_samples, - d_output_histograms, - num_privatized_levels, - num_output_levels, - upper_level, - lower_level, - max_num_output_bins, - num_row_pixels, - num_rows, - row_stride_samples, - stream, - policy_selector, - kernel_source, - launcher_factory)))) + (detail::histogram::dispatch( + d_temp_storage, + temp_storage_bytes, + d_samples, + d_output_histograms, + num_privatized_levels, + num_output_levels, + upper_level, + lower_level, + max_num_output_bins, + num_row_pixels, + num_rows, + row_stride_samples, + stream, + policy_selector, + kernel_source, + launcher_factory)))) { return error; } @@ -844,29 +841,28 @@ CUB_RUNTIME_FUNCTION static cudaError_t dispatch_range( constexpr int PRIVATIZED_SMEM_BINS = 256; if (const auto error = CubDebug( - (detail::histogram::dispatch< - NUM_CHANNELS, - NUM_ACTIVE_CHANNELS, - PRIVATIZED_SMEM_BINS, - false, // IsDeviceInit - false, // IsEven (unused for host-init) - false // IsByteSample (unused for host-init) - >(d_temp_storage, - temp_storage_bytes, - d_samples, - d_output_histograms, - num_privatized_levels, - num_output_levels, - output_decode_op, - privatized_decode_op, - max_num_output_bins, - num_row_pixels, - num_rows, - row_stride_samples, - stream, - policy_selector, - kernel_source, - launcher_factory)))) + (detail::histogram::dispatch( + d_temp_storage, + temp_storage_bytes, + d_samples, + d_output_histograms, + num_privatized_levels, + num_output_levels, + output_decode_op, + privatized_decode_op, + max_num_output_bins, + num_row_pixels, + num_rows, + row_stride_samples, + stream, + policy_selector, + kernel_source, + launcher_factory)))) { return error; } @@ -902,29 +898,28 @@ CUB_RUNTIME_FUNCTION static cudaError_t dispatch_range( constexpr int PRIVATIZED_SMEM_BINS = 0; if (const auto error = CubDebug( - (detail::histogram::dispatch< - NUM_CHANNELS, - NUM_ACTIVE_CHANNELS, - PRIVATIZED_SMEM_BINS, - false, // IsDeviceInit - false, // IsEven (unused for host-init) - false // IsByteSample (unused for host-init) - >(d_temp_storage, - temp_storage_bytes, - d_samples, - d_output_histograms, - num_output_levels, - num_output_levels, - output_decode_op, - privatized_decode_op, - max_num_output_bins, - num_row_pixels, - num_rows, - row_stride_samples, - stream, - policy_selector, - kernel_source, - launcher_factory)))) + (detail::histogram::dispatch( + d_temp_storage, + temp_storage_bytes, + d_samples, + d_output_histograms, + num_output_levels, + num_output_levels, + output_decode_op, + privatized_decode_op, + max_num_output_bins, + num_row_pixels, + num_rows, + row_stride_samples, + stream, + policy_selector, + kernel_source, + launcher_factory)))) { return error; } @@ -935,29 +930,28 @@ CUB_RUNTIME_FUNCTION static cudaError_t dispatch_range( constexpr int PRIVATIZED_SMEM_BINS = max_privatized_smem_bins; if (const auto error = CubDebug( - (detail::histogram::dispatch< - NUM_CHANNELS, - NUM_ACTIVE_CHANNELS, - PRIVATIZED_SMEM_BINS, - false, // IsDeviceInit - false, // IsEven (unused for host-init) - false // IsByteSample (unused for host-init) - >(d_temp_storage, - temp_storage_bytes, - d_samples, - d_output_histograms, - num_output_levels, - num_output_levels, - output_decode_op, - privatized_decode_op, - max_num_output_bins, - num_row_pixels, - num_rows, - row_stride_samples, - stream, - policy_selector, - kernel_source, - launcher_factory)))) + (detail::histogram::dispatch( + d_temp_storage, + temp_storage_bytes, + d_samples, + d_output_histograms, + num_output_levels, + num_output_levels, + output_decode_op, + privatized_decode_op, + max_num_output_bins, + num_row_pixels, + num_rows, + row_stride_samples, + stream, + policy_selector, + kernel_source, + launcher_factory)))) { return error; } @@ -1040,7 +1034,12 @@ CUB_RUNTIME_FUNCTION _CCCL_FORCEINLINE static cudaError_t dispatch_even( constexpr int PRIVATIZED_SMEM_BINS = 256; if (const auto error = CubDebug( - (detail::histogram::dispatch( + (detail::histogram::dispatch( d_temp_storage, temp_storage_bytes, d_samples, @@ -1103,7 +1102,12 @@ CUB_RUNTIME_FUNCTION _CCCL_FORCEINLINE static cudaError_t dispatch_even( constexpr int PRIVATIZED_SMEM_BINS = 0; if (const auto error = CubDebug( - (detail::histogram::dispatch( + (detail::histogram::dispatch( d_temp_storage, temp_storage_bytes, d_samples, @@ -1129,7 +1133,12 @@ CUB_RUNTIME_FUNCTION _CCCL_FORCEINLINE static cudaError_t dispatch_even( constexpr int PRIVATIZED_SMEM_BINS = max_privatized_smem_bins; if (const auto error = CubDebug( - (detail::histogram::dispatch( + (detail::histogram::dispatch( d_temp_storage, temp_storage_bytes, d_samples,