Skip to content
Draft
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
291 changes: 150 additions & 141 deletions cub/cub/device/dispatch/dispatch_histogram.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -518,29 +518,28 @@ CUB_RUNTIME_FUNCTION _CCCL_FORCEINLINE static cudaError_t __dispatch_even_device
constexpr int PRIVATIZED_SMEM_BINS = 0;

if (const auto error = CubDebug(
(detail::histogram::dispatch<
NUM_CHANNELS,
NUM_ACTIVE_CHANNELS,
PRIVATIZED_SMEM_BINS,
true, // IsDeviceInit
true, // IsEven
false // IsByteSample
>(d_temp_storage,
temp_storage_bytes,
d_samples,
d_output_histograms,
num_output_levels,
num_output_levels,
upper_level,
lower_level,
max_num_output_bins,
num_row_pixels,
num_rows,
row_stride_samples,
stream,
policy_selector,
kernel_source,
launcher_factory))))
(detail::histogram::dispatch<NUM_CHANNELS,
NUM_ACTIVE_CHANNELS,
PRIVATIZED_SMEM_BINS,
/* IsDeviceInit = */ true,
/* IsEven = */ true,
/* IsByteSample = */ false>(
d_temp_storage,
temp_storage_bytes,
d_samples,
d_output_histograms,
num_output_levels,
num_output_levels,
upper_level,
lower_level,
max_num_output_bins,
num_row_pixels,
num_rows,
row_stride_samples,
stream,
policy_selector,
kernel_source,
launcher_factory))))
{
return error;
}
Expand All @@ -551,29 +550,28 @@ CUB_RUNTIME_FUNCTION _CCCL_FORCEINLINE static cudaError_t __dispatch_even_device
constexpr int PRIVATIZED_SMEM_BINS = detail::histogram::max_privatized_smem_bins;

if (const auto error = CubDebug(
(detail::histogram::dispatch<
NUM_CHANNELS,
NUM_ACTIVE_CHANNELS,
PRIVATIZED_SMEM_BINS,
true, // IsDeviceInit
true, // IsEven
false // IsByteSample
>(d_temp_storage,
temp_storage_bytes,
d_samples,
d_output_histograms,
num_output_levels,
num_output_levels,
upper_level,
lower_level,
max_num_output_bins,
num_row_pixels,
num_rows,
row_stride_samples,
stream,
policy_selector,
kernel_source,
launcher_factory))))
(detail::histogram::dispatch<NUM_CHANNELS,
NUM_ACTIVE_CHANNELS,
PRIVATIZED_SMEM_BINS,
/* IsDeviceInit = */ true,
/* IsEven = */ true,
/* IsByteSample = */ false>(
d_temp_storage,
temp_storage_bytes,
d_samples,
d_output_histograms,
num_output_levels,
num_output_levels,
upper_level,
lower_level,
max_num_output_bins,
num_row_pixels,
num_rows,
row_stride_samples,
stream,
policy_selector,
kernel_source,
launcher_factory))))
{
return error;
}
Expand Down Expand Up @@ -692,29 +690,28 @@ CUB_RUNTIME_FUNCTION _CCCL_FORCEINLINE static cudaError_t __dispatch_even_device
constexpr int PRIVATIZED_SMEM_BINS = 256;

if (const auto error = CubDebug(
(detail::histogram::dispatch<
NUM_CHANNELS,
NUM_ACTIVE_CHANNELS,
PRIVATIZED_SMEM_BINS,
true, // IsDeviceInit
true, // IsEven
true // IsByteSample
>(d_temp_storage,
temp_storage_bytes,
d_samples,
d_output_histograms,
num_privatized_levels,
num_output_levels,
upper_level,
lower_level,
max_num_output_bins,
num_row_pixels,
num_rows,
row_stride_samples,
stream,
policy_selector,
kernel_source,
launcher_factory))))
(detail::histogram::dispatch<NUM_CHANNELS,
NUM_ACTIVE_CHANNELS,
PRIVATIZED_SMEM_BINS,
/* IsDeviceInit = */ true,
/* IsEven = */ true,
/* IsByteSample = */ true>(
d_temp_storage,
temp_storage_bytes,
d_samples,
d_output_histograms,
num_privatized_levels,
num_output_levels,
upper_level,
lower_level,
max_num_output_bins,
num_row_pixels,
num_rows,
row_stride_samples,
stream,
policy_selector,
kernel_source,
launcher_factory))))
{
return error;
}
Expand Down Expand Up @@ -844,29 +841,28 @@ CUB_RUNTIME_FUNCTION static cudaError_t dispatch_range(
constexpr int PRIVATIZED_SMEM_BINS = 256;

if (const auto error = CubDebug(
(detail::histogram::dispatch<
NUM_CHANNELS,
NUM_ACTIVE_CHANNELS,
PRIVATIZED_SMEM_BINS,
false, // IsDeviceInit
false, // IsEven (unused for host-init)
false // IsByteSample (unused for host-init)
>(d_temp_storage,
temp_storage_bytes,
d_samples,
d_output_histograms,
num_privatized_levels,
num_output_levels,
output_decode_op,
privatized_decode_op,
max_num_output_bins,
num_row_pixels,
num_rows,
row_stride_samples,
stream,
policy_selector,
kernel_source,
launcher_factory))))
(detail::histogram::dispatch<NUM_CHANNELS,
NUM_ACTIVE_CHANNELS,
PRIVATIZED_SMEM_BINS,
/* IsDeviceInit = */ false,
/* IsEven = */ false,
/* IsByteSample = */ false>(
d_temp_storage,
temp_storage_bytes,
d_samples,
d_output_histograms,
num_privatized_levels,
num_output_levels,
output_decode_op,
privatized_decode_op,
max_num_output_bins,
num_row_pixels,
num_rows,
row_stride_samples,
stream,
policy_selector,
kernel_source,
launcher_factory))))
{
return error;
}
Expand Down Expand Up @@ -902,29 +898,28 @@ CUB_RUNTIME_FUNCTION static cudaError_t dispatch_range(
constexpr int PRIVATIZED_SMEM_BINS = 0;

if (const auto error = CubDebug(
(detail::histogram::dispatch<
NUM_CHANNELS,
NUM_ACTIVE_CHANNELS,
PRIVATIZED_SMEM_BINS,
false, // IsDeviceInit
false, // IsEven (unused for host-init)
false // IsByteSample (unused for host-init)
>(d_temp_storage,
temp_storage_bytes,
d_samples,
d_output_histograms,
num_output_levels,
num_output_levels,
output_decode_op,
privatized_decode_op,
max_num_output_bins,
num_row_pixels,
num_rows,
row_stride_samples,
stream,
policy_selector,
kernel_source,
launcher_factory))))
(detail::histogram::dispatch<NUM_CHANNELS,
NUM_ACTIVE_CHANNELS,
PRIVATIZED_SMEM_BINS,
/* IsDeviceInit = */ false,
/* IsEven = */ false,
/* IsByteSample = */ false>(
d_temp_storage,
temp_storage_bytes,
d_samples,
d_output_histograms,
num_output_levels,
num_output_levels,
output_decode_op,
privatized_decode_op,
max_num_output_bins,
num_row_pixels,
num_rows,
row_stride_samples,
stream,
policy_selector,
kernel_source,
launcher_factory))))
{
return error;
}
Expand All @@ -935,29 +930,28 @@ CUB_RUNTIME_FUNCTION static cudaError_t dispatch_range(
constexpr int PRIVATIZED_SMEM_BINS = max_privatized_smem_bins;

if (const auto error = CubDebug(
(detail::histogram::dispatch<
NUM_CHANNELS,
NUM_ACTIVE_CHANNELS,
PRIVATIZED_SMEM_BINS,
false, // IsDeviceInit
false, // IsEven (unused for host-init)
false // IsByteSample (unused for host-init)
>(d_temp_storage,
temp_storage_bytes,
d_samples,
d_output_histograms,
num_output_levels,
num_output_levels,
output_decode_op,
privatized_decode_op,
max_num_output_bins,
num_row_pixels,
num_rows,
row_stride_samples,
stream,
policy_selector,
kernel_source,
launcher_factory))))
(detail::histogram::dispatch<NUM_CHANNELS,
NUM_ACTIVE_CHANNELS,
PRIVATIZED_SMEM_BINS,
/* IsDeviceInit = */ false,
/* IsEven = */ false,
/* IsByteSample = */ false>(
d_temp_storage,
temp_storage_bytes,
d_samples,
d_output_histograms,
num_output_levels,
num_output_levels,
output_decode_op,
privatized_decode_op,
max_num_output_bins,
num_row_pixels,
num_rows,
row_stride_samples,
stream,
policy_selector,
kernel_source,
launcher_factory))))
{
return error;
}
Expand Down Expand Up @@ -1040,7 +1034,12 @@ CUB_RUNTIME_FUNCTION _CCCL_FORCEINLINE static cudaError_t dispatch_even(
constexpr int PRIVATIZED_SMEM_BINS = 256;

if (const auto error = CubDebug(
(detail::histogram::dispatch<NUM_CHANNELS, NUM_ACTIVE_CHANNELS, PRIVATIZED_SMEM_BINS, false, false, false>(
(detail::histogram::dispatch<NUM_CHANNELS,
NUM_ACTIVE_CHANNELS,
PRIVATIZED_SMEM_BINS,
/* IsDeviceInit = */ false,
/* IsEven = */ false,
/* IsByteSample = */ false>(
d_temp_storage,
temp_storage_bytes,
d_samples,
Expand Down Expand Up @@ -1103,7 +1102,12 @@ CUB_RUNTIME_FUNCTION _CCCL_FORCEINLINE static cudaError_t dispatch_even(
constexpr int PRIVATIZED_SMEM_BINS = 0;

if (const auto error = CubDebug(
(detail::histogram::dispatch<NUM_CHANNELS, NUM_ACTIVE_CHANNELS, PRIVATIZED_SMEM_BINS, false, false, false>(
(detail::histogram::dispatch<NUM_CHANNELS,
NUM_ACTIVE_CHANNELS,
PRIVATIZED_SMEM_BINS,
/* IsDeviceInit = */ false,
/* IsEven = */ false,
/* IsByteSample = */ false>(
d_temp_storage,
temp_storage_bytes,
d_samples,
Expand All @@ -1129,7 +1133,12 @@ CUB_RUNTIME_FUNCTION _CCCL_FORCEINLINE static cudaError_t dispatch_even(
constexpr int PRIVATIZED_SMEM_BINS = max_privatized_smem_bins;

if (const auto error = CubDebug(
(detail::histogram::dispatch<NUM_CHANNELS, NUM_ACTIVE_CHANNELS, PRIVATIZED_SMEM_BINS, false, false, false>(
(detail::histogram::dispatch<NUM_CHANNELS,
NUM_ACTIVE_CHANNELS,
PRIVATIZED_SMEM_BINS,
/* IsDeviceInit = */ false,
/* IsEven = */ false,
/* IsByteSample = */ false>(
d_temp_storage,
temp_storage_bytes,
d_samples,
Expand Down
Loading