From 1b01df357a841e4aa29f3a40bc1162f1380269fb Mon Sep 17 00:00:00 2001 From: David Wendt <45795991+davidwendt@users.noreply.github.com> Date: Wed, 4 Dec 2024 08:22:36 -0500 Subject: [PATCH] Use grid_1d utilities in copy_range.cuh (#17409) Use the `grid_1d` utilities to manage thread and stride calculations in the `copy_range.cuh` kernels. Authors: - David Wendt (https://github.com/davidwendt) Approvers: - Bradley Dice (https://github.com/bdice) - Nghia Truong (https://github.com/ttnghia) - MithunR (https://github.com/mythrocks) URL: https://github.com/rapidsai/cudf/pull/17409 --- cpp/include/cudf/detail/copy_range.cuh | 8 ++++---- cpp/include/cudf/detail/null_mask.cuh | 9 ++++----- 2 files changed, 8 insertions(+), 9 deletions(-) diff --git a/cpp/include/cudf/detail/copy_range.cuh b/cpp/include/cudf/detail/copy_range.cuh index fcb80fe45f7..022c5c40ea0 100644 --- a/cpp/include/cudf/detail/copy_range.cuh +++ b/cpp/include/cudf/detail/copy_range.cuh @@ -56,15 +56,15 @@ CUDF_KERNEL void copy_range_kernel(SourceValueIterator source_value_begin, constexpr cudf::size_type leader_lane{0}; int const lane_id = threadIdx.x % warp_size; - cudf::size_type const tid = threadIdx.x + blockIdx.x * blockDim.x; - int const warp_id = tid / warp_size; + auto const tid = cudf::detail::grid_1d::global_thread_id(); + auto const warp_id = tid / warp_size; cudf::size_type const offset = target.offset(); cudf::size_type const begin_mask_idx = cudf::word_index(offset + target_begin); cudf::size_type const end_mask_idx = cudf::word_index(offset + target_end); cudf::size_type mask_idx = begin_mask_idx + warp_id; - cudf::size_type const masks_per_grid = gridDim.x * blockDim.x / warp_size; + cudf::size_type const masks_per_grid = cudf::detail::grid_1d::grid_stride() / warp_size; cudf::size_type target_offset = begin_mask_idx * warp_size - (offset + target_begin); cudf::size_type source_idx = tid + target_offset; @@ -92,7 +92,7 @@ CUDF_KERNEL void copy_range_kernel(SourceValueIterator source_value_begin, } } - source_idx += blockDim.x * gridDim.x; + source_idx += cudf::detail::grid_1d::grid_stride(); mask_idx += masks_per_grid; } diff --git a/cpp/include/cudf/detail/null_mask.cuh b/cpp/include/cudf/detail/null_mask.cuh index 025e2ccc3ec..17ecc0f5539 100644 --- a/cpp/include/cudf/detail/null_mask.cuh +++ b/cpp/include/cudf/detail/null_mask.cuh @@ -67,7 +67,7 @@ CUDF_KERNEL void offset_bitmask_binop(Binop op, size_type source_size_bits, size_type* count_ptr) { - auto const tid = threadIdx.x + blockIdx.x * blockDim.x; + auto const tid = cudf::detail::grid_1d::global_thread_id(); auto const last_bit_index = source_size_bits - 1; auto const last_word_index = cudf::word_index(last_bit_index); @@ -75,7 +75,7 @@ CUDF_KERNEL void offset_bitmask_binop(Binop op, size_type thread_count = 0; for (size_type destination_word_index = tid; destination_word_index < destination.size(); - destination_word_index += blockDim.x * gridDim.x) { + destination_word_index += cudf::detail::grid_1d::grid_stride()) { bitmask_type destination_word = detail::get_mask_offset_word(source[0], destination_word_index, @@ -214,8 +214,7 @@ CUDF_KERNEL void subtract_set_bits_range_boundaries_kernel(bitmask_type const* b { constexpr size_type const word_size_in_bits{detail::size_in_bits()}; - size_type const tid = threadIdx.x + blockIdx.x * blockDim.x; - size_type range_id = tid; + auto range_id = cudf::detail::grid_1d::global_thread_id(); while (range_id < num_ranges) { size_type const first_bit_index = *(first_bit_indices + range_id); @@ -243,7 +242,7 @@ CUDF_KERNEL void subtract_set_bits_range_boundaries_kernel(bitmask_type const* b // Update the null count with the computed delta. size_type updated_null_count = *(null_counts + range_id) + delta; *(null_counts + range_id) = updated_null_count; - range_id += blockDim.x * gridDim.x; + range_id += cudf::detail::grid_1d::grid_stride(); } }