Skip to content

Commit

Permalink
Use grid_1d utilities in copy_range.cuh (#17409)
Browse files Browse the repository at this point in the history
Use the `grid_1d` utilities to manage thread and stride calculations in the `copy_range.cuh` kernels.

Authors:
  - David Wendt (https://github.com/davidwendt)

Approvers:
  - Bradley Dice (https://github.com/bdice)
  - Nghia Truong (https://github.com/ttnghia)
  - MithunR (https://github.com/mythrocks)

URL: #17409
  • Loading branch information
davidwendt authored Dec 4, 2024
1 parent 541e7e8 commit 1b01df3
Show file tree
Hide file tree
Showing 2 changed files with 8 additions and 9 deletions.
8 changes: 4 additions & 4 deletions cpp/include/cudf/detail/copy_range.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -56,15 +56,15 @@ CUDF_KERNEL void copy_range_kernel(SourceValueIterator source_value_begin,
constexpr cudf::size_type leader_lane{0};
int const lane_id = threadIdx.x % warp_size;

cudf::size_type const tid = threadIdx.x + blockIdx.x * blockDim.x;
int const warp_id = tid / warp_size;
auto const tid = cudf::detail::grid_1d::global_thread_id();
auto const warp_id = tid / warp_size;

cudf::size_type const offset = target.offset();
cudf::size_type const begin_mask_idx = cudf::word_index(offset + target_begin);
cudf::size_type const end_mask_idx = cudf::word_index(offset + target_end);

cudf::size_type mask_idx = begin_mask_idx + warp_id;
cudf::size_type const masks_per_grid = gridDim.x * blockDim.x / warp_size;
cudf::size_type const masks_per_grid = cudf::detail::grid_1d::grid_stride() / warp_size;

cudf::size_type target_offset = begin_mask_idx * warp_size - (offset + target_begin);
cudf::size_type source_idx = tid + target_offset;
Expand Down Expand Up @@ -92,7 +92,7 @@ CUDF_KERNEL void copy_range_kernel(SourceValueIterator source_value_begin,
}
}

source_idx += blockDim.x * gridDim.x;
source_idx += cudf::detail::grid_1d::grid_stride();
mask_idx += masks_per_grid;
}

Expand Down
9 changes: 4 additions & 5 deletions cpp/include/cudf/detail/null_mask.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -67,15 +67,15 @@ CUDF_KERNEL void offset_bitmask_binop(Binop op,
size_type source_size_bits,
size_type* count_ptr)
{
auto const tid = threadIdx.x + blockIdx.x * blockDim.x;
auto const tid = cudf::detail::grid_1d::global_thread_id();

auto const last_bit_index = source_size_bits - 1;
auto const last_word_index = cudf::word_index(last_bit_index);

size_type thread_count = 0;

for (size_type destination_word_index = tid; destination_word_index < destination.size();
destination_word_index += blockDim.x * gridDim.x) {
destination_word_index += cudf::detail::grid_1d::grid_stride()) {
bitmask_type destination_word =
detail::get_mask_offset_word(source[0],
destination_word_index,
Expand Down Expand Up @@ -214,8 +214,7 @@ CUDF_KERNEL void subtract_set_bits_range_boundaries_kernel(bitmask_type const* b
{
constexpr size_type const word_size_in_bits{detail::size_in_bits<bitmask_type>()};

size_type const tid = threadIdx.x + blockIdx.x * blockDim.x;
size_type range_id = tid;
auto range_id = cudf::detail::grid_1d::global_thread_id();

while (range_id < num_ranges) {
size_type const first_bit_index = *(first_bit_indices + range_id);
Expand Down Expand Up @@ -243,7 +242,7 @@ CUDF_KERNEL void subtract_set_bits_range_boundaries_kernel(bitmask_type const* b
// Update the null count with the computed delta.
size_type updated_null_count = *(null_counts + range_id) + delta;
*(null_counts + range_id) = updated_null_count;
range_id += blockDim.x * gridDim.x;
range_id += cudf::detail::grid_1d::grid_stride();
}
}

Expand Down

0 comments on commit 1b01df3

Please sign in to comment.