Skip to content

Commit c7bfa77

Browse files
authored
Fix integer overflow in compiled binaryop (#17354)
For large columns, the computed stride might end up overflowing size_type. To fix this, use the grid_1d helper. See also #10368.

- Closes #17353

Authors:
- Lawrence Mitchell (https://github.com/wence-)

Approvers:
- Bradley Dice (https://github.com/bdice)
- David Wendt (https://github.com/davidwendt)
- Tianyu Liu (https://github.com/kingcrimsontianyu)
- Muhammad Haseeb (https://github.com/mhaseeb123)
- Nghia Truong (https://github.com/ttnghia)

URL: #17354
1 parent 9c5cd81 commit c7bfa77

File tree

2 files changed

+29
-12
lines changed

2 files changed

+29
-12
lines changed

cpp/src/binaryop/compiled/binary_ops.cuh

Lines changed: 7 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@
2121

2222
#include <cudf/column/column_device_view.cuh>
2323
#include <cudf/column/column_view.hpp>
24-
#include <cudf/detail/utilities/integer_utils.hpp>
24+
#include <cudf/detail/utilities/cuda.cuh>
2525
#include <cudf/unary.hpp>
2626

2727
#include <rmm/cuda_stream_view.hpp>
@@ -253,16 +253,11 @@ struct binary_op_double_device_dispatcher {
253253
template <typename Functor>
254254
CUDF_KERNEL void for_each_kernel(cudf::size_type size, Functor f)
255255
{
256-
int tid = threadIdx.x;
257-
int blkid = blockIdx.x;
258-
int blksz = blockDim.x;
259-
int gridsz = gridDim.x;
260-
261-
int start = tid + blkid * blksz;
262-
int step = blksz * gridsz;
256+
auto start = cudf::detail::grid_1d::global_thread_id();
257+
auto const stride = cudf::detail::grid_1d::grid_stride();
263258

264259
#pragma unroll
265-
for (cudf::size_type i = start; i < size; i += step) {
260+
for (auto i = start; i < size; i += stride) {
266261
f(i);
267262
}
268263
}
@@ -282,9 +277,9 @@ void for_each(rmm::cuda_stream_view stream, cudf::size_type size, Functor f)
282277
int min_grid_size;
283278
CUDF_CUDA_TRY(
284279
cudaOccupancyMaxPotentialBlockSize(&min_grid_size, &block_size, for_each_kernel<decltype(f)>));
285-
// 2 elements per thread.
286-
int const grid_size = util::div_rounding_up_safe(size, 2 * block_size);
287-
for_each_kernel<<<grid_size, block_size, 0, stream.value()>>>(size, std::forward<Functor&&>(f));
280+
auto grid = cudf::detail::grid_1d(size, block_size, 2 /* elements_per_thread */);
281+
for_each_kernel<<<grid.num_blocks, grid.num_threads_per_block, 0, stream.value()>>>(
282+
size, std::forward<Functor&&>(f));
288283
}
289284

290285
template <class BinaryOperator>

cpp/tests/binaryop/binop-compiled-test.cpp

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,9 +23,11 @@
2323
#include <cudf_test/testing_main.hpp>
2424
#include <cudf_test/type_lists.hpp>
2525

26+
#include <cudf/aggregation.hpp>
2627
#include <cudf/binaryop.hpp>
2728
#include <cudf/detail/iterator.cuh>
2829
#include <cudf/fixed_point/fixed_point.hpp>
30+
#include <cudf/reduction.hpp>
2931
#include <cudf/types.hpp>
3032

3133
#include <thrust/iterator/counting_iterator.h>
@@ -820,4 +822,24 @@ TEST_F(BinaryOperationCompiledTest_NullOpsString, NullMin_Vector_Vector)
820822
CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected, result->view());
821823
}
822824

825+
TEST(BinaryOperationCompiledTest, LargeColumnNoOverflow)
826+
{
827+
cudf::size_type num_rows{1'799'989'091};
828+
auto big = cudf::make_column_from_scalar(
829+
cudf::numeric_scalar<cudf::id_to_type<cudf::type_id::INT8>>{10, true}, num_rows);
830+
auto small = cudf::make_column_from_scalar(
831+
cudf::numeric_scalar<cudf::id_to_type<cudf::type_id::INT8>>{1, true}, num_rows);
832+
833+
auto mask = cudf::binary_operation(big->view(),
834+
small->view(),
835+
cudf::binary_operator::GREATER,
836+
cudf::data_type{cudf::type_id::BOOL8});
837+
838+
auto agg = cudf::make_sum_aggregation<cudf::reduce_aggregation>();
839+
auto result =
840+
cudf::reduce(mask->view(), *agg, cudf::data_type{cudf::type_to_id<cudf::size_type>()});
841+
auto got = static_cast<cudf::numeric_scalar<cudf::size_type>*>(result.get())->value();
842+
EXPECT_EQ(num_rows, got);
843+
}
844+
823845
CUDF_TEST_PROGRAM_MAIN()

0 commit comments

Comments
 (0)