From e2015641c7c5e6c5dc0d780c6f74f7491f50c79d Mon Sep 17 00:00:00 2001 From: Matt711 Date: Mon, 7 Apr 2025 12:34:08 -0400 Subject: [PATCH 01/17] Add a public API for converting a table_view to device array --- cpp/CMakeLists.txt | 1 + cpp/include/cudf/reshape.hpp | 25 +++++- cpp/src/reshape/table_to_array.cu | 110 +++++++++++++++++++++++++++ cpp/tests/CMakeLists.txt | 2 +- cpp/tests/reshape/table_to_array.cpp | 84 ++++++++++++++++++++ 5 files changed, 220 insertions(+), 2 deletions(-) create mode 100644 cpp/src/reshape/table_to_array.cu create mode 100644 cpp/tests/reshape/table_to_array.cpp diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index ca645df71cc..95b015a1f90 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -644,6 +644,7 @@ add_library( src/reshape/byte_cast.cu src/reshape/interleave_columns.cu src/reshape/tile.cu + src/reshape/table_to_array.cu src/rolling/detail/optimized_unbounded_window.cpp src/rolling/detail/rolling_collect_list.cu src/rolling/detail/rolling_fixed_window.cu diff --git a/cpp/include/cudf/reshape.hpp b/cpp/include/cudf/reshape.hpp index e437e7abfca..4c5649f7bda 100644 --- a/cpp/include/cudf/reshape.hpp +++ b/cpp/include/cudf/reshape.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2024, NVIDIA CORPORATION. + * Copyright (c) 2020-2025, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -107,6 +107,29 @@ std::unique_ptr byte_cast( rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); +/** + * @brief Copies a table into a contiguous column-major device array. + * + * This function converts a table_view with columns of the same type + * into a 2D device array in column-major order. The output buffer must be + * preallocated and large enough to hold `num_rows * num_columns` values of `output_dtype`. + * + * @throws cudf::logic_error if column types do not match `output_dtype` + * @throws cudf::logic_error if `output_dtype` is not fixed-width or is a fixed-point type + * + * @param input A table with fixed-width, non-nullable columns of the same type + * @param output Pointer to device memory large enough to hold `num_rows * num_columns` values + * @param output_dtype The logical data type of the output array + * @param stream CUDA stream used for memory operations + * @param mr Memory resource used for device allocations (currently unused) + */ +void table_to_device_array( + table_view const& input, + void* output, + cudf::data_type output_dtype, + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); + /** @} */ // end of group } // namespace CUDF_EXPORT cudf diff --git a/cpp/src/reshape/table_to_array.cu b/cpp/src/reshape/table_to_array.cu new file mode 100644 index 00000000000..fdf3c00f4ac --- /dev/null +++ b/cpp/src/reshape/table_to_array.cu @@ -0,0 +1,110 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include +#include +#include +#include + +#include +#include + +#include +#include +#include +#include +#include + +namespace cudf { +namespace { + +template +void _table_to_device_array(cudf::table_view const& input, + void* output, + rmm::cuda_stream_view stream) +{ + auto const num_columns = input.num_columns(); + auto const num_rows = input.num_rows(); + auto const item_size = sizeof(T); + + std::vector dsts(num_columns); + std::vector srcs(num_columns); + std::vector sizes(num_columns, item_size * num_rows); + + auto* base_ptr = static_cast(output); + + for (int i = 0; i < num_columns; ++i) { + auto const& col = input.column(i); + CUDF_EXPECTS(col.type().id() == cudf::type_to_id(), "Mismatched column type"); + + auto* src_ptr = static_cast(col.data()); + auto* dst_ptr = base_ptr + i * item_size * num_rows; + + srcs[i] = src_ptr; + dsts[i] = dst_ptr; + } + + cudaMemcpyAttributes attr{}; + attr.srcAccessOrder = cudaMemcpySrcAccessOrderStream; + std::vector attrs{attr}; + std::vector attr_idxs{0}; + + size_t fail_index = SIZE_MAX; + cudaError_t err = cudaMemcpyBatchAsync(dsts.data(), + const_cast(srcs.data()), + sizes.data(), + num_columns, + attrs.data(), + attr_idxs.data(), + attrs.size(), + &fail_index, + stream.value()); + + CUDF_CUDA_TRY(err); +} + +struct TableToArrayDispatcher { + table_view const& input; + void* output; + rmm::cuda_stream_view stream; + + template () && !is_fixed_point())> + void operator()() const + { + _table_to_device_array(input, output, stream); + } + + template () || is_fixed_point())> + void operator()() const + { + CUDF_FAIL("Unsupported dtype"); + } +}; + +} // namespace + +void table_to_device_array(table_view const& input, + void* output, + data_type output_dtype, + rmm::cuda_stream_view stream, + rmm::device_async_resource_ref) +{ + CUDF_FUNC_RANGE(); + cudf::type_dispatcher(output_dtype, TableToArrayDispatcher{input, output, stream}); +} + +} // namespace cudf diff --git a/cpp/tests/CMakeLists.txt b/cpp/tests/CMakeLists.txt index cea62e5360e..ea3a7f08a8b 100644 --- a/cpp/tests/CMakeLists.txt +++ b/cpp/tests/CMakeLists.txt @@ -520,7 +520,7 @@ ConfigureTest( # * reshape test ---------------------------------------------------------------------------------- ConfigureTest( RESHAPE_TEST reshape/byte_cast_tests.cpp reshape/interleave_columns_tests.cpp - reshape/tile_tests.cpp + reshape/tile_tests.cpp reshape/table_to_array.cpp ) # ################################################################################################## diff --git a/cpp/tests/reshape/table_to_array.cpp b/cpp/tests/reshape/table_to_array.cpp new file mode 100644 index 00000000000..c559a91d8ec --- /dev/null +++ b/cpp/tests/reshape/table_to_array.cpp @@ -0,0 +1,84 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +struct TableToDeviceArrayTest : public cudf::test::BaseFixture {}; + +TEST(TableToDeviceArrayTest, Int32Columns) +{ + auto stream = cudf::get_default_stream(); + + std::vector col0{1, 2, 3}; + std::vector col1{4, 5, 6}; + std::vector col2{7, 8, 9}; + std::vector col3{10, 11, 12}; + + std::vector> columns; + columns.push_back(cudf::make_numeric_column(cudf::data_type{cudf::type_id::INT32}, 3)); + columns.push_back(cudf::make_numeric_column(cudf::data_type{cudf::type_id::INT32}, 3)); + columns.push_back(cudf::make_numeric_column(cudf::data_type{cudf::type_id::INT32}, 3)); + columns.push_back(cudf::make_numeric_column(cudf::data_type{cudf::type_id::INT32}, 3)); + + cudaMemcpy(columns[0]->mutable_view().data(), + col0.data(), + 3 * sizeof(int32_t), + cudaMemcpyHostToDevice); + cudaMemcpy(columns[1]->mutable_view().data(), + col1.data(), + 3 * sizeof(int32_t), + cudaMemcpyHostToDevice); + cudaMemcpy(columns[2]->mutable_view().data(), + col2.data(), + 3 * sizeof(int32_t), + cudaMemcpyHostToDevice); + cudaMemcpy(columns[3]->mutable_view().data(), + col3.data(), + 3 * sizeof(int32_t), + cudaMemcpyHostToDevice); + + cudf::table_view input_table( + {columns[0]->view(), columns[1]->view(), columns[2]->view(), columns[3]->view()}); + + size_t num_elements = 3 * 4; + rmm::device_buffer output(num_elements * sizeof(int32_t), stream); + + cudf::table_to_device_array(input_table, + output.data(), + cudf::data_type{cudf::type_id::INT32}, + stream, + rmm::mr::get_current_device_resource()); + + std::vector host_result(num_elements); + cudaMemcpy( + host_result.data(), output.data(), num_elements * sizeof(int32_t), cudaMemcpyDeviceToHost); + + std::vector expected{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}; + + EXPECT_EQ(host_result, expected); +} From 6e9289e38e5f2c1a7d8327a3bf4a529e87342958 Mon Sep 17 00:00:00 2001 From: Matt711 Date: Mon, 7 Apr 2025 17:05:36 -0400 Subject: [PATCH 02/17] support decimals and add more tests --- cpp/src/reshape/table_to_array.cu | 37 ++++--- cpp/tests/reshape/table_to_array.cpp | 152 +++++++++++++++++++-------- 2 files changed, 128 insertions(+), 61 deletions(-) diff --git a/cpp/src/reshape/table_to_array.cu b/cpp/src/reshape/table_to_array.cu index fdf3c00f4ac..973292232a0 100644 --- a/cpp/src/reshape/table_to_array.cu +++ b/cpp/src/reshape/table_to_array.cu @@ -49,7 +49,7 @@ void _table_to_device_array(cudf::table_view const& input, for (int i = 0; i < num_columns; ++i) { auto const& col = input.column(i); - CUDF_EXPECTS(col.type().id() == cudf::type_to_id(), "Mismatched column type"); + CUDF_EXPECTS(col.type() == input.column(0).type(), "All columns must have the same dtype"); auto* src_ptr = static_cast(col.data()); auto* dst_ptr = base_ptr + i * item_size * num_rows; @@ -62,19 +62,17 @@ void _table_to_device_array(cudf::table_view const& input, attr.srcAccessOrder = cudaMemcpySrcAccessOrderStream; std::vector attrs{attr}; std::vector attr_idxs{0}; - - size_t fail_index = SIZE_MAX; - cudaError_t err = cudaMemcpyBatchAsync(dsts.data(), - const_cast(srcs.data()), - sizes.data(), - num_columns, - attrs.data(), - attr_idxs.data(), - attrs.size(), - &fail_index, - stream.value()); - - CUDF_CUDA_TRY(err); + size_t fail_idx = SIZE_MAX; + + CUDF_CUDA_TRY(cudaMemcpyBatchAsync(dsts.data(), + const_cast(srcs.data()), + sizes.data(), + num_columns, + attrs.data(), + attr_idxs.data(), + attrs.size(), + &fail_idx, + stream.value())); } struct TableToArrayDispatcher { @@ -82,13 +80,18 @@ struct TableToArrayDispatcher { void* output; rmm::cuda_stream_view stream; - template () && !is_fixed_point())> + template () || is_fixed_point())> void operator()() const { - _table_to_device_array(input, output, stream); + if constexpr (is_fixed_point()) { + using StorageType = cudf::device_storage_type_t; + _table_to_device_array(input, output, stream); + } else { + _table_to_device_array(input, output, stream); + } } - template () || is_fixed_point())> + template () && !is_fixed_point())> void operator()() const { CUDF_FAIL("Unsupported dtype"); diff --git a/cpp/tests/reshape/table_to_array.cpp b/cpp/tests/reshape/table_to_array.cpp index c559a91d8ec..1efcb35d53c 100644 --- a/cpp/tests/reshape/table_to_array.cpp +++ b/cpp/tests/reshape/table_to_array.cpp @@ -15,10 +15,11 @@ */ #include +#include +#include +#include #include -#include -#include #include #include #include @@ -28,57 +29,120 @@ #include #include -struct TableToDeviceArrayTest : public cudf::test::BaseFixture {}; +template +struct TableToDeviceArrayTypedTest : public cudf::test::BaseFixture {}; + +using SupportedTypes = cudf::test::Types; -TEST(TableToDeviceArrayTest, Int32Columns) +TYPED_TEST_SUITE(TableToDeviceArrayTypedTest, SupportedTypes); + +TYPED_TEST(TableToDeviceArrayTypedTest, SupportedTypes) { + using T = TypeParam; auto stream = cudf::get_default_stream(); - std::vector col0{1, 2, 3}; - std::vector col1{4, 5, 6}; - std::vector col2{7, 8, 9}; - std::vector col3{10, 11, 12}; - - std::vector> columns; - columns.push_back(cudf::make_numeric_column(cudf::data_type{cudf::type_id::INT32}, 3)); - columns.push_back(cudf::make_numeric_column(cudf::data_type{cudf::type_id::INT32}, 3)); - columns.push_back(cudf::make_numeric_column(cudf::data_type{cudf::type_id::INT32}, 3)); - columns.push_back(cudf::make_numeric_column(cudf::data_type{cudf::type_id::INT32}, 3)); - - cudaMemcpy(columns[0]->mutable_view().data(), - col0.data(), - 3 * sizeof(int32_t), - cudaMemcpyHostToDevice); - cudaMemcpy(columns[1]->mutable_view().data(), - col1.data(), - 3 * sizeof(int32_t), - cudaMemcpyHostToDevice); - cudaMemcpy(columns[2]->mutable_view().data(), - col2.data(), - 3 * sizeof(int32_t), - cudaMemcpyHostToDevice); - cudaMemcpy(columns[3]->mutable_view().data(), - col3.data(), - 3 * sizeof(int32_t), - cudaMemcpyHostToDevice); - - cudf::table_view input_table( - {columns[0]->view(), columns[1]->view(), columns[2]->view(), columns[3]->view()}); + auto const dtype = cudf::data_type{cudf::type_to_id()}; + + auto const col0 = cudf::test::make_type_param_vector({1, 2, 3}); + auto const col1 = cudf::test::make_type_param_vector({4, 5, 6}); + auto const col2 = cudf::test::make_type_param_vector({7, 8, 9}); + auto const col3 = cudf::test::make_type_param_vector({10, 11, 12}); + std::vector> cols; + auto make_col = [&](auto const& data) { + return std::make_unique( + cudf::test::fixed_width_column_wrapper(data.begin(), data.end())); + }; + + cols.push_back(make_col(col0)); + cols.push_back(make_col(col1)); + cols.push_back(make_col(col2)); + cols.push_back(make_col(col3)); + + cudf::table_view input({cols[0]->view(), cols[1]->view(), cols[2]->view(), cols[3]->view()}); size_t num_elements = 3 * 4; - rmm::device_buffer output(num_elements * sizeof(int32_t), stream); + rmm::device_buffer output(num_elements * sizeof(T), stream); + + cudf::table_to_device_array( + input, output.data(), dtype, stream, rmm::mr::get_current_device_resource()); + + std::vector host_result(num_elements); + CUDF_CUDA_TRY(cudaMemcpy( + host_result.data(), output.data(), num_elements * sizeof(T), cudaMemcpyDeviceToHost)); + + auto const expected_data = + cudf::test::make_type_param_vector({1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}); + std::vector expected(expected_data.begin(), expected_data.end()); + + EXPECT_EQ(host_result, expected); +} + +template +struct FixedPointTableToDeviceArrayTest : public cudf::test::BaseFixture {}; - cudf::table_to_device_array(input_table, - output.data(), - cudf::data_type{cudf::type_id::INT32}, - stream, - rmm::mr::get_current_device_resource()); +TYPED_TEST_SUITE(FixedPointTableToDeviceArrayTest, cudf::test::FixedPointTypes); - std::vector host_result(num_elements); - cudaMemcpy( - host_result.data(), output.data(), num_elements * sizeof(int32_t), cudaMemcpyDeviceToHost); +TYPED_TEST(FixedPointTableToDeviceArrayTest, SupportedFixedPointTypes) +{ + using decimalXX = TypeParam; + using RepType = cudf::device_storage_type_t; + using fp_wrapper = cudf::test::fixed_point_column_wrapper; + + auto stream = cudf::get_default_stream(); + auto scale = numeric::scale_type{-2}; + auto dtype = cudf::data_type{cudf::type_to_id(), scale}; + + fp_wrapper col0({123, 456, 789}, scale); + fp_wrapper col1({321, 654, 987}, scale); - std::vector expected{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}; + cudf::table_view input({col0, col1}); + rmm::device_buffer output(2 * 3 * sizeof(RepType), stream); + cudf::table_to_device_array( + input, output.data(), dtype, stream, rmm::mr::get_current_device_resource()); + + std::vector host_result(6); + CUDF_CUDA_TRY(cudaMemcpy(host_result.data(), + output.data(), + host_result.size() * sizeof(RepType), + cudaMemcpyDeviceToHost)); + + std::vector expected{123, 456, 789, 321, 654, 987}; EXPECT_EQ(host_result, expected); } + +struct TableToDeviceArrayTest : public cudf::test::BaseFixture {}; + +TEST(TableToDeviceArrayTest, UnsupportedStringType) +{ + auto stream = cudf::get_default_stream(); + auto col = cudf::test::strings_column_wrapper({"a", "b", "c"}); + cudf::table_view input_table({col}); + rmm::device_buffer output(3 * sizeof(int32_t), stream); + + EXPECT_THROW(cudf::table_to_device_array(input_table, + output.data(), + cudf::data_type{cudf::type_id::STRING}, + stream, + rmm::mr::get_current_device_resource()), + cudf::logic_error); +} From 7eb36906cf99716f03feec349722e35088d5a82f Mon Sep 17 00:00:00 2001 From: Matt711 Date: Tue, 8 Apr 2025 09:53:02 -0400 Subject: [PATCH 03/17] fallback if cuda version < 12.8 --- cpp/src/reshape/table_to_array.cu | 50 +++++++++++++++++++++++++++++++ 1 file changed, 50 insertions(+) diff --git a/cpp/src/reshape/table_to_array.cu b/cpp/src/reshape/table_to_array.cu index 973292232a0..0b83cfe435b 100644 --- a/cpp/src/reshape/table_to_array.cu +++ b/cpp/src/reshape/table_to_array.cu @@ -32,6 +32,49 @@ namespace cudf { namespace { +// template +// void _table_to_device_array(cudf::table_view const& input, +// void* output, +// rmm::cuda_stream_view stream) +// { +// auto const num_columns = input.num_columns(); +// auto const num_rows = input.num_rows(); +// auto const item_size = sizeof(T); + +// std::vector dsts(num_columns); +// std::vector srcs(num_columns); +// std::vector sizes(num_columns, item_size * num_rows); + +// auto* base_ptr = static_cast(output); + +// for (int i = 0; i < num_columns; ++i) { +// auto const& col = input.column(i); +// CUDF_EXPECTS(col.type() == input.column(0).type(), "All columns must have the same dtype"); + +// auto* src_ptr = static_cast(col.data()); +// auto* dst_ptr = base_ptr + i * item_size * num_rows; + +// srcs[i] = src_ptr; +// dsts[i] = dst_ptr; +// } + +// cudaMemcpyAttributes attr{}; +// attr.srcAccessOrder = cudaMemcpySrcAccessOrderStream; +// std::vector attrs{attr}; +// std::vector attr_idxs{0}; +// size_t fail_idx = SIZE_MAX; + +// CUDF_CUDA_TRY(cudaMemcpyBatchAsync(dsts.data(), +// const_cast(srcs.data()), +// sizes.data(), +// num_columns, +// attrs.data(), +// attr_idxs.data(), +// attrs.size(), +// &fail_idx, +// stream.value())); +// } + template void _table_to_device_array(cudf::table_view const& input, void* output, @@ -58,6 +101,7 @@ void _table_to_device_array(cudf::table_view const& input, dsts[i] = dst_ptr; } +#if defined(CUDA_VERSION) && CUDA_VERSION >= 12080 cudaMemcpyAttributes attr{}; attr.srcAccessOrder = cudaMemcpySrcAccessOrderStream; std::vector attrs{attr}; @@ -73,8 +117,14 @@ void _table_to_device_array(cudf::table_view const& input, attrs.size(), &fail_idx, stream.value())); +#else + for (int i = 0; i < num_columns; ++i) { + CUDF_CUDA_TRY(cudaMemcpyAsync(dsts[i], srcs[i], sizes[i], cudaMemcpyDeviceToDevice, stream.value())); + } +#endif } + struct TableToArrayDispatcher { table_view const& input; void* output; From 5a9d19550453754cbd86cf49e27685ca1d7ecbc3 Mon Sep 17 00:00:00 2001 From: Matt711 Date: Tue, 8 Apr 2025 09:54:59 -0400 Subject: [PATCH 04/17] clean up --- cpp/src/reshape/table_to_array.cu | 47 ++----------------------------- 1 file changed, 2 insertions(+), 45 deletions(-) diff --git a/cpp/src/reshape/table_to_array.cu b/cpp/src/reshape/table_to_array.cu index 0b83cfe435b..9031dbaa19e 100644 --- a/cpp/src/reshape/table_to_array.cu +++ b/cpp/src/reshape/table_to_array.cu @@ -32,49 +32,6 @@ namespace cudf { namespace { -// template -// void _table_to_device_array(cudf::table_view const& input, -// void* output, -// rmm::cuda_stream_view stream) -// { -// auto const num_columns = input.num_columns(); -// auto const num_rows = input.num_rows(); -// auto const item_size = sizeof(T); - -// std::vector dsts(num_columns); -// std::vector srcs(num_columns); -// std::vector sizes(num_columns, item_size * num_rows); - -// auto* base_ptr = static_cast(output); - -// for (int i = 0; i < num_columns; ++i) { -// auto const& col = input.column(i); -// CUDF_EXPECTS(col.type() == input.column(0).type(), "All columns must have the same dtype"); - -// auto* src_ptr = static_cast(col.data()); -// auto* dst_ptr = base_ptr + i * item_size * num_rows; - -// srcs[i] = src_ptr; -// dsts[i] = dst_ptr; -// } - -// cudaMemcpyAttributes attr{}; -// attr.srcAccessOrder = cudaMemcpySrcAccessOrderStream; -// std::vector attrs{attr}; -// std::vector attr_idxs{0}; -// size_t fail_idx = SIZE_MAX; - -// CUDF_CUDA_TRY(cudaMemcpyBatchAsync(dsts.data(), -// const_cast(srcs.data()), -// sizes.data(), -// num_columns, -// attrs.data(), -// attr_idxs.data(), -// attrs.size(), -// &fail_idx, -// stream.value())); -// } - template void _table_to_device_array(cudf::table_view const& input, void* output, @@ -119,12 +76,12 @@ void _table_to_device_array(cudf::table_view const& input, stream.value())); #else for (int i = 0; i < num_columns; ++i) { - CUDF_CUDA_TRY(cudaMemcpyAsync(dsts[i], srcs[i], sizes[i], cudaMemcpyDeviceToDevice, stream.value())); + CUDF_CUDA_TRY( + cudaMemcpyAsync(dsts[i], srcs[i], sizes[i], cudaMemcpyDeviceToDevice, stream.value())); } #endif } - struct TableToArrayDispatcher { table_view const& input; void* output; From e5e65cc592fd367e2a6e613e2a7b5760c799e417 Mon Sep 17 00:00:00 2001 From: Matt711 Date: Tue, 8 Apr 2025 14:49:49 -0400 Subject: [PATCH 05/17] address reviews --- cpp/benchmarks/CMakeLists.txt | 2 +- cpp/benchmarks/reshape/table_to_array.cpp | 58 ++++++++++++ cpp/include/cudf/detail/reshape.hpp | 10 +- cpp/include/cudf/reshape.hpp | 17 ++-- cpp/src/reshape/table_to_array.cu | 110 +++++++++++++--------- cpp/tests/reshape/table_to_array.cpp | 95 ++++++++++--------- 6 files changed, 196 insertions(+), 96 deletions(-) create mode 100644 cpp/benchmarks/reshape/table_to_array.cpp diff --git a/cpp/benchmarks/CMakeLists.txt b/cpp/benchmarks/CMakeLists.txt index 68c3ad706c4..cead6642472 100644 --- a/cpp/benchmarks/CMakeLists.txt +++ b/cpp/benchmarks/CMakeLists.txt @@ -430,7 +430,7 @@ ConfigureNVBench(DECIMAL_NVBENCH decimal/convert_floating.cpp) # ################################################################################################## # * reshape benchmark # --------------------------------------------------------------------------------- -ConfigureNVBench(RESHAPE_NVBENCH reshape/interleave.cpp) +ConfigureNVBench(RESHAPE_NVBENCH reshape/interleave.cpp reshape/table_to_array.cpp) # ################################################################################################## # * rolling benchmark diff --git a/cpp/benchmarks/reshape/table_to_array.cpp b/cpp/benchmarks/reshape/table_to_array.cpp new file mode 100644 index 00000000000..a1d0909ef9c --- /dev/null +++ b/cpp/benchmarks/reshape/table_to_array.cpp @@ -0,0 +1,58 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include + +#include +#include + +#include + +static void bench_table_to_array(nvbench::state& state) +{ + auto const num_rows = static_cast(state.get_int64("num_rows")); + auto const num_cols = static_cast(state.get_int64("columns")); + + if (static_cast(num_rows) * num_cols >= + static_cast(std::numeric_limits::max())) { + state.skip("Input size exceeds cudf::size_type limit"); + } + + data_profile profile = + data_profile_builder().distribution(cudf::type_id::INT32, distribution_id::UNIFORM, 0, 1000); + profile.set_null_probability(0.0); + std::vector types(num_cols, cudf::type_id::INT32); + auto input_table = create_random_table(types, row_count{num_rows}, profile); + + auto input_view = input_table->view(); + auto stream = cudf::get_default_stream(); + auto dtype = cudf::data_type{cudf::type_id::INT32}; + + rmm::device_buffer output(num_rows * num_cols * sizeof(int32_t), stream); + + state.set_cuda_stream(nvbench::make_cuda_stream_view(stream.value())); + state.add_global_memory_reads(num_rows * num_cols); // all bytes are read + state.add_global_memory_writes(num_rows * num_cols); // all bytes are written + + state.exec(nvbench::exec_tag::sync, [&](nvbench::launch& launch) { + cudf::table_to_array(input_view, output.data(), dtype, stream); + }); +} + +NVBENCH_BENCH(bench_table_to_array) + .set_name("table_to_array") + .add_int64_axis("num_rows", {32768, 262144, 2097152, 16777216}) + .add_int64_axis("columns", {2, 10, 100}); diff --git a/cpp/include/cudf/detail/reshape.hpp b/cpp/include/cudf/detail/reshape.hpp index aeeed282d8b..46fabcf2350 100644 --- a/cpp/include/cudf/detail/reshape.hpp +++ b/cpp/include/cudf/detail/reshape.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2024, NVIDIA CORPORATION. + * Copyright (c) 2020-2025, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -41,5 +41,13 @@ std::unique_ptr interleave_columns(table_view const& input, rmm::cuda_stream_view, rmm::device_async_resource_ref mr); +/** + * @copydoc cudf::table_to_array + */ +void table_to_array(table_view const& input, + void* output, + data_type output_dtype, + rmm::cuda_stream_view stream); + } // namespace detail } // namespace CUDF_EXPORT cudf diff --git a/cpp/include/cudf/reshape.hpp b/cpp/include/cudf/reshape.hpp index 4c5649f7bda..745c651e3e1 100644 --- a/cpp/include/cudf/reshape.hpp +++ b/cpp/include/cudf/reshape.hpp @@ -115,20 +115,17 @@ std::unique_ptr byte_cast( * preallocated and large enough to hold `num_rows * num_columns` values of `output_dtype`. * * @throws cudf::logic_error if column types do not match `output_dtype` - * @throws cudf::logic_error if `output_dtype` is not fixed-width or is a fixed-point type + * @throws cudf::logic_error if `output_dtype` is not a fixed-width type * * @param input A table with fixed-width, non-nullable columns of the same type - * @param output Pointer to device memory large enough to hold `num_rows * num_columns` values - * @param output_dtype The logical data type of the output array + * @param output Pointer to device memory sized to hold `num_rows * num_columns` values + * @param output_dtype The data type of the output array * @param stream CUDA stream used for memory operations - * @param mr Memory resource used for device allocations (currently unused) */ -void table_to_device_array( - table_view const& input, - void* output, - cudf::data_type output_dtype, - rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); +void table_to_array(table_view const& input, + void* output, + cudf::data_type output_dtype, + rmm::cuda_stream_view stream = cudf::get_default_stream()); /** @} */ // end of group diff --git a/cpp/src/reshape/table_to_array.cu b/cpp/src/reshape/table_to_array.cu index 9031dbaa19e..b7a82fb8d80 100644 --- a/cpp/src/reshape/table_to_array.cu +++ b/cpp/src/reshape/table_to_array.cu @@ -15,6 +15,7 @@ */ #include +#include #include #include #include @@ -24,62 +25,75 @@ #include #include +#include #include #include +#include #include #include namespace cudf { +namespace detail { namespace { template -void _table_to_device_array(cudf::table_view const& input, - void* output, - rmm::cuda_stream_view stream) +void _table_to_array(table_view const& input, void* output, rmm::cuda_stream_view stream) { auto const num_columns = input.num_columns(); auto const num_rows = input.num_rows(); auto const item_size = sizeof(T); + auto* base_ptr = static_cast(output); - std::vector dsts(num_columns); - std::vector srcs(num_columns); - std::vector sizes(num_columns, item_size * num_rows); + CUDF_EXPECTS(num_columns > 0, "Must have at least one column."); + CUDF_EXPECTS(output != nullptr, "Output pointer cannot be null."); - auto* base_ptr = static_cast(output); + rmm::device_uvector d_srcs(num_columns, stream); + rmm::device_uvector d_dsts(num_columns, stream); + + std::vector h_srcs(num_columns); + std::vector h_dsts(num_columns); for (int i = 0; i < num_columns; ++i) { auto const& col = input.column(i); CUDF_EXPECTS(col.type() == input.column(0).type(), "All columns must have the same dtype"); + CUDF_EXPECTS(col.null_count() == 0, "All columns must be non-nullable or contain no nulls"); - auto* src_ptr = static_cast(col.data()); - auto* dst_ptr = base_ptr + i * item_size * num_rows; - - srcs[i] = src_ptr; - dsts[i] = dst_ptr; + h_srcs[i] = static_cast(col.data()); + h_dsts[i] = static_cast(base_ptr + i * item_size * num_rows); } -#if defined(CUDA_VERSION) && CUDA_VERSION >= 12080 - cudaMemcpyAttributes attr{}; - attr.srcAccessOrder = cudaMemcpySrcAccessOrderStream; - std::vector attrs{attr}; - std::vector attr_idxs{0}; - size_t fail_idx = SIZE_MAX; - - CUDF_CUDA_TRY(cudaMemcpyBatchAsync(dsts.data(), - const_cast(srcs.data()), - sizes.data(), - num_columns, - attrs.data(), - attr_idxs.data(), - attrs.size(), - &fail_idx, - stream.value())); -#else - for (int i = 0; i < num_columns; ++i) { - CUDF_CUDA_TRY( - cudaMemcpyAsync(dsts[i], srcs[i], sizes[i], cudaMemcpyDeviceToDevice, stream.value())); - } -#endif + CUDF_CUDA_TRY(cudaMemcpyAsync(d_srcs.data(), + h_srcs.data(), + sizeof(void*) * num_columns, + cudaMemcpyHostToDevice, + stream.value())); + CUDF_CUDA_TRY(cudaMemcpyAsync(d_dsts.data(), + h_dsts.data(), + sizeof(void*) * num_columns, + cudaMemcpyHostToDevice, + stream.value())); + + thrust::constant_iterator sizes(static_cast(item_size * num_rows)); + + void* d_temp_storage = nullptr; + size_t temp_storage_bytes = 0; + cub::DeviceMemcpy::Batched(d_temp_storage, + temp_storage_bytes, + d_srcs.begin(), + d_dsts.begin(), + sizes, + num_columns, + stream.value()); + + rmm::device_buffer temp_storage(temp_storage_bytes, stream); + + cub::DeviceMemcpy::Batched(temp_storage.data(), + temp_storage_bytes, + d_srcs.begin(), + d_dsts.begin(), + sizes, + num_columns, + stream.value()); } struct TableToArrayDispatcher { @@ -92,9 +106,9 @@ struct TableToArrayDispatcher { { if constexpr (is_fixed_point()) { using StorageType = cudf::device_storage_type_t; - _table_to_device_array(input, output, stream); + _table_to_array(input, output, stream); } else { - _table_to_device_array(input, output, stream); + _table_to_array(input, output, stream); } } @@ -107,14 +121,26 @@ struct TableToArrayDispatcher { } // namespace -void table_to_device_array(table_view const& input, - void* output, - data_type output_dtype, - rmm::cuda_stream_view stream, - rmm::device_async_resource_ref) +void table_to_array(table_view const& input, + void* output, + data_type output_dtype, + rmm::cuda_stream_view stream) { - CUDF_FUNC_RANGE(); + CUDF_EXPECTS(output != nullptr, "Output pointer cannot be null."); + CUDF_EXPECTS(input.num_columns() > 0, "Input must have at least one column."); + cudf::type_dispatcher(output_dtype, TableToArrayDispatcher{input, output, stream}); } +} // namespace detail + +void table_to_array(table_view const& input, + void* output, + data_type output_dtype, + rmm::cuda_stream_view stream) +{ + CUDF_FUNC_RANGE(); + cudf::detail::table_to_array(input, output, output_dtype, stream); +} + } // namespace cudf diff --git a/cpp/tests/reshape/table_to_array.cpp b/cpp/tests/reshape/table_to_array.cpp index 1efcb35d53c..bc3fd95eec0 100644 --- a/cpp/tests/reshape/table_to_array.cpp +++ b/cpp/tests/reshape/table_to_array.cpp @@ -59,40 +59,42 @@ TYPED_TEST(TableToDeviceArrayTypedTest, SupportedTypes) { using T = TypeParam; auto stream = cudf::get_default_stream(); + auto mr = rmm::mr::get_current_device_resource(); auto const dtype = cudf::data_type{cudf::type_to_id()}; - auto const col0 = cudf::test::make_type_param_vector({1, 2, 3}); - auto const col1 = cudf::test::make_type_param_vector({4, 5, 6}); - auto const col2 = cudf::test::make_type_param_vector({7, 8, 9}); - auto const col3 = cudf::test::make_type_param_vector({10, 11, 12}); + int nrows = 3; + int ncols = 4; std::vector> cols; - auto make_col = [&](auto const& data) { - return std::make_unique( - cudf::test::fixed_width_column_wrapper(data.begin(), data.end())); - }; - - cols.push_back(make_col(col0)); - cols.push_back(make_col(col1)); - cols.push_back(make_col(col2)); - cols.push_back(make_col(col3)); - - cudf::table_view input({cols[0]->view(), cols[1]->view(), cols[2]->view(), cols[3]->view()}); - size_t num_elements = 3 * 4; - rmm::device_buffer output(num_elements * sizeof(T), stream); - - cudf::table_to_device_array( - input, output.data(), dtype, stream, rmm::mr::get_current_device_resource()); - - std::vector host_result(num_elements); - CUDF_CUDA_TRY(cudaMemcpy( - host_result.data(), output.data(), num_elements * sizeof(T), cudaMemcpyDeviceToHost)); - - auto const expected_data = - cudf::test::make_type_param_vector({1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}); - std::vector expected(expected_data.begin(), expected_data.end()); - + std::vector expected; + + for (int col = 0; col < ncols; ++col) { + std::vector data(nrows); + for (int row = 0; row < nrows; ++row) { + auto val = col * nrows + row + 1; + if constexpr (cudf::is_chrono()) { + data[row] = T(typename T::duration{val}); + } else { + data[row] = static_cast(val); + } + expected.push_back(data[row]); + } + cols.push_back(std::make_unique( + cudf::test::fixed_width_column_wrapper(data.begin(), data.end()))); + } + + std::vector views; + for (auto const& col : cols) { + views.push_back(col->view()); + } + cudf::table_view input{views}; + + auto output = cudf::detail::make_zeroed_device_uvector_sync(nrows * ncols, stream, *mr); + + cudf::table_to_array(input, output.data(), dtype, stream); + + auto host_result = cudf::detail::make_std_vector_sync(output, stream); EXPECT_EQ(host_result, expected); } @@ -108,6 +110,7 @@ TYPED_TEST(FixedPointTableToDeviceArrayTest, SupportedFixedPointTypes) using fp_wrapper = cudf::test::fixed_point_column_wrapper; auto stream = cudf::get_default_stream(); + auto mr = rmm::mr::get_current_device_resource(); auto scale = numeric::scale_type{-2}; auto dtype = cudf::data_type{cudf::type_to_id(), scale}; @@ -115,16 +118,13 @@ TYPED_TEST(FixedPointTableToDeviceArrayTest, SupportedFixedPointTypes) fp_wrapper col1({321, 654, 987}, scale); cudf::table_view input({col0, col1}); - rmm::device_buffer output(2 * 3 * sizeof(RepType), stream); + size_t num_elements = input.num_rows() * input.num_columns(); - cudf::table_to_device_array( - input, output.data(), dtype, stream, rmm::mr::get_current_device_resource()); + auto output = cudf::detail::make_zeroed_device_uvector_sync(num_elements, stream, *mr); - std::vector host_result(6); - CUDF_CUDA_TRY(cudaMemcpy(host_result.data(), - output.data(), - host_result.size() * sizeof(RepType), - cudaMemcpyDeviceToHost)); + cudf::table_to_array(input, output.data(), dtype, stream); + + auto host_result = cudf::detail::make_std_vector_sync(output, stream); std::vector expected{123, 456, 789, 321, 654, 987}; EXPECT_EQ(host_result, expected); @@ -139,10 +139,21 @@ TEST(TableToDeviceArrayTest, UnsupportedStringType) cudf::table_view input_table({col}); rmm::device_buffer output(3 * sizeof(int32_t), stream); - EXPECT_THROW(cudf::table_to_device_array(input_table, - output.data(), - cudf::data_type{cudf::type_id::STRING}, - stream, - rmm::mr::get_current_device_resource()), + EXPECT_THROW(cudf::table_to_array( + input_table, output.data(), cudf::data_type{cudf::type_id::STRING}, stream), cudf::logic_error); } + +TEST(TableToDeviceArrayTest, FailsWithNullValues) +{ + auto stream = cudf::get_default_stream(); + + cudf::test::fixed_width_column_wrapper col({1, 2, 3}, {true, false, true}); + + cudf::table_view input_table({col}); + rmm::device_buffer output(3 * sizeof(int32_t), stream); + + EXPECT_THROW( + cudf::table_to_array(input_table, output.data(), cudf::data_type{cudf::type_id::INT32}, stream), + cudf::logic_error); +} From 0343bb3197e8579aa610898f49cda82423c863d6 Mon Sep 17 00:00:00 2001 From: Matt711 Date: Tue, 8 Apr 2025 15:46:18 -0400 Subject: [PATCH 06/17] address review --- cpp/benchmarks/reshape/table_to_array.cpp | 6 +-- cpp/src/reshape/table_to_array.cu | 47 ++++++++--------------- 2 files changed, 20 insertions(+), 33 deletions(-) diff --git a/cpp/benchmarks/reshape/table_to_array.cpp b/cpp/benchmarks/reshape/table_to_array.cpp index a1d0909ef9c..e07b1be169d 100644 --- a/cpp/benchmarks/reshape/table_to_array.cpp +++ b/cpp/benchmarks/reshape/table_to_array.cpp @@ -31,9 +31,9 @@ static void bench_table_to_array(nvbench::state& state) state.skip("Input size exceeds cudf::size_type limit"); } - data_profile profile = - data_profile_builder().distribution(cudf::type_id::INT32, distribution_id::UNIFORM, 0, 1000); - profile.set_null_probability(0.0); + data_profile profile = data_profile_builder() + .distribution(cudf::type_id::INT32, distribution_id::UNIFORM, 0, 1000) + .no_validity(); std::vector types(num_cols, cudf::type_id::INT32); auto input_table = create_random_table(types, row_count{num_rows}, profile); diff --git a/cpp/src/reshape/table_to_array.cu b/cpp/src/reshape/table_to_array.cu index b7a82fb8d80..96656d9608a 100644 --- a/cpp/src/reshape/table_to_array.cu +++ b/cpp/src/reshape/table_to_array.cu @@ -16,7 +16,9 @@ #include #include +#include #include +#include #include #include #include @@ -37,20 +39,14 @@ namespace detail { namespace { template -void _table_to_array(table_view const& input, void* output, rmm::cuda_stream_view stream) +void table_to_array_iml(table_view const& input, void* output, rmm::cuda_stream_view stream) { auto const num_columns = input.num_columns(); auto const num_rows = input.num_rows(); auto const item_size = sizeof(T); auto* base_ptr = static_cast(output); - CUDF_EXPECTS(num_columns > 0, "Must have at least one column."); - CUDF_EXPECTS(output != nullptr, "Output pointer cannot be null."); - - rmm::device_uvector d_srcs(num_columns, stream); - rmm::device_uvector d_dsts(num_columns, stream); - - std::vector h_srcs(num_columns); + std::vector h_srcs(num_columns); std::vector h_dsts(num_columns); for (int i = 0; i < num_columns; ++i) { @@ -58,20 +54,14 @@ void _table_to_array(table_view const& input, void* output, rmm::cuda_stream_vie CUDF_EXPECTS(col.type() == input.column(0).type(), "All columns must have the same dtype"); CUDF_EXPECTS(col.null_count() == 0, "All columns must be non-nullable or contain no nulls"); - h_srcs[i] = static_cast(col.data()); + h_srcs[i] = const_cast(static_cast(col.data())); h_dsts[i] = static_cast(base_ptr + i * item_size * num_rows); } - CUDF_CUDA_TRY(cudaMemcpyAsync(d_srcs.data(), - h_srcs.data(), - sizeof(void*) * num_columns, - cudaMemcpyHostToDevice, - stream.value())); - CUDF_CUDA_TRY(cudaMemcpyAsync(d_dsts.data(), - h_dsts.data(), - sizeof(void*) * num_columns, - cudaMemcpyHostToDevice, - stream.value())); + auto const mr = cudf::get_current_device_resource_ref(); + + auto d_srcs = cudf::detail::make_device_uvector_async(h_srcs, stream, mr); + auto d_dsts = cudf::detail::make_device_uvector_async(h_dsts, stream, mr); thrust::constant_iterator sizes(static_cast(item_size * num_rows)); @@ -101,18 +91,13 @@ struct TableToArrayDispatcher { void* output; rmm::cuda_stream_view stream; - template () || is_fixed_point())> + template ())> void operator()() const { - if constexpr (is_fixed_point()) { - using StorageType = cudf::device_storage_type_t; - _table_to_array(input, output, stream); - } else { - _table_to_array(input, output, stream); - } + table_to_array_iml(input, output, stream); } - template () && !is_fixed_point())> + template ())> void operator()() const { CUDF_FAIL("Unsupported dtype"); @@ -126,10 +111,12 @@ void table_to_array(table_view const& input, data_type output_dtype, rmm::cuda_stream_view stream) { - CUDF_EXPECTS(output != nullptr, "Output pointer cannot be null."); - CUDF_EXPECTS(input.num_columns() > 0, "Input must have at least one column."); + CUDF_EXPECTS(output != nullptr, "Output pointer cannot be null.", std::invalid_argument); + CUDF_EXPECTS( + input.num_columns() > 0, "Input must have at least one column.", std::invalid_argument); - cudf::type_dispatcher(output_dtype, TableToArrayDispatcher{input, output, stream}); + cudf::type_dispatcher(output_dtype, + TableToArrayDispatcher{input, output, stream}); } } // namespace detail From 7530ecf7195106d8b7a200f1ca1148190edcefea Mon Sep 17 00:00:00 2001 From: Matt711 Date: Tue, 8 Apr 2025 15:52:09 -0400 Subject: [PATCH 07/17] use snake case --- cpp/src/reshape/table_to_array.cu | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/cpp/src/reshape/table_to_array.cu b/cpp/src/reshape/table_to_array.cu index 96656d9608a..b90310ef8aa 100644 --- a/cpp/src/reshape/table_to_array.cu +++ b/cpp/src/reshape/table_to_array.cu @@ -86,7 +86,7 @@ void table_to_array_iml(table_view const& input, void* output, rmm::cuda_stream_ stream.value()); } -struct TableToArrayDispatcher { +struct table_to_array_dispatcher { table_view const& input; void* output; rmm::cuda_stream_view stream; @@ -115,8 +115,8 @@ void table_to_array(table_view const& input, CUDF_EXPECTS( input.num_columns() > 0, "Input must have at least one column.", std::invalid_argument); - cudf::type_dispatcher(output_dtype, - TableToArrayDispatcher{input, output, stream}); + cudf::type_dispatcher( + output_dtype, table_to_array_dispatcher{input, output, stream}); } } // namespace detail From f0276166bae5b30893bba9a9948c7b609b91dc62 Mon Sep 17 00:00:00 2001 From: Matt711 Date: Thu, 10 Apr 2025 10:12:32 -0400 Subject: [PATCH 08/17] address reviews --- cpp/CMakeLists.txt | 2 +- cpp/include/cudf/reshape.hpp | 2 +- cpp/src/reshape/table_to_array.cu | 76 +++++++++++++++++-- cpp/tests/CMakeLists.txt | 2 +- ..._to_array.cpp => table_to_array_tests.cpp} | 18 ++--- 5 files changed, 81 insertions(+), 19 deletions(-) rename cpp/tests/reshape/{table_to_array.cpp => table_to_array_tests.cpp} (90%) diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index 6e73b4025a4..e73c6ec21ed 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -643,8 +643,8 @@ add_library( src/replace/replace.cu src/reshape/byte_cast.cu src/reshape/interleave_columns.cu - src/reshape/tile.cu src/reshape/table_to_array.cu + src/reshape/tile.cu src/rolling/detail/optimized_unbounded_window.cpp src/rolling/detail/rolling_collect_list.cu src/rolling/detail/rolling_fixed_window.cu diff --git a/cpp/include/cudf/reshape.hpp b/cpp/include/cudf/reshape.hpp index 745c651e3e1..582df26c27e 100644 --- a/cpp/include/cudf/reshape.hpp +++ b/cpp/include/cudf/reshape.hpp @@ -110,7 +110,7 @@ std::unique_ptr byte_cast( /** * @brief Copies a table into a contiguous column-major device array. * - * This function converts a table_view with columns of the same type + * This function copies a table_view with columns of the same type * into a 2D device array in column-major order. The output buffer must be * preallocated and large enough to hold `num_rows * num_columns` values of `output_dtype`. * diff --git a/cpp/src/reshape/table_to_array.cu b/cpp/src/reshape/table_to_array.cu index b90310ef8aa..6117e6fe489 100644 --- a/cpp/src/reshape/table_to_array.cu +++ b/cpp/src/reshape/table_to_array.cu @@ -14,6 +14,7 @@ * limitations under the License. */ +#include #include #include #include @@ -38,8 +39,65 @@ namespace cudf { namespace detail { namespace { +// template +// void table_to_array_impl(cudf::table_view const& input, +// void* output, +// rmm::cuda_stream_view stream) +// { +// auto const num_columns = input.num_columns(); +// auto const num_rows = input.num_rows(); +// auto const item_size = sizeof(T); + +// std::vector dsts(num_columns); +// std::vector srcs(num_columns); +// std::vector sizes(num_columns, item_size * num_rows); + +// auto* base_ptr = static_cast(output); + +// CUDF_EXPECTS(cudf::all_have_same_types(input.begin(), input.end()), +// "All columns must have the same data type", +// cudf::data_type_error); +// CUDF_EXPECTS( !cudf::has_nulls(input), +// "All columns must contain no nulls", +// std::invalid_argument); + +// std::transform(input.begin(), input.end(), srcs.begin(), +// [](auto const& col) { +// return const_cast(static_cast(col.template data())); +// }); +// for (int i = 0; i < num_columns; ++i) { +// dsts[i] = static_cast(base_ptr + i * item_size * num_rows); +// } + +// #if defined(CUDA_VERSION) && CUDA_VERSION >= 12080 +// // std::vector attrs(1); +// // attrs[0].srcAccessOrder = cudaMemcpySrcAccessOrderStream; +// // std::vector attr_idxs(num_columns, 0); +// // size_t fail_idx = SIZE_MAX; + +// // CUDF_CUDA_TRY(cudaMemcpyBatchAsync(dsts.data(), +// // const_cast(srcs.data()), +// // sizes.data(), +// // num_columns, +// // attrs.data(), +// // attr_idxs.data(), +// // attrs.size(), +// // &fail_idx, +// // stream.value())); +// for (int i = 0; i < num_columns; ++i) { +// CUDF_CUDA_TRY( +// cudaMemcpyAsync(dsts[i], srcs[i], sizes[i], cudaMemcpyDeviceToDevice, stream.value())); +// } +// #else +// for (int i = 0; i < num_columns; ++i) { +// CUDF_CUDA_TRY( +// cudaMemcpyAsync(dsts[i], srcs[i], sizes[i], cudaMemcpyDeviceToDevice, stream.value())); +// } +// #endif +// } + template -void table_to_array_iml(table_view const& input, void* output, rmm::cuda_stream_view stream) +void table_to_array_impl(table_view const& input, void* output, rmm::cuda_stream_view stream) { auto const num_columns = input.num_columns(); auto const num_rows = input.num_rows(); @@ -49,12 +107,16 @@ void table_to_array_iml(table_view const& input, void* output, rmm::cuda_stream_ std::vector h_srcs(num_columns); std::vector h_dsts(num_columns); + CUDF_EXPECTS(cudf::all_have_same_types(input.begin(), input.end()), + "All columns must have the same data type", + cudf::data_type_error); + CUDF_EXPECTS( !cudf::has_nulls(input), + "All columns must contain no nulls", + std::invalid_argument); + std::transform( input.begin(), input.end(), h_srcs.begin(), [] (auto& col) { + return const_cast(static_cast(col.template data())); + }); for (int i = 0; i < num_columns; ++i) { - auto const& col = input.column(i); - CUDF_EXPECTS(col.type() == input.column(0).type(), "All columns must have the same dtype"); - CUDF_EXPECTS(col.null_count() == 0, "All columns must be non-nullable or contain no nulls"); - - h_srcs[i] = const_cast(static_cast(col.data())); h_dsts[i] = static_cast(base_ptr + i * item_size * num_rows); } @@ -94,7 +156,7 @@ struct table_to_array_dispatcher { template ())> void operator()() const { - table_to_array_iml(input, output, stream); + table_to_array_impl(input, output, stream); } template ())> diff --git a/cpp/tests/CMakeLists.txt b/cpp/tests/CMakeLists.txt index ea3a7f08a8b..ee030bbb93e 100644 --- a/cpp/tests/CMakeLists.txt +++ b/cpp/tests/CMakeLists.txt @@ -520,7 +520,7 @@ ConfigureTest( # * reshape test ---------------------------------------------------------------------------------- ConfigureTest( RESHAPE_TEST reshape/byte_cast_tests.cpp reshape/interleave_columns_tests.cpp - reshape/tile_tests.cpp reshape/table_to_array.cpp + reshape/tile_tests.cpp reshape/table_to_array_tests.cpp ) # ################################################################################################## diff --git a/cpp/tests/reshape/table_to_array.cpp b/cpp/tests/reshape/table_to_array_tests.cpp similarity index 90% rename from cpp/tests/reshape/table_to_array.cpp rename to cpp/tests/reshape/table_to_array_tests.cpp index bc3fd95eec0..ce1c933d82a 100644 --- a/cpp/tests/reshape/table_to_array.cpp +++ b/cpp/tests/reshape/table_to_array_tests.cpp @@ -84,17 +84,16 @@ TYPED_TEST(TableToDeviceArrayTypedTest, SupportedTypes) cudf::test::fixed_width_column_wrapper(data.begin(), data.end()))); } - std::vector views; - for (auto const& col : cols) { - views.push_back(col->view()); - } + std::vector views(cols.size()); + std::transform(cols.begin(), cols.end(), views.begin(), + [](auto const& col) { return col->view(); }); cudf::table_view input{views}; - auto output = cudf::detail::make_zeroed_device_uvector_sync(nrows * ncols, stream, *mr); + auto output = cudf::detail::make_zeroed_device_uvector(nrows * ncols, stream, *mr); cudf::table_to_array(input, output.data(), dtype, stream); - auto host_result = cudf::detail::make_std_vector_sync(output, stream); + auto host_result = cudf::detail::make_std_vector(output, stream); EXPECT_EQ(host_result, expected); } @@ -120,11 +119,12 @@ TYPED_TEST(FixedPointTableToDeviceArrayTest, SupportedFixedPointTypes) cudf::table_view input({col0, col1}); size_t num_elements = input.num_rows() * input.num_columns(); - auto output = cudf::detail::make_zeroed_device_uvector_sync(num_elements, stream, *mr); + auto output = cudf::detail::make_zeroed_device_uvector(num_elements, stream, *mr); + cudf::table_to_array(input, output.data(), dtype, stream); - auto host_result = cudf::detail::make_std_vector_sync(output, stream); + auto host_result = cudf::detail::make_std_vector(output, stream); std::vector expected{123, 456, 789, 321, 654, 987}; EXPECT_EQ(host_result, expected); @@ -155,5 +155,5 @@ TEST(TableToDeviceArrayTest, FailsWithNullValues) EXPECT_THROW( cudf::table_to_array(input_table, output.data(), cudf::data_type{cudf::type_id::INT32}, stream), - cudf::logic_error); + std::invalid_argument); } From b3251fe90ade16a918dfa47a11f7472b851fa50a Mon Sep 17 00:00:00 2001 From: Matt711 Date: Thu, 10 Apr 2025 10:31:36 -0400 Subject: [PATCH 09/17] clean up --- cpp/src/reshape/table_to_array.cu | 12 +++++------- cpp/tests/reshape/table_to_array_tests.cpp | 5 ++--- 2 files changed, 7 insertions(+), 10 deletions(-) diff --git a/cpp/src/reshape/table_to_array.cu b/cpp/src/reshape/table_to_array.cu index 6117e6fe489..7458faef04a 100644 --- a/cpp/src/reshape/table_to_array.cu +++ b/cpp/src/reshape/table_to_array.cu @@ -14,7 +14,6 @@ * limitations under the License. */ -#include #include #include #include @@ -22,6 +21,7 @@ #include #include #include +#include #include #include @@ -108,12 +108,10 @@ void table_to_array_impl(table_view const& input, void* output, rmm::cuda_stream std::vector h_dsts(num_columns); CUDF_EXPECTS(cudf::all_have_same_types(input.begin(), input.end()), - "All columns must have the same data type", - cudf::data_type_error); - CUDF_EXPECTS( !cudf::has_nulls(input), - "All columns must contain no nulls", - std::invalid_argument); - std::transform( input.begin(), input.end(), h_srcs.begin(), [] (auto& col) { + "All columns must have the same data type", + cudf::data_type_error); + CUDF_EXPECTS(!cudf::has_nulls(input), "All columns must contain no nulls", std::invalid_argument); + std::transform(input.begin(), input.end(), h_srcs.begin(), [](auto& col) { return const_cast(static_cast(col.template data())); }); for (int i = 0; i < num_columns; ++i) { diff --git a/cpp/tests/reshape/table_to_array_tests.cpp b/cpp/tests/reshape/table_to_array_tests.cpp index ce1c933d82a..9ad26ec8a20 100644 --- a/cpp/tests/reshape/table_to_array_tests.cpp +++ b/cpp/tests/reshape/table_to_array_tests.cpp @@ -85,8 +85,8 @@ TYPED_TEST(TableToDeviceArrayTypedTest, SupportedTypes) } std::vector views(cols.size()); - std::transform(cols.begin(), cols.end(), views.begin(), - [](auto const& col) { return col->view(); }); + std::transform( + cols.begin(), cols.end(), views.begin(), [](auto const& col) { return col->view(); }); cudf::table_view input{views}; auto output = cudf::detail::make_zeroed_device_uvector(nrows * ncols, stream, *mr); @@ -121,7 +121,6 @@ TYPED_TEST(FixedPointTableToDeviceArrayTest, SupportedFixedPointTypes) auto output = cudf::detail::make_zeroed_device_uvector(num_elements, stream, *mr); - cudf::table_to_array(input, output.data(), dtype, stream); auto host_result = cudf::detail::make_std_vector(output, stream); From 96ef61912079e45c8976f1fe8ee579c61fe88726 Mon Sep 17 00:00:00 2001 From: Matthew Murray Date: Mon, 14 Apr 2025 18:36:42 -0400 Subject: [PATCH 10/17] address review --- cpp/src/reshape/table_to_array.cu | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cpp/src/reshape/table_to_array.cu b/cpp/src/reshape/table_to_array.cu index 7458faef04a..cf921288b8f 100644 --- a/cpp/src/reshape/table_to_array.cu +++ b/cpp/src/reshape/table_to_array.cu @@ -104,8 +104,8 @@ void table_to_array_impl(table_view const& input, void* output, rmm::cuda_stream auto const item_size = sizeof(T); auto* base_ptr = static_cast(output); - std::vector h_srcs(num_columns); - std::vector h_dsts(num_columns); + auto h_srcs = make_host_vector(num_columns, stream); + auto h_dsts = make_host_vector(num_columns, stream); CUDF_EXPECTS(cudf::all_have_same_types(input.begin(), input.end()), "All columns must have the same data type", From 82c5b22de5540c85770542c49cfebc92f6fc82ad Mon Sep 17 00:00:00 2001 From: Matthew Murray Date: Mon, 14 Apr 2025 19:37:44 -0400 Subject: [PATCH 11/17] pass a device_span instead of a raw pointer --- cpp/benchmarks/reshape/table_to_array.cpp | 7 +- cpp/include/cudf/reshape.hpp | 21 ++++-- cpp/src/reshape/table_to_array.cu | 83 +++++----------------- cpp/tests/reshape/table_to_array_tests.cpp | 52 ++++++++++++-- 4 files changed, 81 insertions(+), 82 deletions(-) diff --git a/cpp/benchmarks/reshape/table_to_array.cpp b/cpp/benchmarks/reshape/table_to_array.cpp index e07b1be169d..6e678b71f09 100644 --- a/cpp/benchmarks/reshape/table_to_array.cpp +++ b/cpp/benchmarks/reshape/table_to_array.cpp @@ -18,6 +18,9 @@ #include #include +#include + +#include #include @@ -42,13 +45,15 @@ static void bench_table_to_array(nvbench::state& state) auto dtype = cudf::data_type{cudf::type_id::INT32}; rmm::device_buffer output(num_rows * num_cols * sizeof(int32_t), stream); + auto span = cudf::device_span(reinterpret_cast(output.data()), + output.size()); state.set_cuda_stream(nvbench::make_cuda_stream_view(stream.value())); state.add_global_memory_reads(num_rows * num_cols); // all bytes are read state.add_global_memory_writes(num_rows * num_cols); // all bytes are written state.exec(nvbench::exec_tag::sync, [&](nvbench::launch& launch) { - cudf::table_to_array(input_view, output.data(), dtype, stream); + cudf::table_to_array(input_view, span, dtype, stream); }); } diff --git a/cpp/include/cudf/reshape.hpp b/cpp/include/cudf/reshape.hpp index 582df26c27e..794a6df757c 100644 --- a/cpp/include/cudf/reshape.hpp +++ b/cpp/include/cudf/reshape.hpp @@ -21,6 +21,9 @@ #include #include #include +#include + +#include #include @@ -110,20 +113,24 @@ std::unique_ptr byte_cast( /** * @brief Copies a table into a contiguous column-major device array. * - * This function copies a table_view with columns of the same type - * into a 2D device array in column-major order. The output buffer must be - * preallocated and large enough to hold `num_rows * num_columns` values of `output_dtype`. + * This function copies a `table_view` with columns of the same fixed-width type + * into a 2D device array stored in column-major order. + * + * The output buffer must be preallocated and passed as a `device_span` using + * a `device_span`. It must be large enough to hold + * `num_rows * num_columns * size_of(output_dtype)` bytes. * - * @throws cudf::logic_error if column types do not match `output_dtype` + * @throws cudf::logic_error if columns do not all have the same type as `output_dtype` * @throws cudf::logic_error if `output_dtype` is not a fixed-width type + * @throws std::invalid_argument if the output span is too small * * @param input A table with fixed-width, non-nullable columns of the same type - * @param output Pointer to device memory sized to hold `num_rows * num_columns` values - * @param output_dtype The data type of the output array + * @param output A span representing preallocated device memory for the output + * @param output_dtype The data type of the output elements * @param stream CUDA stream used for memory operations */ void table_to_array(table_view const& input, - void* output, + device_span output, cudf::data_type output_dtype, rmm::cuda_stream_view stream = cudf::get_default_stream()); diff --git a/cpp/src/reshape/table_to_array.cu b/cpp/src/reshape/table_to_array.cu index cf921288b8f..41208be8427 100644 --- a/cpp/src/reshape/table_to_array.cu +++ b/cpp/src/reshape/table_to_array.cu @@ -21,6 +21,7 @@ #include #include #include +#include #include #include @@ -39,81 +40,31 @@ namespace cudf { namespace detail { namespace { -// template -// void table_to_array_impl(cudf::table_view const& input, -// void* output, -// rmm::cuda_stream_view stream) -// { -// auto const num_columns = input.num_columns(); -// auto const num_rows = input.num_rows(); -// auto const item_size = sizeof(T); - -// std::vector dsts(num_columns); -// std::vector srcs(num_columns); -// std::vector sizes(num_columns, item_size * num_rows); - -// auto* base_ptr = static_cast(output); - -// CUDF_EXPECTS(cudf::all_have_same_types(input.begin(), input.end()), -// "All columns must have the same data type", -// cudf::data_type_error); -// CUDF_EXPECTS( !cudf::has_nulls(input), -// "All columns must contain no nulls", -// std::invalid_argument); - -// std::transform(input.begin(), input.end(), srcs.begin(), -// [](auto const& col) { -// return const_cast(static_cast(col.template data())); -// }); -// for (int i = 0; i < num_columns; ++i) { -// dsts[i] = static_cast(base_ptr + i * item_size * num_rows); -// } - -// #if defined(CUDA_VERSION) && CUDA_VERSION >= 12080 -// // std::vector attrs(1); -// // attrs[0].srcAccessOrder = cudaMemcpySrcAccessOrderStream; -// // std::vector attr_idxs(num_columns, 0); -// // size_t fail_idx = SIZE_MAX; - -// // CUDF_CUDA_TRY(cudaMemcpyBatchAsync(dsts.data(), -// // const_cast(srcs.data()), -// // sizes.data(), -// // num_columns, -// // attrs.data(), -// // attr_idxs.data(), -// // attrs.size(), -// // &fail_idx, -// // stream.value())); -// for (int i = 0; i < num_columns; ++i) { -// CUDF_CUDA_TRY( -// cudaMemcpyAsync(dsts[i], srcs[i], sizes[i], cudaMemcpyDeviceToDevice, stream.value())); -// } -// #else -// for (int i = 0; i < num_columns; ++i) { -// CUDF_CUDA_TRY( -// cudaMemcpyAsync(dsts[i], srcs[i], sizes[i], cudaMemcpyDeviceToDevice, stream.value())); -// } -// #endif -// } - template -void table_to_array_impl(table_view const& input, void* output, rmm::cuda_stream_view stream) +void table_to_array_impl(table_view const& input, + device_span output, + rmm::cuda_stream_view stream) { auto const num_columns = input.num_columns(); auto const num_rows = input.num_rows(); auto const item_size = sizeof(T); - auto* base_ptr = static_cast(output); - - auto h_srcs = make_host_vector(num_columns, stream); - auto h_dsts = make_host_vector(num_columns, stream); + auto const total_bytes = num_columns * num_rows * item_size; + CUDF_EXPECTS(output.size() >= total_bytes, "Output span is too small", std::invalid_argument); CUDF_EXPECTS(cudf::all_have_same_types(input.begin(), input.end()), "All columns must have the same data type", cudf::data_type_error); CUDF_EXPECTS(!cudf::has_nulls(input), "All columns must contain no nulls", std::invalid_argument); + + auto* base_ptr = output.data(); + + auto h_srcs = make_host_vector(num_columns, stream); + auto h_dsts = make_host_vector(num_columns, stream); + std::transform(input.begin(), input.end(), h_srcs.begin(), [](auto& col) { return const_cast(static_cast(col.template data())); }); + for (int i = 0; i < num_columns; ++i) { h_dsts[i] = static_cast(base_ptr + i * item_size * num_rows); } @@ -136,7 +87,6 @@ void table_to_array_impl(table_view const& input, void* output, rmm::cuda_stream stream.value()); rmm::device_buffer temp_storage(temp_storage_bytes, stream); - cub::DeviceMemcpy::Batched(temp_storage.data(), temp_storage_bytes, d_srcs.begin(), @@ -148,7 +98,7 @@ void table_to_array_impl(table_view const& input, void* output, rmm::cuda_stream struct table_to_array_dispatcher { table_view const& input; - void* output; + device_span output; rmm::cuda_stream_view stream; template ())> @@ -167,11 +117,10 @@ struct table_to_array_dispatcher { } // namespace void table_to_array(table_view const& input, - void* output, + device_span output, data_type output_dtype, rmm::cuda_stream_view stream) { - CUDF_EXPECTS(output != nullptr, "Output pointer cannot be null.", std::invalid_argument); CUDF_EXPECTS( input.num_columns() > 0, "Input must have at least one column.", std::invalid_argument); @@ -182,7 +131,7 @@ void table_to_array(table_view const& input, } // namespace detail void table_to_array(table_view const& input, - void* output, + device_span output, data_type output_dtype, rmm::cuda_stream_view stream) { diff --git a/cpp/tests/reshape/table_to_array_tests.cpp b/cpp/tests/reshape/table_to_array_tests.cpp index 9ad26ec8a20..6d513cbc6f5 100644 --- a/cpp/tests/reshape/table_to_array_tests.cpp +++ b/cpp/tests/reshape/table_to_array_tests.cpp @@ -24,11 +24,14 @@ #include #include #include +#include #include #include #include +#include + template struct TableToDeviceArrayTypedTest : public cudf::test::BaseFixture {}; @@ -91,7 +94,12 @@ TYPED_TEST(TableToDeviceArrayTypedTest, SupportedTypes) auto output = cudf::detail::make_zeroed_device_uvector(nrows * ncols, stream, *mr); - cudf::table_to_array(input, output.data(), dtype, stream); + cudf::table_to_array( + input, + cudf::device_span(reinterpret_cast(output.data()), + output.size() * sizeof(T)), + dtype, + stream); auto host_result = cudf::detail::make_std_vector(output, stream); EXPECT_EQ(host_result, expected); @@ -121,7 +129,12 @@ TYPED_TEST(FixedPointTableToDeviceArrayTest, SupportedFixedPointTypes) auto output = cudf::detail::make_zeroed_device_uvector(num_elements, stream, *mr); - cudf::table_to_array(input, output.data(), dtype, stream); + cudf::table_to_array( + input, + cudf::device_span(reinterpret_cast(output.data()), + output.size() * sizeof(RepType)), + dtype, + stream); auto host_result = cudf::detail::make_std_vector(output, stream); @@ -138,9 +151,13 @@ TEST(TableToDeviceArrayTest, UnsupportedStringType) cudf::table_view input_table({col}); rmm::device_buffer output(3 * sizeof(int32_t), stream); - EXPECT_THROW(cudf::table_to_array( - input_table, output.data(), cudf::data_type{cudf::type_id::STRING}, stream), - cudf::logic_error); + EXPECT_THROW( + cudf::table_to_array(input_table, + cudf::device_span( + reinterpret_cast(output.data()), output.size()), + cudf::data_type{cudf::type_id::STRING}, + stream), + cudf::logic_error); } TEST(TableToDeviceArrayTest, FailsWithNullValues) @@ -148,11 +165,32 @@ TEST(TableToDeviceArrayTest, FailsWithNullValues) auto stream = cudf::get_default_stream(); cudf::test::fixed_width_column_wrapper col({1, 2, 3}, {true, false, true}); - cudf::table_view input_table({col}); rmm::device_buffer output(3 * sizeof(int32_t), stream); EXPECT_THROW( - cudf::table_to_array(input_table, output.data(), cudf::data_type{cudf::type_id::INT32}, stream), + cudf::table_to_array(input_table, + cudf::device_span( + reinterpret_cast(output.data()), output.size()), + cudf::data_type{cudf::type_id::INT32}, + stream), + std::invalid_argument); +} + +TEST(TableToDeviceArrayTest, FailsWhenOutputSpanTooSmall) +{ + auto stream = cudf::get_default_stream(); + + cudf::test::fixed_width_column_wrapper col({1, 2, 3}); + cudf::table_view input_table({col}); + + rmm::device_buffer output(4, stream); + + EXPECT_THROW( + cudf::table_to_array(input_table, + cudf::device_span( + reinterpret_cast(output.data()), output.size()), + cudf::data_type{cudf::type_id::INT32}, + stream), std::invalid_argument); } From 4399938f120f524125c8a9ee58e3680d250a6b31 Mon Sep 17 00:00:00 2001 From: Matthew Murray Date: Mon, 14 Apr 2025 21:53:59 -0400 Subject: [PATCH 12/17] sort file names --- cpp/tests/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/tests/CMakeLists.txt b/cpp/tests/CMakeLists.txt index ee030bbb93e..e73090fe061 100644 --- a/cpp/tests/CMakeLists.txt +++ b/cpp/tests/CMakeLists.txt @@ -520,7 +520,7 @@ ConfigureTest( # * reshape test ---------------------------------------------------------------------------------- ConfigureTest( RESHAPE_TEST reshape/byte_cast_tests.cpp reshape/interleave_columns_tests.cpp - reshape/tile_tests.cpp reshape/table_to_array_tests.cpp + reshape/table_to_array_tests.cpp reshape/tile_tests.cpp ) # ################################################################################################## From c6785c7a5ece7d6238c68bff5bea5d4b2a60edad Mon Sep 17 00:00:00 2001 From: Matthew Murray Date: Mon, 21 Apr 2025 09:14:59 -0400 Subject: [PATCH 13/17] add other impl for benchmarking purposes --- cpp/src/reshape/table_to_array.cu | 118 ++++++++++++++++++++++-------- 1 file changed, 88 insertions(+), 30 deletions(-) diff --git a/cpp/src/reshape/table_to_array.cu b/cpp/src/reshape/table_to_array.cu index 41208be8427..7b380188778 100644 --- a/cpp/src/reshape/table_to_array.cu +++ b/cpp/src/reshape/table_to_array.cu @@ -41,7 +41,7 @@ namespace detail { namespace { template -void table_to_array_impl(table_view const& input, +void table_to_array_impl(cudf::table_view const& input, device_span output, rmm::cuda_stream_view stream) { @@ -56,46 +56,104 @@ void table_to_array_impl(table_view const& input, cudf::data_type_error); CUDF_EXPECTS(!cudf::has_nulls(input), "All columns must contain no nulls", std::invalid_argument); - auto* base_ptr = output.data(); + auto* base_ptr = reinterpret_cast(output.data()); - auto h_srcs = make_host_vector(num_columns, stream); + auto h_srcs = make_host_vector(num_columns, stream); auto h_dsts = make_host_vector(num_columns, stream); - std::transform(input.begin(), input.end(), h_srcs.begin(), [](auto& col) { - return const_cast(static_cast(col.template data())); + std::transform(input.begin(), input.end(), h_srcs.begin(), [](auto const& col) { + return static_cast(col.template data()); }); for (int i = 0; i < num_columns; ++i) { h_dsts[i] = static_cast(base_ptr + i * item_size * num_rows); } - auto const mr = cudf::get_current_device_resource_ref(); - - auto d_srcs = cudf::detail::make_device_uvector_async(h_srcs, stream, mr); - auto d_dsts = cudf::detail::make_device_uvector_async(h_dsts, stream, mr); - - thrust::constant_iterator sizes(static_cast(item_size * num_rows)); - - void* d_temp_storage = nullptr; - size_t temp_storage_bytes = 0; - cub::DeviceMemcpy::Batched(d_temp_storage, - temp_storage_bytes, - d_srcs.begin(), - d_dsts.begin(), - sizes, - num_columns, - stream.value()); - - rmm::device_buffer temp_storage(temp_storage_bytes, stream); - cub::DeviceMemcpy::Batched(temp_storage.data(), - temp_storage_bytes, - d_srcs.begin(), - d_dsts.begin(), - sizes, - num_columns, - stream.value()); +#if defined(CUDA_VERSION) && CUDA_VERSION >= 12080 + std::vector attrs(1); + attrs[0].srcAccessOrder = cudaMemcpySrcAccessOrderStream; + std::vector attr_idxs(num_columns, 0); + size_t fail_idx = std::numeric_limits::max(); + + std::vector sizes(num_columns, item_size * num_rows); + + CUDF_CUDA_TRY(cudaMemcpyBatchAsync(static_cast(h_dsts.data()), + const_cast(h_srcs.data()), + sizes.data(), + num_columns, + attrs.data(), + attr_idxs.data(), + attrs.size(), + &fail_idx, + stream.value())); +#else + for (int i = 0; i < num_columns; ++i) { + CUDF_CUDA_TRY(cudaMemcpyAsync(h_dsts[i], + const_cast(h_srcs[i]), + item_size * num_rows, + cudaMemcpyDeviceToDevice, + stream.value())); + } +#endif } +// template +// void table_to_array_impl(table_view const& input, +// device_span output, +// rmm::cuda_stream_view stream) +// { +// auto const num_columns = input.num_columns(); +// auto const num_rows = input.num_rows(); +// auto const item_size = sizeof(T); +// auto const total_bytes = num_columns * num_rows * item_size; + +// CUDF_EXPECTS(output.size() >= total_bytes, "Output span is too small", std::invalid_argument); +// CUDF_EXPECTS(cudf::all_have_same_types(input.begin(), input.end()), +// "All columns must have the same data type", +// cudf::data_type_error); +// CUDF_EXPECTS(!cudf::has_nulls(input), "All columns must contain no nulls", +// std::invalid_argument); + +// auto* base_ptr = output.data(); + +// auto h_srcs = make_host_vector(num_columns, stream); +// auto h_dsts = make_host_vector(num_columns, stream); + +// std::transform(input.begin(), input.end(), h_srcs.begin(), [](auto& col) { +// return const_cast(static_cast(col.template data())); +// }); + +// for (int i = 0; i < num_columns; ++i) { +// h_dsts[i] = static_cast(base_ptr + i * item_size * num_rows); +// } + +// auto const mr = cudf::get_current_device_resource_ref(); + +// auto d_srcs = cudf::detail::make_device_uvector_async(h_srcs, stream, mr); +// auto d_dsts = cudf::detail::make_device_uvector_async(h_dsts, stream, mr); + +// thrust::constant_iterator sizes(static_cast(item_size * num_rows)); + +// void* d_temp_storage = nullptr; +// size_t temp_storage_bytes = 0; +// cub::DeviceMemcpy::Batched(d_temp_storage, +// temp_storage_bytes, +// d_srcs.begin(), +// d_dsts.begin(), +// sizes, +// num_columns, +// stream.value()); + +// rmm::device_buffer temp_storage(temp_storage_bytes, stream); +// cub::DeviceMemcpy::Batched(temp_storage.data(), +// temp_storage_bytes, +// d_srcs.begin(), +// d_dsts.begin(), +// sizes, +// num_columns, +// stream.value()); +// } + struct table_to_array_dispatcher { table_view const& input; device_span output; From 04c837ff333086436e8c6be3c19516a3d47f7289 Mon Sep 17 00:00:00 2001 From: Matthew Murray Date: Mon, 21 Apr 2025 10:54:54 -0400 Subject: [PATCH 14/17] clean up --- cpp/src/reshape/table_to_array.cu | 118 ++++++++---------------------- 1 file changed, 30 insertions(+), 88 deletions(-) diff --git a/cpp/src/reshape/table_to_array.cu b/cpp/src/reshape/table_to_array.cu index 7b380188778..41208be8427 100644 --- a/cpp/src/reshape/table_to_array.cu +++ b/cpp/src/reshape/table_to_array.cu @@ -41,7 +41,7 @@ namespace detail { namespace { template -void table_to_array_impl(cudf::table_view const& input, +void table_to_array_impl(table_view const& input, device_span output, rmm::cuda_stream_view stream) { @@ -56,104 +56,46 @@ void table_to_array_impl(cudf::table_view const& input, cudf::data_type_error); CUDF_EXPECTS(!cudf::has_nulls(input), "All columns must contain no nulls", std::invalid_argument); - auto* base_ptr = reinterpret_cast(output.data()); + auto* base_ptr = output.data(); - auto h_srcs = make_host_vector(num_columns, stream); + auto h_srcs = make_host_vector(num_columns, stream); auto h_dsts = make_host_vector(num_columns, stream); - std::transform(input.begin(), input.end(), h_srcs.begin(), [](auto const& col) { - return static_cast(col.template data()); + std::transform(input.begin(), input.end(), h_srcs.begin(), [](auto& col) { + return const_cast(static_cast(col.template data())); }); for (int i = 0; i < num_columns; ++i) { h_dsts[i] = static_cast(base_ptr + i * item_size * num_rows); } -#if defined(CUDA_VERSION) && CUDA_VERSION >= 12080 - std::vector attrs(1); - attrs[0].srcAccessOrder = cudaMemcpySrcAccessOrderStream; - std::vector attr_idxs(num_columns, 0); - size_t fail_idx = std::numeric_limits::max(); - - std::vector sizes(num_columns, item_size * num_rows); - - CUDF_CUDA_TRY(cudaMemcpyBatchAsync(static_cast(h_dsts.data()), - const_cast(h_srcs.data()), - sizes.data(), - num_columns, - attrs.data(), - attr_idxs.data(), - attrs.size(), - &fail_idx, - stream.value())); -#else - for (int i = 0; i < num_columns; ++i) { - CUDF_CUDA_TRY(cudaMemcpyAsync(h_dsts[i], - const_cast(h_srcs[i]), - item_size * num_rows, - cudaMemcpyDeviceToDevice, - stream.value())); - } -#endif + auto const mr = cudf::get_current_device_resource_ref(); + + auto d_srcs = cudf::detail::make_device_uvector_async(h_srcs, stream, mr); + auto d_dsts = cudf::detail::make_device_uvector_async(h_dsts, stream, mr); + + thrust::constant_iterator sizes(static_cast(item_size * num_rows)); + + void* d_temp_storage = nullptr; + size_t temp_storage_bytes = 0; + cub::DeviceMemcpy::Batched(d_temp_storage, + temp_storage_bytes, + d_srcs.begin(), + d_dsts.begin(), + sizes, + num_columns, + stream.value()); + + rmm::device_buffer temp_storage(temp_storage_bytes, stream); + cub::DeviceMemcpy::Batched(temp_storage.data(), + temp_storage_bytes, + d_srcs.begin(), + d_dsts.begin(), + sizes, + num_columns, + stream.value()); } -// template -// void table_to_array_impl(table_view const& input, -// device_span output, -// rmm::cuda_stream_view stream) -// { -// auto const num_columns = input.num_columns(); -// auto const num_rows = input.num_rows(); -// auto const item_size = sizeof(T); -// auto const total_bytes = num_columns * num_rows * item_size; - -// CUDF_EXPECTS(output.size() >= total_bytes, "Output span is too small", std::invalid_argument); -// CUDF_EXPECTS(cudf::all_have_same_types(input.begin(), input.end()), -// "All columns must have the same data type", -// cudf::data_type_error); -// CUDF_EXPECTS(!cudf::has_nulls(input), "All columns must contain no nulls", -// std::invalid_argument); - -// auto* base_ptr = output.data(); - -// auto h_srcs = make_host_vector(num_columns, stream); -// auto h_dsts = make_host_vector(num_columns, stream); - -// std::transform(input.begin(), input.end(), h_srcs.begin(), [](auto& col) { -// return const_cast(static_cast(col.template data())); -// }); - -// for (int i = 0; i < num_columns; ++i) { -// h_dsts[i] = static_cast(base_ptr + i * item_size * num_rows); -// } - -// auto const mr = cudf::get_current_device_resource_ref(); - -// auto d_srcs = cudf::detail::make_device_uvector_async(h_srcs, stream, mr); -// auto d_dsts = cudf::detail::make_device_uvector_async(h_dsts, stream, mr); - -// thrust::constant_iterator sizes(static_cast(item_size * num_rows)); - -// void* d_temp_storage = nullptr; -// size_t temp_storage_bytes = 0; -// cub::DeviceMemcpy::Batched(d_temp_storage, -// temp_storage_bytes, -// d_srcs.begin(), -// d_dsts.begin(), -// sizes, -// num_columns, -// stream.value()); - -// rmm::device_buffer temp_storage(temp_storage_bytes, stream); -// cub::DeviceMemcpy::Batched(temp_storage.data(), -// temp_storage_bytes, -// d_srcs.begin(), -// d_dsts.begin(), -// sizes, -// num_columns, -// stream.value()); -// } - struct table_to_array_dispatcher { table_view const& input; device_span output; From fd0506bc5062d1c65dd17a50ef5c5d3192deecba Mon Sep 17 00:00:00 2001 From: Matthew Murray Date: Mon, 5 May 2025 14:46:14 -0400 Subject: [PATCH 15/17] address reviews --- cpp/benchmarks/reshape/table_to_array.cpp | 5 ---- cpp/include/cudf/reshape.hpp | 2 +- cpp/src/reshape/table_to_array.cu | 15 ++++------ cpp/tests/reshape/table_to_array_tests.cpp | 33 ++++++++++++++++++++++ 4 files changed, 40 insertions(+), 15 deletions(-) diff --git a/cpp/benchmarks/reshape/table_to_array.cpp b/cpp/benchmarks/reshape/table_to_array.cpp index 6e678b71f09..94dc3606b42 100644 --- a/cpp/benchmarks/reshape/table_to_array.cpp +++ b/cpp/benchmarks/reshape/table_to_array.cpp @@ -29,11 +29,6 @@ static void bench_table_to_array(nvbench::state& state) auto const num_rows = static_cast(state.get_int64("num_rows")); auto const num_cols = static_cast(state.get_int64("columns")); - if (static_cast(num_rows) * num_cols >= - static_cast(std::numeric_limits::max())) { - state.skip("Input size exceeds cudf::size_type limit"); - } - data_profile profile = data_profile_builder() .distribution(cudf::type_id::INT32, distribution_id::UNIFORM, 0, 1000) .no_validity(); diff --git a/cpp/include/cudf/reshape.hpp b/cpp/include/cudf/reshape.hpp index 794a6df757c..bda7e3aff19 100644 --- a/cpp/include/cudf/reshape.hpp +++ b/cpp/include/cudf/reshape.hpp @@ -118,7 +118,7 @@ std::unique_ptr byte_cast( * * The output buffer must be preallocated and passed as a `device_span` using * a `device_span`. It must be large enough to hold - * `num_rows * num_columns * size_of(output_dtype)` bytes. + * `num_rows * num_columns * sizeof(output_dtype)` bytes. * * @throws cudf::logic_error if columns do not all have the same type as `output_dtype` * @throws cudf::logic_error if `output_dtype` is not a fixed-width type diff --git a/cpp/src/reshape/table_to_array.cu b/cpp/src/reshape/table_to_array.cu index 41208be8427..d0e742d2ad6 100644 --- a/cpp/src/reshape/table_to_array.cu +++ b/cpp/src/reshape/table_to_array.cu @@ -58,15 +58,15 @@ void table_to_array_impl(table_view const& input, auto* base_ptr = output.data(); - auto h_srcs = make_host_vector(num_columns, stream); - auto h_dsts = make_host_vector(num_columns, stream); + auto h_srcs = make_host_vector(num_columns, stream); + auto h_dsts = make_host_vector(num_columns, stream); std::transform(input.begin(), input.end(), h_srcs.begin(), [](auto& col) { - return const_cast(static_cast(col.template data())); + return const_cast(col.template data()); }); for (int i = 0; i < num_columns; ++i) { - h_dsts[i] = static_cast(base_ptr + i * item_size * num_rows); + h_dsts[i] = reinterpret_cast(base_ptr + i * item_size * num_rows); } auto const mr = cudf::get_current_device_resource_ref(); @@ -76,16 +76,14 @@ void table_to_array_impl(table_view const& input, thrust::constant_iterator sizes(static_cast(item_size * num_rows)); - void* d_temp_storage = nullptr; size_t temp_storage_bytes = 0; - cub::DeviceMemcpy::Batched(d_temp_storage, + cub::DeviceMemcpy::Batched(nullptr, temp_storage_bytes, d_srcs.begin(), d_dsts.begin(), sizes, num_columns, stream.value()); - rmm::device_buffer temp_storage(temp_storage_bytes, stream); cub::DeviceMemcpy::Batched(temp_storage.data(), temp_storage_bytes, @@ -121,8 +119,7 @@ void table_to_array(table_view const& input, data_type output_dtype, rmm::cuda_stream_view stream) { - CUDF_EXPECTS( - input.num_columns() > 0, "Input must have at least one column.", std::invalid_argument); + if (input.num_columns() == 0) { return; } cudf::type_dispatcher( output_dtype, table_to_array_dispatcher{input, output, stream}); diff --git a/cpp/tests/reshape/table_to_array_tests.cpp b/cpp/tests/reshape/table_to_array_tests.cpp index 6d513cbc6f5..29cae29b58a 100644 --- a/cpp/tests/reshape/table_to_array_tests.cpp +++ b/cpp/tests/reshape/table_to_array_tests.cpp @@ -194,3 +194,36 @@ TEST(TableToDeviceArrayTest, FailsWhenOutputSpanTooSmall) stream), std::invalid_argument); } + +TEST(TableToDeviceArrayTest, NoRows) +{ + auto stream = cudf::get_default_stream(); + + cudf::test::fixed_width_column_wrapper col({}); + cudf::table_view input_table({col}); + + rmm::device_buffer output(0, stream); + + EXPECT_NO_THROW( + cudf::table_to_array(input_table, + cudf::device_span( + reinterpret_cast(output.data()), output.size()), + cudf::data_type{cudf::type_id::INT32}, + stream)); +} + +TEST(TableToDeviceArrayTest, NoColumns) +{ + auto stream = cudf::get_default_stream(); + + cudf::table_view input_table{std::vector{}}; + + rmm::device_buffer output(0, stream); + + EXPECT_NO_THROW( + cudf::table_to_array(input_table, + cudf::device_span( + reinterpret_cast(output.data()), output.size()), + cudf::data_type{cudf::type_id::INT8}, + stream)); +} From db78162263860a6d9443183011b4201f779f0484 Mon Sep 17 00:00:00 2001 From: Matthew Murray Date: Mon, 5 May 2025 15:39:32 -0400 Subject: [PATCH 16/17] address review --- cpp/src/reshape/table_to_array.cu | 2 +- cpp/tests/reshape/table_to_array_tests.cpp | 24 ++++++++++++++++++++++ 2 files changed, 25 insertions(+), 1 deletion(-) diff --git a/cpp/src/reshape/table_to_array.cu b/cpp/src/reshape/table_to_array.cu index d0e742d2ad6..a3ba277adde 100644 --- a/cpp/src/reshape/table_to_array.cu +++ b/cpp/src/reshape/table_to_array.cu @@ -48,7 +48,7 @@ void table_to_array_impl(table_view const& input, auto const num_columns = input.num_columns(); auto const num_rows = input.num_rows(); auto const item_size = sizeof(T); - auto const total_bytes = num_columns * num_rows * item_size; + auto const total_bytes = static_cast(num_columns) * num_rows * item_size; CUDF_EXPECTS(output.size() >= total_bytes, "Output span is too small", std::invalid_argument); CUDF_EXPECTS(cudf::all_have_same_types(input.begin(), input.end()), diff --git a/cpp/tests/reshape/table_to_array_tests.cpp b/cpp/tests/reshape/table_to_array_tests.cpp index 29cae29b58a..1eac73caad2 100644 --- a/cpp/tests/reshape/table_to_array_tests.cpp +++ b/cpp/tests/reshape/table_to_array_tests.cpp @@ -227,3 +227,27 @@ TEST(TableToDeviceArrayTest, NoColumns) cudf::data_type{cudf::type_id::INT8}, stream)); } + +TEST(TableToDeviceArrayTest, FlatSizeExceedsSizeTypeLimit) +{ + auto stream = cudf::get_default_stream(); + auto size_limit = static_cast(std::numeric_limits::max()); + auto num_rows = size_limit * 0.6; + auto num_cols = 2; + auto flat_size = num_rows * num_cols; + auto total_bytes = flat_size * sizeof(int8_t); + + std::vector data(num_rows, 1); + auto col = cudf::test::fixed_width_column_wrapper(data.begin(), data.end()); + + cudf::table_view input_table({col, col}); + + rmm::device_buffer output(total_bytes, stream); + + EXPECT_NO_THROW( + cudf::table_to_array(input_table, + cudf::device_span( + reinterpret_cast(output.data()), total_bytes), + cudf::data_type{cudf::type_id::INT8}, + stream)); +} From b71ec3a6a7cb7694148674681cb719886f1d996c Mon Sep 17 00:00:00 2001 From: Matthew Murray Date: Tue, 6 May 2025 13:29:59 -0400 Subject: [PATCH 17/17] address review --- cpp/benchmarks/reshape/table_to_array.cpp | 6 ++--- cpp/include/cudf/detail/reshape.hpp | 6 ++--- cpp/include/cudf/reshape.hpp | 8 +++--- cpp/src/reshape/table_to_array.cu | 29 ++++++---------------- cpp/tests/reshape/table_to_array_tests.cpp | 11 -------- 5 files changed, 16 insertions(+), 44 deletions(-) diff --git a/cpp/benchmarks/reshape/table_to_array.cpp b/cpp/benchmarks/reshape/table_to_array.cpp index 94dc3606b42..71b98c307a1 100644 --- a/cpp/benchmarks/reshape/table_to_array.cpp +++ b/cpp/benchmarks/reshape/table_to_array.cpp @@ -37,7 +37,6 @@ static void bench_table_to_array(nvbench::state& state) auto input_view = input_table->view(); auto stream = cudf::get_default_stream(); - auto dtype = cudf::data_type{cudf::type_id::INT32}; rmm::device_buffer output(num_rows * num_cols * sizeof(int32_t), stream); auto span = cudf::device_span(reinterpret_cast(output.data()), @@ -47,9 +46,8 @@ static void bench_table_to_array(nvbench::state& state) state.add_global_memory_reads(num_rows * num_cols); // all bytes are read state.add_global_memory_writes(num_rows * num_cols); // all bytes are written - state.exec(nvbench::exec_tag::sync, [&](nvbench::launch& launch) { - cudf::table_to_array(input_view, span, dtype, stream); - }); + state.exec(nvbench::exec_tag::sync, + [&](nvbench::launch& launch) { cudf::table_to_array(input_view, span, stream); }); } NVBENCH_BENCH(bench_table_to_array) diff --git a/cpp/include/cudf/detail/reshape.hpp b/cpp/include/cudf/detail/reshape.hpp index 46fabcf2350..c8f9a3722ac 100644 --- a/cpp/include/cudf/detail/reshape.hpp +++ b/cpp/include/cudf/detail/reshape.hpp @@ -19,6 +19,7 @@ #include #include #include +#include #include @@ -45,9 +46,8 @@ std::unique_ptr interleave_columns(table_view const& input, * @copydoc cudf::table_to_array */ void table_to_array(table_view const& input, - void* output, - data_type output_dtype, - rmm::cuda_stream_view stream); + device_span output, + rmm::cuda_stream_view stream = cudf::get_default_stream()); } // namespace detail } // namespace CUDF_EXPORT cudf diff --git a/cpp/include/cudf/reshape.hpp b/cpp/include/cudf/reshape.hpp index bda7e3aff19..a81535ee1b0 100644 --- a/cpp/include/cudf/reshape.hpp +++ b/cpp/include/cudf/reshape.hpp @@ -118,20 +118,18 @@ std::unique_ptr byte_cast( * * The output buffer must be preallocated and passed as a `device_span` using * a `device_span`. It must be large enough to hold - * `num_rows * num_columns * sizeof(output_dtype)` bytes. + * `num_rows * num_columns * sizeof(dtype)` bytes. * - * @throws cudf::logic_error if columns do not all have the same type as `output_dtype` - * @throws cudf::logic_error if `output_dtype` is not a fixed-width type + * @throws cudf::logic_error if columns do not all have the same type + * @throws cudf::logic_error if the dtype of the columns is not a fixed-width type * @throws std::invalid_argument if the output span is too small * * @param input A table with fixed-width, non-nullable columns of the same type * @param output A span representing preallocated device memory for the output - * @param output_dtype The data type of the output elements * @param stream CUDA stream used for memory operations */ void table_to_array(table_view const& input, device_span output, - cudf::data_type output_dtype, rmm::cuda_stream_view stream = cudf::get_default_stream()); /** @} */ // end of group diff --git a/cpp/src/reshape/table_to_array.cu b/cpp/src/reshape/table_to_array.cu index a3ba277adde..60e145e02e0 100644 --- a/cpp/src/reshape/table_to_array.cu +++ b/cpp/src/reshape/table_to_array.cu @@ -16,6 +16,7 @@ #include #include +#include #include #include #include @@ -76,22 +77,8 @@ void table_to_array_impl(table_view const& input, thrust::constant_iterator sizes(static_cast(item_size * num_rows)); - size_t temp_storage_bytes = 0; - cub::DeviceMemcpy::Batched(nullptr, - temp_storage_bytes, - d_srcs.begin(), - d_dsts.begin(), - sizes, - num_columns, - stream.value()); - rmm::device_buffer temp_storage(temp_storage_bytes, stream); - cub::DeviceMemcpy::Batched(temp_storage.data(), - temp_storage_bytes, - d_srcs.begin(), - d_dsts.begin(), - sizes, - num_columns, - stream.value()); + cudf::detail::batched_memcpy_async( + d_srcs.begin(), d_dsts.begin(), sizes, num_columns, stream.value()); } struct table_to_array_dispatcher { @@ -116,24 +103,24 @@ struct table_to_array_dispatcher { void table_to_array(table_view const& input, device_span output, - data_type output_dtype, rmm::cuda_stream_view stream) { - if (input.num_columns() == 0) { return; } + if (input.num_columns() == 0) return; + + auto const dtype = input.column(0).type(); cudf::type_dispatcher( - output_dtype, table_to_array_dispatcher{input, output, stream}); + dtype, table_to_array_dispatcher{input, output, stream}); } } // namespace detail void table_to_array(table_view const& input, device_span output, - data_type output_dtype, rmm::cuda_stream_view stream) { CUDF_FUNC_RANGE(); - cudf::detail::table_to_array(input, output, output_dtype, stream); + cudf::detail::table_to_array(input, output, stream); } } // namespace cudf diff --git a/cpp/tests/reshape/table_to_array_tests.cpp b/cpp/tests/reshape/table_to_array_tests.cpp index 1eac73caad2..46af26c6828 100644 --- a/cpp/tests/reshape/table_to_array_tests.cpp +++ b/cpp/tests/reshape/table_to_array_tests.cpp @@ -64,8 +64,6 @@ TYPED_TEST(TableToDeviceArrayTypedTest, SupportedTypes) auto stream = cudf::get_default_stream(); auto mr = rmm::mr::get_current_device_resource(); - auto const dtype = cudf::data_type{cudf::type_to_id()}; - int nrows = 3; int ncols = 4; @@ -98,7 +96,6 @@ TYPED_TEST(TableToDeviceArrayTypedTest, SupportedTypes) input, cudf::device_span(reinterpret_cast(output.data()), output.size() * sizeof(T)), - dtype, stream); auto host_result = cudf::detail::make_std_vector(output, stream); @@ -119,7 +116,6 @@ TYPED_TEST(FixedPointTableToDeviceArrayTest, SupportedFixedPointTypes) auto stream = cudf::get_default_stream(); auto mr = rmm::mr::get_current_device_resource(); auto scale = numeric::scale_type{-2}; - auto dtype = cudf::data_type{cudf::type_to_id(), scale}; fp_wrapper col0({123, 456, 789}, scale); fp_wrapper col1({321, 654, 987}, scale); @@ -133,7 +129,6 @@ TYPED_TEST(FixedPointTableToDeviceArrayTest, SupportedFixedPointTypes) input, cudf::device_span(reinterpret_cast(output.data()), output.size() * sizeof(RepType)), - dtype, stream); auto host_result = cudf::detail::make_std_vector(output, stream); @@ -155,7 +150,6 @@ TEST(TableToDeviceArrayTest, UnsupportedStringType) cudf::table_to_array(input_table, cudf::device_span( reinterpret_cast(output.data()), output.size()), - cudf::data_type{cudf::type_id::STRING}, stream), cudf::logic_error); } @@ -172,7 +166,6 @@ TEST(TableToDeviceArrayTest, FailsWithNullValues) cudf::table_to_array(input_table, cudf::device_span( reinterpret_cast(output.data()), output.size()), - cudf::data_type{cudf::type_id::INT32}, stream), std::invalid_argument); } @@ -190,7 +183,6 @@ TEST(TableToDeviceArrayTest, FailsWhenOutputSpanTooSmall) cudf::table_to_array(input_table, cudf::device_span( reinterpret_cast(output.data()), output.size()), - cudf::data_type{cudf::type_id::INT32}, stream), std::invalid_argument); } @@ -208,7 +200,6 @@ TEST(TableToDeviceArrayTest, NoRows) cudf::table_to_array(input_table, cudf::device_span( reinterpret_cast(output.data()), output.size()), - cudf::data_type{cudf::type_id::INT32}, stream)); } @@ -224,7 +215,6 @@ TEST(TableToDeviceArrayTest, NoColumns) cudf::table_to_array(input_table, cudf::device_span( reinterpret_cast(output.data()), output.size()), - cudf::data_type{cudf::type_id::INT8}, stream)); } @@ -248,6 +238,5 @@ TEST(TableToDeviceArrayTest, FlatSizeExceedsSizeTypeLimit) cudf::table_to_array(input_table, cudf::device_span( reinterpret_cast(output.data()), total_bytes), - cudf::data_type{cudf::type_id::INT8}, stream)); }