diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index d1d9367c831..ca0f95652a8 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -647,6 +647,7 @@ add_library( src/replace/replace.cu src/reshape/byte_cast.cu src/reshape/interleave_columns.cu + src/reshape/table_to_array.cu src/reshape/tile.cu src/rolling/detail/optimized_unbounded_window.cpp src/rolling/detail/rolling_collect_list.cu diff --git a/cpp/benchmarks/CMakeLists.txt b/cpp/benchmarks/CMakeLists.txt index f71dcdcffaf..bc75c22e5fe 100644 --- a/cpp/benchmarks/CMakeLists.txt +++ b/cpp/benchmarks/CMakeLists.txt @@ -435,7 +435,7 @@ ConfigureNVBench(DECIMAL_NVBENCH decimal/convert_floating.cpp) # ################################################################################################## # * reshape benchmark # --------------------------------------------------------------------------------- -ConfigureNVBench(RESHAPE_NVBENCH reshape/interleave.cpp) +ConfigureNVBench(RESHAPE_NVBENCH reshape/interleave.cpp reshape/table_to_array.cpp) # ################################################################################################## # * rolling benchmark diff --git a/cpp/benchmarks/reshape/table_to_array.cpp b/cpp/benchmarks/reshape/table_to_array.cpp new file mode 100644 index 00000000000..71b98c307a1 --- /dev/null +++ b/cpp/benchmarks/reshape/table_to_array.cpp @@ -0,0 +1,56 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include + +#include +#include +#include + +#include + +#include + +static void bench_table_to_array(nvbench::state& state) +{ + auto const num_rows = static_cast(state.get_int64("num_rows")); + auto const num_cols = static_cast(state.get_int64("columns")); + + data_profile profile = data_profile_builder() + .distribution(cudf::type_id::INT32, distribution_id::UNIFORM, 0, 1000) + .no_validity(); + std::vector types(num_cols, cudf::type_id::INT32); + auto input_table = create_random_table(types, row_count{num_rows}, profile); + + auto input_view = input_table->view(); + auto stream = cudf::get_default_stream(); + + rmm::device_buffer output(num_rows * num_cols * sizeof(int32_t), stream); + auto span = cudf::device_span(reinterpret_cast(output.data()), + output.size()); + + state.set_cuda_stream(nvbench::make_cuda_stream_view(stream.value())); + state.add_global_memory_reads(num_rows * num_cols); // all bytes are read + state.add_global_memory_writes(num_rows * num_cols); // all bytes are written + + state.exec(nvbench::exec_tag::sync, + [&](nvbench::launch& launch) { cudf::table_to_array(input_view, span, stream); }); +} + +NVBENCH_BENCH(bench_table_to_array) + .set_name("table_to_array") + .add_int64_axis("num_rows", {32768, 262144, 2097152, 16777216}) + .add_int64_axis("columns", {2, 10, 100}); diff --git a/cpp/include/cudf/detail/reshape.hpp b/cpp/include/cudf/detail/reshape.hpp index aeeed282d8b..c8f9a3722ac 100644 --- a/cpp/include/cudf/detail/reshape.hpp +++ b/cpp/include/cudf/detail/reshape.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2024, NVIDIA CORPORATION. + * Copyright (c) 2020-2025, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -19,6 +19,7 @@ #include #include #include +#include #include @@ -41,5 +42,12 @@ std::unique_ptr interleave_columns(table_view const& input, rmm::cuda_stream_view, rmm::device_async_resource_ref mr); +/** + * @copydoc cudf::table_to_array + */ +void table_to_array(table_view const& input, + device_span output, + rmm::cuda_stream_view stream = cudf::get_default_stream()); + } // namespace detail } // namespace CUDF_EXPORT cudf diff --git a/cpp/include/cudf/reshape.hpp b/cpp/include/cudf/reshape.hpp index e437e7abfca..a81535ee1b0 100644 --- a/cpp/include/cudf/reshape.hpp +++ b/cpp/include/cudf/reshape.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2024, NVIDIA CORPORATION. + * Copyright (c) 2020-2025, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -21,6 +21,9 @@ #include #include #include +#include + +#include #include @@ -107,6 +110,28 @@ std::unique_ptr byte_cast( rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); +/** + * @brief Copies a table into a contiguous column-major device array. + * + * This function copies a `table_view` with columns of the same fixed-width type + * into a 2D device array stored in column-major order. + * + * The output buffer must be preallocated and passed as a `device_span` + * over bytes. It must be large enough to hold + * `num_rows * num_columns * sizeof(dtype)` bytes.
+ * + * @throws cudf::data_type_error if columns do not all have the same type + * @throws cudf::logic_error if the dtype of the columns is not a fixed-width type + * @throws std::invalid_argument if the output span is too small or any column contains nulls + * + * @param input A table with fixed-width, non-nullable columns of the same type + * @param output A span representing preallocated device memory for the output + * @param stream CUDA stream used for memory operations + */ +void table_to_array(table_view const& input, + device_span output, + rmm::cuda_stream_view stream = cudf::get_default_stream()); + /** @} */ // end of group } // namespace CUDF_EXPORT cudf diff --git a/cpp/src/reshape/table_to_array.cu b/cpp/src/reshape/table_to_array.cu new file mode 100644 index 00000000000..60e145e02e0 --- /dev/null +++ b/cpp/src/reshape/table_to_array.cu @@ -0,0 +1,126 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +namespace cudf { +namespace detail { +namespace { + +template +void table_to_array_impl(table_view const& input, + device_span output, + rmm::cuda_stream_view stream) +{ + auto const num_columns = input.num_columns(); + auto const num_rows = input.num_rows(); + auto const item_size = sizeof(T); + auto const total_bytes = static_cast(num_columns) * num_rows * item_size; + + CUDF_EXPECTS(output.size() >= total_bytes, "Output span is too small", std::invalid_argument); + CUDF_EXPECTS(cudf::all_have_same_types(input.begin(), input.end()), + "All columns must have the same data type", + cudf::data_type_error); + CUDF_EXPECTS(!cudf::has_nulls(input), "All columns must contain no nulls", std::invalid_argument); + + auto* base_ptr = output.data(); + + auto h_srcs = make_host_vector(num_columns, stream); + auto h_dsts = make_host_vector(num_columns, stream); + + std::transform(input.begin(), input.end(), h_srcs.begin(), [](auto& col) { + return const_cast(col.template data()); + }); + + for (int i = 0; i < num_columns; ++i) { + h_dsts[i] = reinterpret_cast(base_ptr + i * item_size * num_rows); + } + + auto const mr = cudf::get_current_device_resource_ref(); + + auto d_srcs = cudf::detail::make_device_uvector_async(h_srcs, stream, mr); + auto d_dsts = cudf::detail::make_device_uvector_async(h_dsts, stream, mr); + + thrust::constant_iterator sizes(static_cast(item_size * num_rows)); + + cudf::detail::batched_memcpy_async( + d_srcs.begin(), d_dsts.begin(), sizes, num_columns, stream.value()); +} + +struct table_to_array_dispatcher { + table_view const& input; + device_span output; + rmm::cuda_stream_view stream; + + template ())> + void operator()() const + { + table_to_array_impl(input, output, stream); + } + + template ())> + void operator()() const + { + 
CUDF_FAIL("Unsupported dtype"); + } +}; + +} // namespace + +void table_to_array(table_view const& input, + device_span output, + rmm::cuda_stream_view stream) +{ + if (input.num_columns() == 0) return; + + auto const dtype = input.column(0).type(); + + cudf::type_dispatcher( + dtype, table_to_array_dispatcher{input, output, stream}); +} + +} // namespace detail + +void table_to_array(table_view const& input, + device_span output, + rmm::cuda_stream_view stream) +{ + CUDF_FUNC_RANGE(); + cudf::detail::table_to_array(input, output, stream); +} + +} // namespace cudf diff --git a/cpp/tests/CMakeLists.txt b/cpp/tests/CMakeLists.txt index 22b231ffb7f..5c8b5fe233b 100644 --- a/cpp/tests/CMakeLists.txt +++ b/cpp/tests/CMakeLists.txt @@ -527,7 +527,7 @@ ConfigureTest( # * reshape test ---------------------------------------------------------------------------------- ConfigureTest( RESHAPE_TEST reshape/byte_cast_tests.cpp reshape/interleave_columns_tests.cpp - reshape/tile_tests.cpp + reshape/table_to_array_tests.cpp reshape/tile_tests.cpp ) # ################################################################################################## diff --git a/cpp/tests/reshape/table_to_array_tests.cpp b/cpp/tests/reshape/table_to_array_tests.cpp new file mode 100644 index 00000000000..46af26c6828 --- /dev/null +++ b/cpp/tests/reshape/table_to_array_tests.cpp @@ -0,0 +1,242 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#include + +template +struct TableToDeviceArrayTypedTest : public cudf::test::BaseFixture {}; + +using SupportedTypes = cudf::test::Types; + +TYPED_TEST_SUITE(TableToDeviceArrayTypedTest, SupportedTypes); + +TYPED_TEST(TableToDeviceArrayTypedTest, SupportedTypes) +{ + using T = TypeParam; + auto stream = cudf::get_default_stream(); + auto mr = rmm::mr::get_current_device_resource(); + + int nrows = 3; + int ncols = 4; + + std::vector> cols; + std::vector expected; + + for (int col = 0; col < ncols; ++col) { + std::vector data(nrows); + for (int row = 0; row < nrows; ++row) { + auto val = col * nrows + row + 1; + if constexpr (cudf::is_chrono()) { + data[row] = T(typename T::duration{val}); + } else { + data[row] = static_cast(val); + } + expected.push_back(data[row]); + } + cols.push_back(std::make_unique( + cudf::test::fixed_width_column_wrapper(data.begin(), data.end()))); + } + + std::vector views(cols.size()); + std::transform( + cols.begin(), cols.end(), views.begin(), [](auto const& col) { return col->view(); }); + cudf::table_view input{views}; + + auto output = cudf::detail::make_zeroed_device_uvector(nrows * ncols, stream, *mr); + + cudf::table_to_array( + input, + cudf::device_span(reinterpret_cast(output.data()), + output.size() * sizeof(T)), + stream); + + auto host_result = cudf::detail::make_std_vector(output, stream); + EXPECT_EQ(host_result, expected); +} + +template +struct FixedPointTableToDeviceArrayTest : public cudf::test::BaseFixture {}; + +TYPED_TEST_SUITE(FixedPointTableToDeviceArrayTest, cudf::test::FixedPointTypes); + +TYPED_TEST(FixedPointTableToDeviceArrayTest, SupportedFixedPointTypes) +{ + using decimalXX = TypeParam; + using RepType = cudf::device_storage_type_t; + using fp_wrapper = cudf::test::fixed_point_column_wrapper; + + auto stream = cudf::get_default_stream(); + auto mr = 
rmm::mr::get_current_device_resource(); + auto scale = numeric::scale_type{-2}; + + fp_wrapper col0({123, 456, 789}, scale); + fp_wrapper col1({321, 654, 987}, scale); + + cudf::table_view input({col0, col1}); + size_t num_elements = input.num_rows() * input.num_columns(); + + auto output = cudf::detail::make_zeroed_device_uvector(num_elements, stream, *mr); + + cudf::table_to_array( + input, + cudf::device_span(reinterpret_cast(output.data()), + output.size() * sizeof(RepType)), + stream); + + auto host_result = cudf::detail::make_std_vector(output, stream); + + std::vector expected{123, 456, 789, 321, 654, 987}; + EXPECT_EQ(host_result, expected); +} + +struct TableToDeviceArrayTest : public cudf::test::BaseFixture {}; + +TEST(TableToDeviceArrayTest, UnsupportedStringType) +{ + auto stream = cudf::get_default_stream(); + auto col = cudf::test::strings_column_wrapper({"a", "b", "c"}); + cudf::table_view input_table({col}); + rmm::device_buffer output(3 * sizeof(int32_t), stream); + + EXPECT_THROW( + cudf::table_to_array(input_table, + cudf::device_span( + reinterpret_cast(output.data()), output.size()), + stream), + cudf::logic_error); +} + +TEST(TableToDeviceArrayTest, FailsWithNullValues) +{ + auto stream = cudf::get_default_stream(); + + cudf::test::fixed_width_column_wrapper col({1, 2, 3}, {true, false, true}); + cudf::table_view input_table({col}); + rmm::device_buffer output(3 * sizeof(int32_t), stream); + + EXPECT_THROW( + cudf::table_to_array(input_table, + cudf::device_span( + reinterpret_cast(output.data()), output.size()), + stream), + std::invalid_argument); +} + +TEST(TableToDeviceArrayTest, FailsWhenOutputSpanTooSmall) +{ + auto stream = cudf::get_default_stream(); + + cudf::test::fixed_width_column_wrapper col({1, 2, 3}); + cudf::table_view input_table({col}); + + rmm::device_buffer output(4, stream); + + EXPECT_THROW( + cudf::table_to_array(input_table, + cudf::device_span( + reinterpret_cast(output.data()), output.size()), + stream), + 
std::invalid_argument); +} + +TEST(TableToDeviceArrayTest, NoRows) +{ + auto stream = cudf::get_default_stream(); + + cudf::test::fixed_width_column_wrapper col({}); + cudf::table_view input_table({col}); + + rmm::device_buffer output(0, stream); + + EXPECT_NO_THROW( + cudf::table_to_array(input_table, + cudf::device_span( + reinterpret_cast(output.data()), output.size()), + stream)); +} + +TEST(TableToDeviceArrayTest, NoColumns) +{ + auto stream = cudf::get_default_stream(); + + cudf::table_view input_table{std::vector{}}; + + rmm::device_buffer output(0, stream); + + EXPECT_NO_THROW( + cudf::table_to_array(input_table, + cudf::device_span( + reinterpret_cast(output.data()), output.size()), + stream)); +} + +TEST(TableToDeviceArrayTest, FlatSizeExceedsSizeTypeLimit) +{ + auto stream = cudf::get_default_stream(); + auto size_limit = static_cast(std::numeric_limits::max()); + auto num_rows = size_limit * 0.6; + auto num_cols = 2; + auto flat_size = num_rows * num_cols; + auto total_bytes = flat_size * sizeof(int8_t); + + std::vector data(num_rows, 1); + auto col = cudf::test::fixed_width_column_wrapper(data.begin(), data.end()); + + cudf::table_view input_table({col, col}); + + rmm::device_buffer output(total_bytes, stream); + + EXPECT_NO_THROW( + cudf::table_to_array(input_table, + cudf::device_span( + reinterpret_cast(output.data()), total_bytes), + stream)); +}