Skip to content
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
Show all changes
27 commits
Select commit Hold shift + click to select a range
e201564
Add a public API for converting a table_view to device array
Matt711 Apr 7, 2025
6e9289e
support decimals and add more tests
Matt711 Apr 7, 2025
21a9201
Merge branch 'branch-25.06' into fea/cpp/table-to-device-array
Matt711 Apr 8, 2025
7eb3690
fallback if cuda version < 12.8
Matt711 Apr 8, 2025
854cbbf
Merge branch 'branch-25.06' into fea/cpp/table-to-device-array
Matt711 Apr 8, 2025
5a9d195
clean up
Matt711 Apr 8, 2025
e5e65cc
address reviews
Matt711 Apr 8, 2025
0343bb3
address review
Matt711 Apr 8, 2025
7530ecf
use snake case
Matt711 Apr 8, 2025
c7ab103
Merge branch 'branch-25.06' into fea/cpp/table-to-device-array
Matt711 Apr 9, 2025
eec23b5
Merge branch 'branch-25.06' into fea/cpp/table-to-device-array
Matt711 Apr 10, 2025
f027616
address reviews
Matt711 Apr 10, 2025
b3251fe
clean up
Matt711 Apr 10, 2025
f5a0096
Merge branch 'branch-25.06' into fea/cpp/table-to-device-array
Matt711 Apr 14, 2025
96ef619
address review
Matt711 Apr 14, 2025
82c5b22
pass a device_span instead of a raw pointer
Matt711 Apr 14, 2025
4399938
sort file names
Matt711 Apr 15, 2025
737fa22
Merge branch 'branch-25.06' into fea/cpp/table-to-device-array
Matt711 Apr 15, 2025
36abd46
Merge branch 'branch-25.06' into fea/cpp/table-to-device-array
Matt711 Apr 15, 2025
1fb9563
Merge branch 'branch-25.06' into fea/cpp/table-to-device-array
Matt711 Apr 21, 2025
c6785c7
add other impl for benchmarking purposes
Matt711 Apr 21, 2025
04c837f
clean up
Matt711 Apr 21, 2025
1530b5f
Merge branch 'branch-25.06' into fea/cpp/table-to-device-array
Matt711 May 5, 2025
fd0506b
address reviews
Matt711 May 5, 2025
db78162
address review
Matt711 May 5, 2025
f5bf21c
Merge branch 'branch-25.06' into fea/cpp/table-to-device-array
Matt711 May 6, 2025
b71ec3a
address review
Matt711 May 6, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions cpp/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -644,6 +644,7 @@ add_library(
src/reshape/byte_cast.cu
src/reshape/interleave_columns.cu
src/reshape/tile.cu
src/reshape/table_to_array.cu
src/rolling/detail/optimized_unbounded_window.cpp
src/rolling/detail/rolling_collect_list.cu
src/rolling/detail/rolling_fixed_window.cu
Expand Down
25 changes: 24 additions & 1 deletion cpp/include/cudf/reshape.hpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2020-2024, NVIDIA CORPORATION.
* Copyright (c) 2020-2025, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -107,6 +107,29 @@ std::unique_ptr<column> byte_cast(
rmm::cuda_stream_view stream = cudf::get_default_stream(),
rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref());

/**
* @brief Copies a table into a contiguous column-major device array.
*
* This function converts a table_view with columns of the same type
* into a 2D device array in column-major order. The output buffer must be
* preallocated and large enough to hold `num_rows * num_columns` values of `output_dtype`.
*
* @throws cudf::logic_error if column types do not match `output_dtype`
* @throws cudf::logic_error if `output_dtype` is not fixed-width or is a fixed-point type
*
* @param input A table with fixed-width, non-nullable columns of the same type
* @param output Pointer to device memory large enough to hold `num_rows * num_columns` values
* @param output_dtype The logical data type of the output array
* @param stream CUDA stream used for memory operations
* @param mr Memory resource used for device allocations (currently unused)
*/
void table_to_device_array(
table_view const& input,
void* output,
cudf::data_type output_dtype,
rmm::cuda_stream_view stream = cudf::get_default_stream(),
rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref());

/** @} */ // end of group

} // namespace CUDF_EXPORT cudf
113 changes: 113 additions & 0 deletions cpp/src/reshape/table_to_array.cu
Original file line number Diff line number Diff line change
@@ -0,0 +1,113 @@
/*
* Copyright (c) 2025, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include <cudf/detail/nvtx/ranges.hpp>
#include <cudf/reshape.hpp>
#include <cudf/utilities/default_stream.hpp>
#include <cudf/utilities/error.hpp>
#include <cudf/utilities/type_dispatcher.hpp>

#include <rmm/cuda_stream_view.hpp>
#include <rmm/device_uvector.hpp>

#include <cub/device/device_memcpy.cuh>
#include <cuda_runtime.h>
#include <thrust/device_vector.h>
#include <thrust/iterator/counting_iterator.h>
#include <thrust/iterator/transform_iterator.h>

namespace cudf {
namespace {

template <typename T>
void _table_to_device_array(cudf::table_view const& input,
void* output,
rmm::cuda_stream_view stream)
{
auto const num_columns = input.num_columns();
auto const num_rows = input.num_rows();
auto const item_size = sizeof(T);

std::vector<void*> dsts(num_columns);
std::vector<void const*> srcs(num_columns);
std::vector<size_t> sizes(num_columns, item_size * num_rows);

auto* base_ptr = static_cast<uint8_t*>(output);

for (int i = 0; i < num_columns; ++i) {
auto const& col = input.column(i);
CUDF_EXPECTS(col.type() == input.column(0).type(), "All columns must have the same dtype");

auto* src_ptr = static_cast<void const*>(col.data<T>());
auto* dst_ptr = base_ptr + i * item_size * num_rows;

srcs[i] = src_ptr;
dsts[i] = dst_ptr;
}

cudaMemcpyAttributes attr{};
attr.srcAccessOrder = cudaMemcpySrcAccessOrderStream;
std::vector<cudaMemcpyAttributes> attrs{attr};
std::vector<size_t> attr_idxs{0};
size_t fail_idx = SIZE_MAX;

CUDF_CUDA_TRY(cudaMemcpyBatchAsync(dsts.data(),
const_cast<void**>(srcs.data()),
sizes.data(),
num_columns,
attrs.data(),
attr_idxs.data(),
attrs.size(),
&fail_idx,
stream.value()));
}

struct TableToArrayDispatcher {
table_view const& input;
void* output;
rmm::cuda_stream_view stream;

template <typename T, CUDF_ENABLE_IF(is_fixed_width<T>() || is_fixed_point<T>())>
void operator()() const
{
if constexpr (is_fixed_point<T>()) {
using StorageType = cudf::device_storage_type_t<T>;
_table_to_device_array<StorageType>(input, output, stream);
} else {
_table_to_device_array<T>(input, output, stream);
}
}

template <typename T, CUDF_ENABLE_IF(!is_fixed_width<T>() && !is_fixed_point<T>())>
void operator()() const
{
CUDF_FAIL("Unsupported dtype");
}
};

} // namespace

void table_to_device_array(table_view const& input,
void* output,
data_type output_dtype,
rmm::cuda_stream_view stream,
rmm::device_async_resource_ref)
{
CUDF_FUNC_RANGE();
cudf::type_dispatcher(output_dtype, TableToArrayDispatcher{input, output, stream});
}

} // namespace cudf
2 changes: 1 addition & 1 deletion cpp/tests/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -520,7 +520,7 @@ ConfigureTest(
# * reshape test ----------------------------------------------------------------------------------
ConfigureTest(
RESHAPE_TEST reshape/byte_cast_tests.cpp reshape/interleave_columns_tests.cpp
reshape/tile_tests.cpp
reshape/tile_tests.cpp reshape/table_to_array.cpp
)

# ##################################################################################################
Expand Down
148 changes: 148 additions & 0 deletions cpp/tests/reshape/table_to_array.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,148 @@
/*
* Copyright (c) 2025, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include <cudf_test/base_fixture.hpp>
#include <cudf_test/column_wrapper.hpp>
#include <cudf_test/type_list_utilities.hpp>
#include <cudf_test/type_lists.hpp>

#include <cudf/column/column_factories.hpp>
#include <cudf/reshape.hpp>
#include <cudf/table/table.hpp>
#include <cudf/table/table_view.hpp>
#include <cudf/utilities/default_stream.hpp>
#include <cudf/utilities/type_dispatcher.hpp>

#include <rmm/cuda_stream_view.hpp>
#include <rmm/device_buffer.hpp>

template <typename T>
struct TableToDeviceArrayTypedTest : public cudf::test::BaseFixture {};

using SupportedTypes = cudf::test::Types<int8_t,
int16_t,
int32_t,
int64_t,
uint8_t,
uint16_t,
uint32_t,
uint64_t,
float,
double,
cudf::timestamp_D,
cudf::timestamp_s,
cudf::timestamp_ms,
cudf::timestamp_us,
cudf::timestamp_ns,
cudf::duration_D,
cudf::duration_s,
cudf::duration_ms,
cudf::duration_us,
cudf::duration_ns>;

TYPED_TEST_SUITE(TableToDeviceArrayTypedTest, SupportedTypes);

TYPED_TEST(TableToDeviceArrayTypedTest, SupportedTypes)
{
using T = TypeParam;
auto stream = cudf::get_default_stream();

auto const dtype = cudf::data_type{cudf::type_to_id<T>()};

auto const col0 = cudf::test::make_type_param_vector<T>({1, 2, 3});
auto const col1 = cudf::test::make_type_param_vector<T>({4, 5, 6});
auto const col2 = cudf::test::make_type_param_vector<T>({7, 8, 9});
auto const col3 = cudf::test::make_type_param_vector<T>({10, 11, 12});

std::vector<std::unique_ptr<cudf::column>> cols;
auto make_col = [&](auto const& data) {
return std::make_unique<cudf::column>(
cudf::test::fixed_width_column_wrapper<T>(data.begin(), data.end()));
};

cols.push_back(make_col(col0));
cols.push_back(make_col(col1));
cols.push_back(make_col(col2));
cols.push_back(make_col(col3));

cudf::table_view input({cols[0]->view(), cols[1]->view(), cols[2]->view(), cols[3]->view()});
size_t num_elements = 3 * 4;
rmm::device_buffer output(num_elements * sizeof(T), stream);

cudf::table_to_device_array(
input, output.data(), dtype, stream, rmm::mr::get_current_device_resource());

std::vector<T> host_result(num_elements);
CUDF_CUDA_TRY(cudaMemcpy(
host_result.data(), output.data(), num_elements * sizeof(T), cudaMemcpyDeviceToHost));

auto const expected_data =
cudf::test::make_type_param_vector<T>({1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12});
std::vector<T> expected(expected_data.begin(), expected_data.end());

EXPECT_EQ(host_result, expected);
}

template <typename T>
struct FixedPointTableToDeviceArrayTest : public cudf::test::BaseFixture {};

TYPED_TEST_SUITE(FixedPointTableToDeviceArrayTest, cudf::test::FixedPointTypes);

TYPED_TEST(FixedPointTableToDeviceArrayTest, SupportedFixedPointTypes)
{
using decimalXX = TypeParam;
using RepType = cudf::device_storage_type_t<decimalXX>;
using fp_wrapper = cudf::test::fixed_point_column_wrapper<RepType>;

auto stream = cudf::get_default_stream();
auto scale = numeric::scale_type{-2};
auto dtype = cudf::data_type{cudf::type_to_id<decimalXX>(), scale};

fp_wrapper col0({123, 456, 789}, scale);
fp_wrapper col1({321, 654, 987}, scale);

cudf::table_view input({col0, col1});
rmm::device_buffer output(2 * 3 * sizeof(RepType), stream);

cudf::table_to_device_array(
input, output.data(), dtype, stream, rmm::mr::get_current_device_resource());

std::vector<RepType> host_result(6);
CUDF_CUDA_TRY(cudaMemcpy(host_result.data(),
output.data(),
host_result.size() * sizeof(RepType),
cudaMemcpyDeviceToHost));

std::vector<RepType> expected{123, 456, 789, 321, 654, 987};
EXPECT_EQ(host_result, expected);
}

struct TableToDeviceArrayTest : public cudf::test::BaseFixture {};

TEST(TableToDeviceArrayTest, UnsupportedStringType)
{
auto stream = cudf::get_default_stream();
auto col = cudf::test::strings_column_wrapper({"a", "b", "c"});
cudf::table_view input_table({col});
rmm::device_buffer output(3 * sizeof(int32_t), stream);

EXPECT_THROW(cudf::table_to_device_array(input_table,
output.data(),
cudf::data_type{cudf::type_id::STRING},
stream,
rmm::mr::get_current_device_resource()),
cudf::logic_error);
}
Loading