Skip to content
Merged
Changes from 1 commit
Commits
Show all changes
27 commits
Select commit Hold shift + click to select a range
e201564
Add a public API for converting a table_view to device array
Matt711 Apr 7, 2025
6e9289e
support decimals and add more tests
Matt711 Apr 7, 2025
21a9201
Merge branch 'branch-25.06' into fea/cpp/table-to-device-array
Matt711 Apr 8, 2025
7eb3690
fallback if cuda version < 12.8
Matt711 Apr 8, 2025
854cbbf
Merge branch 'branch-25.06' into fea/cpp/table-to-device-array
Matt711 Apr 8, 2025
5a9d195
clean up
Matt711 Apr 8, 2025
e5e65cc
address reviews
Matt711 Apr 8, 2025
0343bb3
address review
Matt711 Apr 8, 2025
7530ecf
use snake case
Matt711 Apr 8, 2025
c7ab103
Merge branch 'branch-25.06' into fea/cpp/table-to-device-array
Matt711 Apr 9, 2025
eec23b5
Merge branch 'branch-25.06' into fea/cpp/table-to-device-array
Matt711 Apr 10, 2025
f027616
address reviews
Matt711 Apr 10, 2025
b3251fe
clean up
Matt711 Apr 10, 2025
f5a0096
Merge branch 'branch-25.06' into fea/cpp/table-to-device-array
Matt711 Apr 14, 2025
96ef619
address review
Matt711 Apr 14, 2025
82c5b22
pass a device_span instead of a raw pointer
Matt711 Apr 14, 2025
4399938
sort file names
Matt711 Apr 15, 2025
737fa22
Merge branch 'branch-25.06' into fea/cpp/table-to-device-array
Matt711 Apr 15, 2025
36abd46
Merge branch 'branch-25.06' into fea/cpp/table-to-device-array
Matt711 Apr 15, 2025
1fb9563
Merge branch 'branch-25.06' into fea/cpp/table-to-device-array
Matt711 Apr 21, 2025
c6785c7
add other impl for benchmarking purposes
Matt711 Apr 21, 2025
04c837f
clean up
Matt711 Apr 21, 2025
1530b5f
Merge branch 'branch-25.06' into fea/cpp/table-to-device-array
Matt711 May 5, 2025
fd0506b
address reviews
Matt711 May 5, 2025
db78162
address review
Matt711 May 5, 2025
f5bf21c
Merge branch 'branch-25.06' into fea/cpp/table-to-device-array
Matt711 May 6, 2025
b71ec3a
address review
Matt711 May 6, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
50 changes: 50 additions & 0 deletions cpp/src/reshape/table_to_array.cu
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,49 @@
namespace cudf {
namespace {

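// NOTE(review): the commented-out block below is a copy of _table_to_device_array that calls
// cudaMemcpyBatchAsync unconditionally (no CUDA_VERSION guard). It is presumably superseded by
// the guarded version that follows -- confirm whether it is still needed, otherwise remove it.
// template <typename T>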
// void _table_to_device_array(cudf::table_view const& input,
// void* output,
// rmm::cuda_stream_view stream)
// {
// auto const num_columns = input.num_columns();
// auto const num_rows = input.num_rows();
// auto const item_size = sizeof(T);

// std::vector<void*> dsts(num_columns);
// std::vector<void const*> srcs(num_columns);
// std::vector<size_t> sizes(num_columns, item_size * num_rows);

// auto* base_ptr = static_cast<uint8_t*>(output);

// for (int i = 0; i < num_columns; ++i) {
// auto const& col = input.column(i);
// CUDF_EXPECTS(col.type() == input.column(0).type(), "All columns must have the same dtype");

// auto* src_ptr = static_cast<void const*>(col.data<T>());
// auto* dst_ptr = base_ptr + i * item_size * num_rows;

// srcs[i] = src_ptr;
// dsts[i] = dst_ptr;
// }

// cudaMemcpyAttributes attr{};
// attr.srcAccessOrder = cudaMemcpySrcAccessOrderStream;
// std::vector<cudaMemcpyAttributes> attrs{attr};
// std::vector<size_t> attr_idxs{0};
// size_t fail_idx = SIZE_MAX;

// CUDF_CUDA_TRY(cudaMemcpyBatchAsync(dsts.data(),
// const_cast<void**>(srcs.data()),
// sizes.data(),
// num_columns,
// attrs.data(),
// attr_idxs.data(),
// attrs.size(),
// &fail_idx,
// stream.value()));
// }

template <typename T>
void _table_to_device_array(cudf::table_view const& input,
void* output,
Expand All @@ -58,6 +101,7 @@ void _table_to_device_array(cudf::table_view const& input,
dsts[i] = dst_ptr;
}

#if defined(CUDA_VERSION) && CUDA_VERSION >= 12080
cudaMemcpyAttributes attr{};
attr.srcAccessOrder = cudaMemcpySrcAccessOrderStream;
std::vector<cudaMemcpyAttributes> attrs{attr};
Expand All @@ -73,8 +117,14 @@ void _table_to_device_array(cudf::table_view const& input,
attrs.size(),
&fail_idx,
stream.value()));
#else
for (int i = 0; i < num_columns; ++i) {
CUDF_CUDA_TRY(cudaMemcpyAsync(dsts[i], srcs[i], sizes[i], cudaMemcpyDeviceToDevice, stream.value()));
}
#endif
}


struct TableToArrayDispatcher {
table_view const& input;
void* output;
Expand Down
Loading