Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
27 commits
Select commit Hold shift + click to select a range
e201564
Add a public API for converting a table_view to device array
Matt711 Apr 7, 2025
6e9289e
support decimals and add more tests
Matt711 Apr 7, 2025
21a9201
Merge branch 'branch-25.06' into fea/cpp/table-to-device-array
Matt711 Apr 8, 2025
7eb3690
fallback if cuda version < 12.8
Matt711 Apr 8, 2025
854cbbf
Merge branch 'branch-25.06' into fea/cpp/table-to-device-array
Matt711 Apr 8, 2025
5a9d195
clean up
Matt711 Apr 8, 2025
e5e65cc
address reviews
Matt711 Apr 8, 2025
0343bb3
address review
Matt711 Apr 8, 2025
7530ecf
use snake case
Matt711 Apr 8, 2025
c7ab103
Merge branch 'branch-25.06' into fea/cpp/table-to-device-array
Matt711 Apr 9, 2025
eec23b5
Merge branch 'branch-25.06' into fea/cpp/table-to-device-array
Matt711 Apr 10, 2025
f027616
address reviews
Matt711 Apr 10, 2025
b3251fe
clean up
Matt711 Apr 10, 2025
f5a0096
Merge branch 'branch-25.06' into fea/cpp/table-to-device-array
Matt711 Apr 14, 2025
96ef619
address review
Matt711 Apr 14, 2025
82c5b22
pass a device_span instead of a raw pointer
Matt711 Apr 14, 2025
4399938
sort file names
Matt711 Apr 15, 2025
737fa22
Merge branch 'branch-25.06' into fea/cpp/table-to-device-array
Matt711 Apr 15, 2025
36abd46
Merge branch 'branch-25.06' into fea/cpp/table-to-device-array
Matt711 Apr 15, 2025
1fb9563
Merge branch 'branch-25.06' into fea/cpp/table-to-device-array
Matt711 Apr 21, 2025
c6785c7
add other impl for benchmarking purposes
Matt711 Apr 21, 2025
04c837f
clean up
Matt711 Apr 21, 2025
1530b5f
Merge branch 'branch-25.06' into fea/cpp/table-to-device-array
Matt711 May 5, 2025
fd0506b
address reviews
Matt711 May 5, 2025
db78162
address review
Matt711 May 5, 2025
f5bf21c
Merge branch 'branch-25.06' into fea/cpp/table-to-device-array
Matt711 May 6, 2025
b71ec3a
address review
Matt711 May 6, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions cpp/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -647,6 +647,7 @@ add_library(
src/replace/replace.cu
src/reshape/byte_cast.cu
src/reshape/interleave_columns.cu
src/reshape/table_to_array.cu
src/reshape/tile.cu
src/rolling/detail/optimized_unbounded_window.cpp
src/rolling/detail/rolling_collect_list.cu
Expand Down
2 changes: 1 addition & 1 deletion cpp/benchmarks/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -435,7 +435,7 @@ ConfigureNVBench(DECIMAL_NVBENCH decimal/convert_floating.cpp)
# ##################################################################################################
# * reshape benchmark
# ---------------------------------------------------------------------------------
ConfigureNVBench(RESHAPE_NVBENCH reshape/interleave.cpp)
ConfigureNVBench(RESHAPE_NVBENCH reshape/interleave.cpp reshape/table_to_array.cpp)

# ##################################################################################################
# * rolling benchmark
Expand Down
56 changes: 56 additions & 0 deletions cpp/benchmarks/reshape/table_to_array.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
/*
* Copyright (c) 2025, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include <benchmarks/common/generate_input.hpp>

#include <cudf/reshape.hpp>
#include <cudf/utilities/default_stream.hpp>
#include <cudf/utilities/span.hpp>

#include <cuda/functional>

#include <nvbench/nvbench.cuh>

static void bench_table_to_array(nvbench::state& state)
{
auto const num_rows = static_cast<cudf::size_type>(state.get_int64("num_rows"));
auto const num_cols = static_cast<cudf::size_type>(state.get_int64("columns"));

data_profile profile = data_profile_builder()
.distribution(cudf::type_id::INT32, distribution_id::UNIFORM, 0, 1000)
.no_validity();
std::vector<cudf::type_id> types(num_cols, cudf::type_id::INT32);
auto input_table = create_random_table(types, row_count{num_rows}, profile);

auto input_view = input_table->view();
auto stream = cudf::get_default_stream();

rmm::device_buffer output(num_rows * num_cols * sizeof(int32_t), stream);
auto span = cudf::device_span<cuda::std::byte>(reinterpret_cast<cuda::std::byte*>(output.data()),
output.size());

state.set_cuda_stream(nvbench::make_cuda_stream_view(stream.value()));
state.add_global_memory_reads<int32_t>(num_rows * num_cols); // all bytes are read
state.add_global_memory_writes<int32_t>(num_rows * num_cols); // all bytes are written

state.exec(nvbench::exec_tag::sync,
[&](nvbench::launch& launch) { cudf::table_to_array(input_view, span, stream); });
}

NVBENCH_BENCH(bench_table_to_array)
.set_name("table_to_array")
.add_int64_axis("num_rows", {32768, 262144, 2097152, 16777216})
.add_int64_axis("columns", {2, 10, 100});
10 changes: 9 additions & 1 deletion cpp/include/cudf/detail/reshape.hpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2020-2024, NVIDIA CORPORATION.
* Copyright (c) 2020-2025, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand All @@ -19,6 +19,7 @@
#include <cudf/types.hpp>
#include <cudf/utilities/default_stream.hpp>
#include <cudf/utilities/memory_resource.hpp>
#include <cudf/utilities/span.hpp>

#include <rmm/cuda_stream_view.hpp>

Expand All @@ -41,5 +42,12 @@ std::unique_ptr<column> interleave_columns(table_view const& input,
rmm::cuda_stream_view,
rmm::device_async_resource_ref mr);

/**
* @copydoc cudf::table_to_array
*/
void table_to_array(table_view const& input,
device_span<cuda::std::byte> output,
rmm::cuda_stream_view stream = cudf::get_default_stream());

} // namespace detail
} // namespace CUDF_EXPORT cudf
27 changes: 26 additions & 1 deletion cpp/include/cudf/reshape.hpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2020-2024, NVIDIA CORPORATION.
* Copyright (c) 2020-2025, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand All @@ -21,6 +21,9 @@
#include <cudf/types.hpp>
#include <cudf/utilities/export.hpp>
#include <cudf/utilities/memory_resource.hpp>
#include <cudf/utilities/span.hpp>

#include <cuda/functional>

#include <memory>

Expand Down Expand Up @@ -107,6 +110,28 @@ std::unique_ptr<column> byte_cast(
rmm::cuda_stream_view stream = cudf::get_default_stream(),
rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref());

/**
* @brief Copies a table into a contiguous column-major device array.
*
* This function copies a `table_view` with columns of the same fixed-width type
* into a 2D device array stored in column-major order.
*
* The output buffer must be preallocated and passed as a `device_span` using
* a `device_span<cuda::std::byte>`. It must be large enough to hold
* `num_rows * num_columns * sizeof(dtype)` bytes.
*
* @throws cudf::logic_error if columns do not all have the same type
* @throws cudf::logic_error if the dtype of the columns is not a fixed-width type
* @throws std::invalid_argument if the output span is too small
*
* @param input A table with fixed-width, non-nullable columns of the same type
* @param output A span representing preallocated device memory for the output
* @param stream CUDA stream used for memory operations
*/
void table_to_array(table_view const& input,
device_span<cuda::std::byte> output,
rmm::cuda_stream_view stream = cudf::get_default_stream());

/** @} */ // end of group

} // namespace CUDF_EXPORT cudf
126 changes: 126 additions & 0 deletions cpp/src/reshape/table_to_array.cu
Original file line number Diff line number Diff line change
@@ -0,0 +1,126 @@
/*
* Copyright (c) 2025, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include <cudf/detail/nvtx/ranges.hpp>
#include <cudf/detail/reshape.hpp>
#include <cudf/detail/utilities/batched_memcpy.hpp>
#include <cudf/detail/utilities/vector_factories.hpp>
#include <cudf/reshape.hpp>
#include <cudf/types.hpp>
#include <cudf/utilities/default_stream.hpp>
#include <cudf/utilities/error.hpp>
#include <cudf/utilities/span.hpp>
#include <cudf/utilities/type_checks.hpp>
#include <cudf/utilities/type_dispatcher.hpp>

#include <rmm/cuda_stream_view.hpp>
#include <rmm/device_uvector.hpp>

#include <cub/device/device_memcpy.cuh>
#include <cuda/functional>
#include <cuda_runtime.h>
#include <thrust/device_vector.h>
#include <thrust/iterator/constant_iterator.h>
#include <thrust/iterator/counting_iterator.h>
#include <thrust/iterator/transform_iterator.h>

namespace cudf {
namespace detail {
namespace {

template <typename T>
void table_to_array_impl(table_view const& input,
device_span<cuda::std::byte> output,
rmm::cuda_stream_view stream)
{
auto const num_columns = input.num_columns();
auto const num_rows = input.num_rows();
auto const item_size = sizeof(T);
auto const total_bytes = static_cast<size_t>(num_columns) * num_rows * item_size;

CUDF_EXPECTS(output.size() >= total_bytes, "Output span is too small", std::invalid_argument);
CUDF_EXPECTS(cudf::all_have_same_types(input.begin(), input.end()),
"All columns must have the same data type",
cudf::data_type_error);
CUDF_EXPECTS(!cudf::has_nulls(input), "All columns must contain no nulls", std::invalid_argument);

auto* base_ptr = output.data();

auto h_srcs = make_host_vector<T const*>(num_columns, stream);
auto h_dsts = make_host_vector<T*>(num_columns, stream);

std::transform(input.begin(), input.end(), h_srcs.begin(), [](auto& col) {
return const_cast<T*>(col.template data<T>());
});

for (int i = 0; i < num_columns; ++i) {
h_dsts[i] = reinterpret_cast<T*>(base_ptr + i * item_size * num_rows);
}

auto const mr = cudf::get_current_device_resource_ref();

auto d_srcs = cudf::detail::make_device_uvector_async(h_srcs, stream, mr);
auto d_dsts = cudf::detail::make_device_uvector_async(h_dsts, stream, mr);

thrust::constant_iterator<size_t> sizes(static_cast<size_t>(item_size * num_rows));

cudf::detail::batched_memcpy_async(
d_srcs.begin(), d_dsts.begin(), sizes, num_columns, stream.value());
}

struct table_to_array_dispatcher {
table_view const& input;
device_span<cuda::std::byte> output;
rmm::cuda_stream_view stream;

template <typename T, CUDF_ENABLE_IF(is_fixed_width<T>())>
void operator()() const
{
table_to_array_impl<T>(input, output, stream);
}

template <typename T, CUDF_ENABLE_IF(!is_fixed_width<T>())>
void operator()() const
{
CUDF_FAIL("Unsupported dtype");
}
};

} // namespace

void table_to_array(table_view const& input,
device_span<cuda::std::byte> output,
rmm::cuda_stream_view stream)
{
if (input.num_columns() == 0) return;

auto const dtype = input.column(0).type();

cudf::type_dispatcher<cudf::dispatch_storage_type>(
dtype, table_to_array_dispatcher{input, output, stream});
}

} // namespace detail

void table_to_array(table_view const& input,
device_span<cuda::std::byte> output,
rmm::cuda_stream_view stream)
{
CUDF_FUNC_RANGE();
cudf::detail::table_to_array(input, output, stream);
}

} // namespace cudf
2 changes: 1 addition & 1 deletion cpp/tests/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -527,7 +527,7 @@ ConfigureTest(
# * reshape test ----------------------------------------------------------------------------------
ConfigureTest(
RESHAPE_TEST reshape/byte_cast_tests.cpp reshape/interleave_columns_tests.cpp
reshape/tile_tests.cpp
reshape/table_to_array_tests.cpp reshape/tile_tests.cpp
)

# ##################################################################################################
Expand Down
Loading
Loading