diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index d1d9367c831..ca0f95652a8 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -647,6 +647,7 @@ add_library( src/replace/replace.cu src/reshape/byte_cast.cu src/reshape/interleave_columns.cu + src/reshape/table_to_array.cu src/reshape/tile.cu src/rolling/detail/optimized_unbounded_window.cpp src/rolling/detail/rolling_collect_list.cu diff --git a/cpp/benchmarks/CMakeLists.txt b/cpp/benchmarks/CMakeLists.txt index f71dcdcffaf..bc75c22e5fe 100644 --- a/cpp/benchmarks/CMakeLists.txt +++ b/cpp/benchmarks/CMakeLists.txt @@ -435,7 +435,7 @@ ConfigureNVBench(DECIMAL_NVBENCH decimal/convert_floating.cpp) # ################################################################################################## # * reshape benchmark # --------------------------------------------------------------------------------- -ConfigureNVBench(RESHAPE_NVBENCH reshape/interleave.cpp) +ConfigureNVBench(RESHAPE_NVBENCH reshape/interleave.cpp reshape/table_to_array.cpp) # ################################################################################################## # * rolling benchmark diff --git a/cpp/benchmarks/reshape/table_to_array.cpp b/cpp/benchmarks/reshape/table_to_array.cpp new file mode 100644 index 00000000000..71b98c307a1 --- /dev/null +++ b/cpp/benchmarks/reshape/table_to_array.cpp @@ -0,0 +1,56 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include + +#include +#include +#include + +#include + +#include + +static void bench_table_to_array(nvbench::state& state) +{ + auto const num_rows = static_cast(state.get_int64("num_rows")); + auto const num_cols = static_cast(state.get_int64("columns")); + + data_profile profile = data_profile_builder() + .distribution(cudf::type_id::INT32, distribution_id::UNIFORM, 0, 1000) + .no_validity(); + std::vector types(num_cols, cudf::type_id::INT32); + auto input_table = create_random_table(types, row_count{num_rows}, profile); + + auto input_view = input_table->view(); + auto stream = cudf::get_default_stream(); + + rmm::device_buffer output(num_rows * num_cols * sizeof(int32_t), stream); + auto span = cudf::device_span(reinterpret_cast(output.data()), + output.size()); + + state.set_cuda_stream(nvbench::make_cuda_stream_view(stream.value())); + state.add_global_memory_reads(num_rows * num_cols); // all bytes are read + state.add_global_memory_writes(num_rows * num_cols); // all bytes are written + + state.exec(nvbench::exec_tag::sync, + [&](nvbench::launch& launch) { cudf::table_to_array(input_view, span, stream); }); +} + +NVBENCH_BENCH(bench_table_to_array) + .set_name("table_to_array") + .add_int64_axis("num_rows", {32768, 262144, 2097152, 16777216}) + .add_int64_axis("columns", {2, 10, 100}); diff --git a/cpp/include/cudf/detail/reshape.hpp b/cpp/include/cudf/detail/reshape.hpp index aeeed282d8b..c8f9a3722ac 100644 --- a/cpp/include/cudf/detail/reshape.hpp +++ b/cpp/include/cudf/detail/reshape.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2024, NVIDIA CORPORATION. + * Copyright (c) 2020-2025, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -19,6 +19,7 @@ #include #include #include +#include #include @@ -41,5 +42,12 @@ std::unique_ptr interleave_columns(table_view const& input, rmm::cuda_stream_view, rmm::device_async_resource_ref mr); +/** + * @copydoc cudf::table_to_array + */ +void table_to_array(table_view const& input, + device_span output, + rmm::cuda_stream_view stream = cudf::get_default_stream()); + } // namespace detail } // namespace CUDF_EXPORT cudf diff --git a/cpp/include/cudf/reshape.hpp b/cpp/include/cudf/reshape.hpp index e437e7abfca..a81535ee1b0 100644 --- a/cpp/include/cudf/reshape.hpp +++ b/cpp/include/cudf/reshape.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2024, NVIDIA CORPORATION. + * Copyright (c) 2020-2025, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -21,6 +21,9 @@ #include #include #include +#include + +#include #include @@ -107,6 +110,28 @@ std::unique_ptr byte_cast( rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); +/** + * @brief Copies a table into a contiguous column-major device array. + * + * This function copies a `table_view` with columns of the same fixed-width type + * into a 2D device array stored in column-major order. + * + * The output buffer must be preallocated and passed as a `device_span` + * over bytes. It must be large enough to hold + * `num_rows * num_columns * sizeof(dtype)` bytes.
+ * + * @throws cudf::data_type_error if columns do not all have the same type + * @throws cudf::logic_error if the dtype of the columns is not a fixed-width type + * @throws std::invalid_argument if the output span is too small or any column contains nulls + * + * @param input A table with fixed-width, non-nullable columns of the same type + * @param output A span representing preallocated device memory for the output + * @param stream CUDA stream used for memory operations + */ +void table_to_array(table_view const& input, + device_span output, + rmm::cuda_stream_view stream = cudf::get_default_stream()); + /** @} */ // end of group } // namespace CUDF_EXPORT cudf diff --git a/cpp/src/reshape/table_to_array.cu b/cpp/src/reshape/table_to_array.cu new file mode 100644 index 00000000000..60e145e02e0 --- /dev/null +++ b/cpp/src/reshape/table_to_array.cu @@ -0,0 +1,126 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +namespace cudf { +namespace detail { +namespace { + +template +void table_to_array_impl(table_view const& input, + device_span output, + rmm::cuda_stream_view stream) +{ + auto const num_columns = input.num_columns(); + auto const num_rows = input.num_rows(); + auto const item_size = sizeof(T); + auto const total_bytes = static_cast(num_columns) * num_rows * item_size; + + CUDF_EXPECTS(output.size() >= total_bytes, "Output span is too small", std::invalid_argument); + CUDF_EXPECTS(cudf::all_have_same_types(input.begin(), input.end()), + "All columns must have the same data type", + cudf::data_type_error); + CUDF_EXPECTS(!cudf::has_nulls(input), "All columns must contain no nulls", std::invalid_argument); + + auto* base_ptr = output.data(); + + auto h_srcs = make_host_vector(num_columns, stream); + auto h_dsts = make_host_vector(num_columns, stream); + + std::transform(input.begin(), input.end(), h_srcs.begin(), [](auto& col) { + return const_cast(col.template data()); + }); + + for (int i = 0; i < num_columns; ++i) { + h_dsts[i] = reinterpret_cast(base_ptr + i * item_size * num_rows); + } + + auto const mr = cudf::get_current_device_resource_ref(); + + auto d_srcs = cudf::detail::make_device_uvector_async(h_srcs, stream, mr); + auto d_dsts = cudf::detail::make_device_uvector_async(h_dsts, stream, mr); + + thrust::constant_iterator sizes(static_cast(item_size * num_rows)); + + cudf::detail::batched_memcpy_async( + d_srcs.begin(), d_dsts.begin(), sizes, num_columns, stream.value()); +} + +struct table_to_array_dispatcher { + table_view const& input; + device_span output; + rmm::cuda_stream_view stream; + + template ())> + void operator()() const + { + table_to_array_impl(input, output, stream); + } + + template ())> + void operator()() const + { + 
CUDF_FAIL("Unsupported dtype"); + } +}; + +} // namespace + +void table_to_array(table_view const& input, + device_span output, + rmm::cuda_stream_view stream) +{ + if (input.num_columns() == 0) return; + + auto const dtype = input.column(0).type(); + + cudf::type_dispatcher( + dtype, table_to_array_dispatcher{input, output, stream}); +} + +} // namespace detail + +void table_to_array(table_view const& input, + device_span output, + rmm::cuda_stream_view stream) +{ + CUDF_FUNC_RANGE(); + cudf::detail::table_to_array(input, output, stream); +} + +} // namespace cudf diff --git a/cpp/tests/CMakeLists.txt b/cpp/tests/CMakeLists.txt index 22b231ffb7f..5c8b5fe233b 100644 --- a/cpp/tests/CMakeLists.txt +++ b/cpp/tests/CMakeLists.txt @@ -527,7 +527,7 @@ ConfigureTest( # * reshape test ---------------------------------------------------------------------------------- ConfigureTest( RESHAPE_TEST reshape/byte_cast_tests.cpp reshape/interleave_columns_tests.cpp - reshape/tile_tests.cpp + reshape/table_to_array_tests.cpp reshape/tile_tests.cpp ) # ################################################################################################## diff --git a/cpp/tests/reshape/table_to_array_tests.cpp b/cpp/tests/reshape/table_to_array_tests.cpp new file mode 100644 index 00000000000..46af26c6828 --- /dev/null +++ b/cpp/tests/reshape/table_to_array_tests.cpp @@ -0,0 +1,242 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#include + +template +struct TableToDeviceArrayTypedTest : public cudf::test::BaseFixture {}; + +using SupportedTypes = cudf::test::Types; + +TYPED_TEST_SUITE(TableToDeviceArrayTypedTest, SupportedTypes); + +TYPED_TEST(TableToDeviceArrayTypedTest, SupportedTypes) +{ + using T = TypeParam; + auto stream = cudf::get_default_stream(); + auto mr = rmm::mr::get_current_device_resource(); + + int nrows = 3; + int ncols = 4; + + std::vector> cols; + std::vector expected; + + for (int col = 0; col < ncols; ++col) { + std::vector data(nrows); + for (int row = 0; row < nrows; ++row) { + auto val = col * nrows + row + 1; + if constexpr (cudf::is_chrono()) { + data[row] = T(typename T::duration{val}); + } else { + data[row] = static_cast(val); + } + expected.push_back(data[row]); + } + cols.push_back(std::make_unique( + cudf::test::fixed_width_column_wrapper(data.begin(), data.end()))); + } + + std::vector views(cols.size()); + std::transform( + cols.begin(), cols.end(), views.begin(), [](auto const& col) { return col->view(); }); + cudf::table_view input{views}; + + auto output = cudf::detail::make_zeroed_device_uvector(nrows * ncols, stream, *mr); + + cudf::table_to_array( + input, + cudf::device_span(reinterpret_cast(output.data()), + output.size() * sizeof(T)), + stream); + + auto host_result = cudf::detail::make_std_vector(output, stream); + EXPECT_EQ(host_result, expected); +} + +template +struct FixedPointTableToDeviceArrayTest : public cudf::test::BaseFixture {}; + +TYPED_TEST_SUITE(FixedPointTableToDeviceArrayTest, cudf::test::FixedPointTypes); + +TYPED_TEST(FixedPointTableToDeviceArrayTest, SupportedFixedPointTypes) +{ + using decimalXX = TypeParam; + using RepType = cudf::device_storage_type_t; + using fp_wrapper = cudf::test::fixed_point_column_wrapper; + + auto stream = cudf::get_default_stream(); + auto mr = 
rmm::mr::get_current_device_resource(); + auto scale = numeric::scale_type{-2}; + + fp_wrapper col0({123, 456, 789}, scale); + fp_wrapper col1({321, 654, 987}, scale); + + cudf::table_view input({col0, col1}); + size_t num_elements = input.num_rows() * input.num_columns(); + + auto output = cudf::detail::make_zeroed_device_uvector(num_elements, stream, *mr); + + cudf::table_to_array( + input, + cudf::device_span(reinterpret_cast(output.data()), + output.size() * sizeof(RepType)), + stream); + + auto host_result = cudf::detail::make_std_vector(output, stream); + + std::vector expected{123, 456, 789, 321, 654, 987}; + EXPECT_EQ(host_result, expected); +} + +struct TableToDeviceArrayTest : public cudf::test::BaseFixture {}; + +TEST(TableToDeviceArrayTest, UnsupportedStringType) +{ + auto stream = cudf::get_default_stream(); + auto col = cudf::test::strings_column_wrapper({"a", "b", "c"}); + cudf::table_view input_table({col}); + rmm::device_buffer output(3 * sizeof(int32_t), stream); + + EXPECT_THROW( + cudf::table_to_array(input_table, + cudf::device_span( + reinterpret_cast(output.data()), output.size()), + stream), + cudf::logic_error); +} + +TEST(TableToDeviceArrayTest, FailsWithNullValues) +{ + auto stream = cudf::get_default_stream(); + + cudf::test::fixed_width_column_wrapper col({1, 2, 3}, {true, false, true}); + cudf::table_view input_table({col}); + rmm::device_buffer output(3 * sizeof(int32_t), stream); + + EXPECT_THROW( + cudf::table_to_array(input_table, + cudf::device_span( + reinterpret_cast(output.data()), output.size()), + stream), + std::invalid_argument); +} + +TEST(TableToDeviceArrayTest, FailsWhenOutputSpanTooSmall) +{ + auto stream = cudf::get_default_stream(); + + cudf::test::fixed_width_column_wrapper col({1, 2, 3}); + cudf::table_view input_table({col}); + + rmm::device_buffer output(4, stream); + + EXPECT_THROW( + cudf::table_to_array(input_table, + cudf::device_span( + reinterpret_cast(output.data()), output.size()), + stream), + 
std::invalid_argument); +} + +TEST(TableToDeviceArrayTest, NoRows) +{ + auto stream = cudf::get_default_stream(); + + cudf::test::fixed_width_column_wrapper col({}); + cudf::table_view input_table({col}); + + rmm::device_buffer output(0, stream); + + EXPECT_NO_THROW( + cudf::table_to_array(input_table, + cudf::device_span( + reinterpret_cast(output.data()), output.size()), + stream)); +} + +TEST(TableToDeviceArrayTest, NoColumns) +{ + auto stream = cudf::get_default_stream(); + + cudf::table_view input_table{std::vector{}}; + + rmm::device_buffer output(0, stream); + + EXPECT_NO_THROW( + cudf::table_to_array(input_table, + cudf::device_span( + reinterpret_cast(output.data()), output.size()), + stream)); +} + +TEST(TableToDeviceArrayTest, FlatSizeExceedsSizeTypeLimit) +{ + auto stream = cudf::get_default_stream(); + auto size_limit = static_cast(std::numeric_limits::max()); + auto num_rows = size_limit * 0.6; + auto num_cols = 2; + auto flat_size = num_rows * num_cols; + auto total_bytes = flat_size * sizeof(int8_t); + + std::vector data(num_rows, 1); + auto col = cudf::test::fixed_width_column_wrapper(data.begin(), data.end()); + + cudf::table_view input_table({col, col}); + + rmm::device_buffer output(total_bytes, stream); + + EXPECT_NO_THROW( + cudf::table_to_array(input_table, + cudf::device_span( + reinterpret_cast(output.data()), total_bytes), + stream)); +}