From 268996ad101dc69414992aa0227eba4f93012c91 Mon Sep 17 00:00:00 2001 From: Matt Topol Date: Mon, 1 Apr 2024 18:59:48 -0400 Subject: [PATCH] Add `to_arrow_device` function to cudf interop using nanoarrow (#15047) Introduce new `to_arrow_device` and `to_arrow_schema` functions to utilize the `ArrowDeviceArray` structure for zero-copy passing of libcudf::table. Add nanoarrow as a vendored lib and a script to update it. Initial step towards addressing #14926 Authors: - Matt Topol (https://github.com/zeroshade) - Vyas Ramasubramani (https://github.com/vyasr) - David Wendt (https://github.com/davidwendt) Approvers: - Vyas Ramasubramani (https://github.com/vyasr) - David Wendt (https://github.com/davidwendt) URL: https://github.com/rapidsai/cudf/pull/15047 --- cpp/CMakeLists.txt | 8 +- cpp/cmake/thirdparty/get_nanoarrow.cmake | 36 + cpp/include/cudf/interop.hpp | 96 ++- cpp/include/cudf/interop/detail/arrow.hpp | 48 ++ cpp/src/interop/to_arrow_device.cu | 727 ++++++++++++++++++++ cpp/tests/CMakeLists.txt | 7 +- cpp/tests/interop/nanoarrow_utils.hpp | 226 +++++++ cpp/tests/interop/to_arrow_device_test.cpp | 739 +++++++++++++++++++++ docs/cudf/source/conf.py | 1 + 9 files changed, 1882 insertions(+), 6 deletions(-) create mode 100644 cpp/cmake/thirdparty/get_nanoarrow.cmake create mode 100644 cpp/include/cudf/interop/detail/arrow.hpp create mode 100644 cpp/src/interop/to_arrow_device.cu create mode 100644 cpp/tests/interop/nanoarrow_utils.hpp create mode 100644 cpp/tests/interop/to_arrow_device_test.cpp diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index 618d03f7078..f1d43e3c35f 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -210,12 +210,14 @@ include(cmake/thirdparty/get_kvikio.cmake) include(cmake/thirdparty/get_fmt.cmake) # find spdlog include(cmake/thirdparty/get_spdlog.cmake) +# find nanoarrow +include(cmake/thirdparty/get_nanoarrow.cmake) # Workaround until https://github.com/rapidsai/rapids-cmake/issues/176 is resolved if(NOT BUILD_SHARED_LIBS) 
include("${rapids-cmake-dir}/export/find_package_file.cmake") list(APPEND METADATA_KINDS BUILD INSTALL) - list(APPEND dependencies KvikIO ZLIB nvcomp) + list(APPEND dependencies KvikIO ZLIB nvcomp nanoarrow) if(TARGET cufile::cuFile_interface) list(APPEND dependencies cuFile) endif() @@ -358,6 +360,7 @@ add_library( src/interop/dlpack.cpp src/interop/from_arrow.cu src/interop/to_arrow.cu + src/interop/to_arrow_device.cu src/interop/detail/arrow_allocator.cpp src/io/avro/avro.cpp src/io/avro/avro_gpu.cu @@ -735,6 +738,7 @@ target_include_directories( "$" "$" PRIVATE "$" + "$" INTERFACE "$" ) @@ -783,7 +787,7 @@ target_link_libraries( cudf PUBLIC ${ARROW_LIBRARIES} CCCL::CCCL rmm::rmm PRIVATE $ cuco::cuco ZLIB::ZLIB nvcomp::nvcomp kvikio::kvikio - $ + $ nanoarrow ) # Add Conda library, and include paths if specified diff --git a/cpp/cmake/thirdparty/get_nanoarrow.cmake b/cpp/cmake/thirdparty/get_nanoarrow.cmake new file mode 100644 index 00000000000..be938a89ccd --- /dev/null +++ b/cpp/cmake/thirdparty/get_nanoarrow.cmake @@ -0,0 +1,36 @@ +# ============================================================================= +# Copyright (c) 2024, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except +# in compliance with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software distributed under the License +# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express +# or implied. See the License for the specific language governing permissions and limitations under +# the License. +# ============================================================================= + +# This function finds nanoarrow and sets any additional necessary environment variables. 
+function(find_and_configure_nanoarrow) + set(oneValueArgs VERSION FORK PINNED_TAG) + cmake_parse_arguments(PKG "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN}) + + rapids_cpm_find( + nanoarrow ${PKG_VERSION} + GLOBAL_TARGETS nanoarrow + CPM_ARGS + GIT_REPOSITORY https://github.com/${PKG_FORK}/arrow-nanoarrow.git + GIT_TAG ${PKG_PINNED_TAG} + # TODO: Commit hashes are not supported with shallow clones. Can switch this if and when we pin + # to an actual tag. + GIT_SHALLOW FALSE + OPTIONS "BUILD_SHARED_LIBS OFF" "NANOARROW_NAMESPACE cudf" + ) + set_target_properties(nanoarrow PROPERTIES POSITION_INDEPENDENT_CODE ON) +endfunction() + +find_and_configure_nanoarrow( + VERSION 0.4.0 FORK apache PINNED_TAG c97720003ff863b81805bcdb9f7c91306ab6b6a8 +) diff --git a/cpp/include/cudf/interop.hpp b/cpp/include/cudf/interop.hpp index 2ee6f19614d..871f48e3aac 100644 --- a/cpp/include/cudf/interop.hpp +++ b/cpp/include/cudf/interop.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2023, NVIDIA CORPORATION. + * Copyright (c) 2020-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -34,11 +34,16 @@ #include #include #include +#include #include struct DLManagedTensor; +struct ArrowDeviceArray; + +struct ArrowSchema; + namespace cudf { /** * @addtogroup interop_dlpack @@ -162,6 +167,95 @@ std::shared_ptr to_arrow(cudf::scalar const& input, column_metadata const& metadata = {}, rmm::cuda_stream_view stream = cudf::get_default_stream(), arrow::MemoryPool* ar_mr = arrow::default_memory_pool()); + +/** + * @brief typedef for a unique_ptr to an ArrowSchema with custom deleter + * + */ +using unique_schema_t = std::unique_ptr; + +/** + * @brief typedef for a unique_ptr to an ArrowDeviceArray with a custom deleter + * + */ +using unique_device_array_t = std::unique_ptr; + +/** + * @brief Create ArrowSchema from cudf table and metadata + * + * Populates and returns an ArrowSchema C struct using a table and metadata. + * + * @note For decimals, since the precision is not stored for them in libcudf, + * decimals will be converted to an Arrow decimal128 which has the widest precision that cudf + * decimal type supports. For example, `numeric::decimal32` will be converted to Arrow decimal128 + * with the precision of 9 which is the maximum precision for 32-bit types. Similarly, + * `numeric::decimal128` will be converted to Arrow decimal128 with the precision of 38. + * + * @param input Table to create a schema from + * @param metadata Contains the hierarchy of names of columns and children + * @return ArrowSchema generated from `input` + */ +unique_schema_t to_arrow_schema(cudf::table_view const& input, + cudf::host_span metadata); + +/** + * @brief Create `ArrowDeviceArray` from cudf table and metadata + * + * Populates the C struct ArrowDeviceArray without performing copies if possible. + * This maintains the data on the GPU device and gives ownership of the table + * and its buffers to the ArrowDeviceArray struct. + * + * After calling this function, the release callback on the returned ArrowDeviceArray + * must be called to clean up the memory. 
+ * + * @note For decimals, since the precision is not stored for them in libcudf + * it will be converted to an Arrow decimal128 with the widest-precision the cudf decimal type + * supports. For example, numeric::decimal32 will be converted to Arrow decimal128 of the precision + * 9 which is the maximum precision for 32-bit types. Similarly, numeric::decimal128 will be + * converted to Arrow decimal128 of the precision 38. + * + * @note Copies will be performed in the cases where cudf differs from Arrow + * such as in the representation of bools (Arrow uses a bitmap, cudf uses 1-byte per value). + * + * @param table Input table, ownership of the data will be moved to the result + * @param stream CUDA stream used for device memory operations and kernel launches + * @param mr Device memory resource used for any allocations during conversion + * @return ArrowDeviceArray which will have ownership of the GPU data, consumer must call release + */ +unique_device_array_t to_arrow_device( + cudf::table&& table, + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + +/** + * @brief Create `ArrowDeviceArray` from cudf column and metadata + * + * Populates the C struct ArrowDeviceArray without performing copies if possible. + * This maintains the data on the GPU device and gives ownership of the table + * and its buffers to the ArrowDeviceArray struct. + * + * After calling this function, the release callback on the returned ArrowDeviceArray + * must be called to clean up the memory. + * + * @note For decimals, since the precision is not stored for them in libcudf + * it will be converted to an Arrow decimal128 with the widest-precision the cudf decimal type + * supports. For example, numeric::decimal32 will be converted to Arrow decimal128 of the precision + * 9 which is the maximum precision for 32-bit types. 
Similar, numeric::decimal128 will be + * converted to Arrow decimal128 of the precision 38. + * + * @note Copies will be performed in the cases where cudf differs from Arrow such as + * in the representation of bools (Arrow uses a bitmap, cudf uses 1 byte per value). + * + * @param col Input column, ownership of the data will be moved to the result + * @param stream CUDA stream used for device memory operations and kernel launches + * @param mr Device memory resource used for any allocations during conversion + * @return ArrowDeviceArray which will have ownership of the GPU data + */ +unique_device_array_t to_arrow_device( + cudf::column&& col, + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + /** * @brief Create `cudf::table` from given arrow Table input * diff --git a/cpp/include/cudf/interop/detail/arrow.hpp b/cpp/include/cudf/interop/detail/arrow.hpp new file mode 100644 index 00000000000..8043ecf5422 --- /dev/null +++ b/cpp/include/cudf/interop/detail/arrow.hpp @@ -0,0 +1,48 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#pragma once + +#include + +// from Arrow C Device Data Interface +// https://arrow.apache.org/docs/format/CDeviceDataInterface.html +#ifndef ARROW_C_DEVICE_DATA_INTERFACE +#define ARROW_C_DEVICE_DATA_INTERFACE + +// Device type for the allocated memory +typedef int32_t ArrowDeviceType; + +// CPU device, same as using ArrowArray directly +#define ARROW_DEVICE_CPU 1 +// CUDA GPU Device +#define ARROW_DEVICE_CUDA 2 +// Pinned CUDA CPU memory by cudaMallocHost +#define ARROW_DEVICE_CUDA_HOST 3 +// CUDA managed/unified memory allocated by cudaMallocManaged +#define ARROW_DEVICE_CUDA_MANAGED 13 + +struct ArrowDeviceArray { + struct ArrowArray array; + int64_t device_id; + ArrowDeviceType device_type; + void* sync_event; + + // reserved bytes for future expansion + int64_t reserved[3]; +}; + +#endif // ARROW_C_DEVICE_DATA_INTERFACE diff --git a/cpp/src/interop/to_arrow_device.cu b/cpp/src/interop/to_arrow_device.cu new file mode 100644 index 00000000000..e824412e71c --- /dev/null +++ b/cpp/src/interop/to_arrow_device.cu @@ -0,0 +1,727 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +#include +#include + +#include +#include + +namespace cudf { +namespace detail { +namespace { +static constexpr int validity_buffer_idx = 0; +static constexpr int fixed_width_data_buffer_idx = 1; + +ArrowType id_to_arrow_type(cudf::type_id id) +{ + switch (id) { + case cudf::type_id::BOOL8: return NANOARROW_TYPE_BOOL; + case cudf::type_id::INT8: return NANOARROW_TYPE_INT8; + case cudf::type_id::INT16: return NANOARROW_TYPE_INT16; + case cudf::type_id::INT32: return NANOARROW_TYPE_INT32; + case cudf::type_id::INT64: return NANOARROW_TYPE_INT64; + case cudf::type_id::UINT8: return NANOARROW_TYPE_UINT8; + case cudf::type_id::UINT16: return NANOARROW_TYPE_UINT16; + case cudf::type_id::UINT32: return NANOARROW_TYPE_UINT32; + case cudf::type_id::UINT64: return NANOARROW_TYPE_UINT64; + case cudf::type_id::FLOAT32: return NANOARROW_TYPE_FLOAT; + case cudf::type_id::FLOAT64: return NANOARROW_TYPE_DOUBLE; + case cudf::type_id::TIMESTAMP_DAYS: return NANOARROW_TYPE_DATE32; + default: CUDF_FAIL("Unsupported type_id conversion to arrow type"); + } +} + +struct dispatch_to_arrow_type { + template ())> + int operator()(column_view, column_metadata const&, ArrowSchema*) + { + CUDF_FAIL("Unsupported type for to_arrow_schema"); + } + + template ())> + int operator()(column_view input_view, column_metadata const&, ArrowSchema* out) + { + cudf::type_id id = input_view.type().id(); + switch (id) { + case cudf::type_id::TIMESTAMP_SECONDS: + return ArrowSchemaSetTypeDateTime( + out, NANOARROW_TYPE_TIMESTAMP, NANOARROW_TIME_UNIT_SECOND, nullptr); + case cudf::type_id::TIMESTAMP_MILLISECONDS: + return ArrowSchemaSetTypeDateTime( + out, NANOARROW_TYPE_TIMESTAMP, NANOARROW_TIME_UNIT_MILLI, nullptr); + case cudf::type_id::TIMESTAMP_MICROSECONDS: + return ArrowSchemaSetTypeDateTime( + out, 
NANOARROW_TYPE_TIMESTAMP, NANOARROW_TIME_UNIT_MICRO, nullptr); + case cudf::type_id::TIMESTAMP_NANOSECONDS: + return ArrowSchemaSetTypeDateTime( + out, NANOARROW_TYPE_TIMESTAMP, NANOARROW_TIME_UNIT_NANO, nullptr); + case cudf::type_id::DURATION_SECONDS: + return ArrowSchemaSetTypeDateTime( + out, NANOARROW_TYPE_DURATION, NANOARROW_TIME_UNIT_SECOND, nullptr); + case cudf::type_id::DURATION_MILLISECONDS: + return ArrowSchemaSetTypeDateTime( + out, NANOARROW_TYPE_DURATION, NANOARROW_TIME_UNIT_MILLI, nullptr); + case cudf::type_id::DURATION_MICROSECONDS: + return ArrowSchemaSetTypeDateTime( + out, NANOARROW_TYPE_DURATION, NANOARROW_TIME_UNIT_MICRO, nullptr); + case cudf::type_id::DURATION_NANOSECONDS: + return ArrowSchemaSetTypeDateTime( + out, NANOARROW_TYPE_DURATION, NANOARROW_TIME_UNIT_NANO, nullptr); + default: return ArrowSchemaSetType(out, id_to_arrow_type(id)); + } + } +}; + +template +int decimals_to_arrow(column_view input, ArrowSchema* out) +{ + // Arrow doesn't support decimal32/decimal64 currently. decimal128 + // is the smallest that arrow supports besides float32/float64 so we + // upcast to decimal128. 
+ return ArrowSchemaSetTypeDecimal(out, + NANOARROW_TYPE_DECIMAL128, + cudf::detail::max_precision(), + -input.type().scale()); +} + +template <> +int dispatch_to_arrow_type::operator()(column_view input, + column_metadata const&, + ArrowSchema* out) +{ + using DeviceType = int32_t; + return decimals_to_arrow(input, out); +} + +template <> +int dispatch_to_arrow_type::operator()(column_view input, + column_metadata const&, + ArrowSchema* out) +{ + using DeviceType = int64_t; + return decimals_to_arrow(input, out); +} + +template <> +int dispatch_to_arrow_type::operator()(column_view input, + column_metadata const&, + ArrowSchema* out) +{ + using DeviceType = __int128_t; + return decimals_to_arrow(input, out); +} + +template <> +int dispatch_to_arrow_type::operator()(column_view input, + column_metadata const&, + ArrowSchema* out) +{ + return ArrowSchemaSetType(out, NANOARROW_TYPE_STRING); +} + +// these forward declarations are needed due to the recursive calls to them +// inside their definitions and in struct_vew for handling children +template <> +int dispatch_to_arrow_type::operator()(column_view input, + column_metadata const& metadata, + ArrowSchema* out); + +template <> +int dispatch_to_arrow_type::operator()(column_view input, + column_metadata const& metadata, + ArrowSchema* out); + +template <> +int dispatch_to_arrow_type::operator()(column_view input, + column_metadata const& metadata, + ArrowSchema* out) +{ + CUDF_EXPECTS(metadata.children_meta.size() == static_cast(input.num_children()), + "Number of field names and number of children doesn't match\n"); + + NANOARROW_RETURN_NOT_OK(ArrowSchemaSetTypeStruct(out, input.num_children())); + for (int i = 0; i < input.num_children(); ++i) { + auto child = out->children[i]; + auto col = input.child(i); + ArrowSchemaInit(child); + NANOARROW_RETURN_NOT_OK(ArrowSchemaSetName(child, metadata.children_meta[i].name.c_str())); + + child->flags = col.has_nulls() ? 
ARROW_FLAG_NULLABLE : 0; + + if (col.type().id() == cudf::type_id::EMPTY) { + NANOARROW_RETURN_NOT_OK(ArrowSchemaSetType(child, NANOARROW_TYPE_NA)); + continue; + } + + NANOARROW_RETURN_NOT_OK(cudf::type_dispatcher( + col.type(), detail::dispatch_to_arrow_type{}, col, metadata.children_meta[i], child)); + } + + return NANOARROW_OK; +} + +template <> +int dispatch_to_arrow_type::operator()(column_view input, + column_metadata const& metadata, + ArrowSchema* out) +{ + NANOARROW_RETURN_NOT_OK(ArrowSchemaSetType(out, NANOARROW_TYPE_LIST)); + auto child = input.child(cudf::lists_column_view::child_column_index); + ArrowSchemaInit(out->children[0]); + if (child.type().id() == cudf::type_id::EMPTY) { + return ArrowSchemaSetType(out->children[0], NANOARROW_TYPE_NA); + } + auto child_meta = + metadata.children_meta.empty() ? column_metadata{"element"} : metadata.children_meta[0]; + + out->flags = input.has_nulls() ? ARROW_FLAG_NULLABLE : 0; + NANOARROW_RETURN_NOT_OK(ArrowSchemaSetName(out->children[0], child_meta.name.c_str())); + out->children[0]->flags = child.has_nulls() ? ARROW_FLAG_NULLABLE : 0; + return cudf::type_dispatcher( + child.type(), detail::dispatch_to_arrow_type{}, child, child_meta, out->children[0]); +} + +template <> +int dispatch_to_arrow_type::operator()(column_view input, + column_metadata const& metadata, + ArrowSchema* out) +{ + cudf::dictionary_column_view dview{input}; + + NANOARROW_RETURN_NOT_OK(ArrowSchemaSetType(out, id_to_arrow_type(dview.indices().type().id()))); + NANOARROW_RETURN_NOT_OK(ArrowSchemaAllocateDictionary(out)); + ArrowSchemaInit(out->dictionary); + + auto dict_keys = dview.keys(); + return cudf::type_dispatcher( + dict_keys.type(), + detail::dispatch_to_arrow_type{}, + dict_keys, + metadata.children_meta.empty() ? 
column_metadata{"keys"} : metadata.children_meta[0], + out->dictionary); +} + +template +void device_buffer_finalize(ArrowBufferAllocator* allocator, uint8_t*, int64_t) +{ + auto* unique_buffer = reinterpret_cast*>(allocator->private_data); + delete unique_buffer; +} + +template +struct is_device_scalar : public std::false_type {}; + +template +struct is_device_scalar> : public std::true_type {}; + +template +struct is_device_uvector : public std::false_type {}; + +template +struct is_device_uvector> : public std::true_type {}; + +template +int set_buffer(std::unique_ptr device_buf, int64_t i, ArrowArray* out) +{ + ArrowBuffer* buf = ArrowArrayBuffer(out, i); + auto ptr = reinterpret_cast(device_buf->data()); + buf->size_bytes = [&] { + if constexpr (is_device_scalar::value) { + return sizeof(typename T::value_type); + } else if constexpr (is_device_uvector::value) { + return sizeof(typename T::value_type) * device_buf->size(); + } else { + return device_buf->size(); + } + }(); + // we make a new unique_ptr and move to it in case there was a custom deleter + NANOARROW_RETURN_NOT_OK( + ArrowBufferSetAllocator(buf, + ArrowBufferDeallocator(&device_buffer_finalize, + new std::unique_ptr(std::move(device_buf))))); + buf->data = ptr; + return NANOARROW_OK; +} + +int initialize_array(ArrowArray* arr, ArrowType storage_type, cudf::column const& column) +{ + NANOARROW_RETURN_NOT_OK(ArrowArrayInitFromType(arr, storage_type)); + arr->length = column.size(); + arr->null_count = column.null_count(); + return NANOARROW_OK; +} + +struct dispatch_to_arrow_device { + template ())> + int operator()(cudf::column&&, + rmm::cuda_stream_view, + rmm::mr::device_memory_resource*, + ArrowArray*) + { + CUDF_FAIL("Unsupported type for to_arrow_device"); + } + + template ())> + int operator()(cudf::column&& column, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr, + ArrowArray* out) + { + nanoarrow::UniqueArray tmp; + + const ArrowType storage_type = [&] { + switch 
(column.type().id()) { + case cudf::type_id::TIMESTAMP_SECONDS: + case cudf::type_id::TIMESTAMP_MILLISECONDS: + case cudf::type_id::TIMESTAMP_MICROSECONDS: + case cudf::type_id::TIMESTAMP_NANOSECONDS: return NANOARROW_TYPE_INT64; + case cudf::type_id::DURATION_SECONDS: + case cudf::type_id::DURATION_MILLISECONDS: + case cudf::type_id::DURATION_MICROSECONDS: + case cudf::type_id::DURATION_NANOSECONDS: return NANOARROW_TYPE_INT64; + default: return id_to_arrow_type(column.type().id()); + } + }(); + NANOARROW_RETURN_NOT_OK(initialize_array(tmp.get(), storage_type, column)); + + auto contents = column.release(); + if (contents.null_mask) { + NANOARROW_RETURN_NOT_OK( + set_buffer(std::move(contents.null_mask), validity_buffer_idx, tmp.get())); + } + + NANOARROW_RETURN_NOT_OK( + set_buffer(std::move(contents.data), fixed_width_data_buffer_idx, tmp.get())); + + ArrowArrayMove(tmp.get(), out); + return NANOARROW_OK; + } +}; + +template +int decimals_to_arrow(cudf::column&& input, + int32_t precision, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr, + ArrowArray* out) +{ + nanoarrow::UniqueArray tmp; + NANOARROW_RETURN_NOT_OK(initialize_array(tmp.get(), NANOARROW_TYPE_DECIMAL128, input)); + + if constexpr (!std::is_same_v) { + constexpr size_type BIT_WIDTH_RATIO = sizeof(__int128_t) / sizeof(DeviceType); + auto buf = + std::make_unique>(input.size() * BIT_WIDTH_RATIO, stream, mr); + + auto count = thrust::make_counting_iterator(0); + + thrust::for_each(rmm::exec_policy(stream, mr), + count, + count + input.size(), + [in = input.view().begin(), + out = buf->data(), + BIT_WIDTH_RATIO] __device__(auto in_idx) { + auto const out_idx = in_idx * BIT_WIDTH_RATIO; + // the lowest order bits are the value, the remainder + // simply matches the sign bit to satisfy the two's + // complement integer representation of negative numbers. 
+ out[out_idx] = in[in_idx]; +#pragma unroll BIT_WIDTH_RATIO - 1 + for (auto i = 1; i < BIT_WIDTH_RATIO; ++i) { + out[out_idx + i] = in[in_idx] < 0 ? -1 : 0; + } + }); + NANOARROW_RETURN_NOT_OK(set_buffer(std::move(buf), fixed_width_data_buffer_idx, tmp.get())); + } + + auto contents = input.release(); + if (contents.null_mask) { + NANOARROW_RETURN_NOT_OK( + set_buffer(std::move(contents.null_mask), validity_buffer_idx, tmp.get())); + } + + if constexpr (std::is_same_v) { + NANOARROW_RETURN_NOT_OK( + set_buffer(std::move(contents.data), fixed_width_data_buffer_idx, tmp.get())); + } + + ArrowArrayMove(tmp.get(), out); + return NANOARROW_OK; +} + +template <> +int dispatch_to_arrow_device::operator()(cudf::column&& column, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr, + ArrowArray* out) +{ + using DeviceType = int32_t; + return decimals_to_arrow( + std::move(column), cudf::detail::max_precision(), stream, mr, out); +} + +template <> +int dispatch_to_arrow_device::operator()(cudf::column&& column, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr, + ArrowArray* out) +{ + using DeviceType = int64_t; + return decimals_to_arrow( + std::move(column), cudf::detail::max_precision(), stream, mr, out); +} + +template <> +int dispatch_to_arrow_device::operator()(cudf::column&& column, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr, + ArrowArray* out) +{ + using DeviceType = __int128_t; + return decimals_to_arrow( + std::move(column), cudf::detail::max_precision(), stream, mr, out); +} + +template <> +int dispatch_to_arrow_device::operator()(cudf::column&& column, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr, + ArrowArray* out) +{ + nanoarrow::UniqueArray tmp; + NANOARROW_RETURN_NOT_OK(initialize_array(tmp.get(), NANOARROW_TYPE_BOOL, column)); + + auto bitmask = bools_to_mask(column.view(), stream, mr); + auto contents = column.release(); + if (contents.null_mask) { + 
NANOARROW_RETURN_NOT_OK( + set_buffer(std::move(contents.null_mask), validity_buffer_idx, tmp.get())); + } + NANOARROW_RETURN_NOT_OK( + set_buffer(std::move(bitmask.first), fixed_width_data_buffer_idx, tmp.get())); + + ArrowArrayMove(tmp.get(), out); + return NANOARROW_OK; +} + +template <> +int dispatch_to_arrow_device::operator()(cudf::column&& column, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr, + ArrowArray* out) +{ + nanoarrow::UniqueArray tmp; + NANOARROW_RETURN_NOT_OK(initialize_array(tmp.get(), NANOARROW_TYPE_STRING, column)); + + if (column.size() == 0) { + // the scalar zero here is necessary because the spec for string arrays states + // that the offsets buffer should contain "length + 1" signed integers. So in + // the case of a 0 length string array, there should be exactly 1 value, zero, + // in the offsets buffer. While some arrow implementations may accept a zero-sized + // offsets buffer, best practices would be to allocate the buffer with the single value. 
+ auto zero = std::make_unique>(0, stream, mr); + NANOARROW_RETURN_NOT_OK(set_buffer(std::move(zero), fixed_width_data_buffer_idx, tmp.get())); + ArrowArrayMove(tmp.get(), out); + return NANOARROW_OK; + } + + auto contents = column.release(); + if (contents.null_mask) { + NANOARROW_RETURN_NOT_OK( + set_buffer(std::move(contents.null_mask), validity_buffer_idx, tmp.get())); + } + + auto offsets_contents = + contents.children[cudf::strings_column_view::offsets_column_index]->release(); + NANOARROW_RETURN_NOT_OK(set_buffer(std::move(offsets_contents.data), 1, tmp.get())); + NANOARROW_RETURN_NOT_OK(set_buffer(std::move(contents.data), 2, tmp.get())); + + ArrowArrayMove(tmp.get(), out); + return NANOARROW_OK; +} + +template <> +int dispatch_to_arrow_device::operator()(cudf::column&& column, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr, + ArrowArray* out); + +template <> +int dispatch_to_arrow_device::operator()(cudf::column&& column, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr, + ArrowArray* out); + +template <> +int dispatch_to_arrow_device::operator()(cudf::column&& column, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr, + ArrowArray* out) +{ + nanoarrow::UniqueArray tmp; + NANOARROW_RETURN_NOT_OK(initialize_array(tmp.get(), NANOARROW_TYPE_STRUCT, column)); + NANOARROW_RETURN_NOT_OK(ArrowArrayAllocateChildren(tmp.get(), column.num_children())); + + auto contents = column.release(); + if (contents.null_mask) { + NANOARROW_RETURN_NOT_OK( + set_buffer(std::move(contents.null_mask), validity_buffer_idx, tmp.get())); + } + + for (size_t i = 0; i < size_t(tmp->n_children); ++i) { + ArrowArray* child_ptr = tmp->children[i]; + auto& child = contents.children[i]; + if (child->type().id() == cudf::type_id::EMPTY) { + NANOARROW_RETURN_NOT_OK(ArrowArrayInitFromType(child_ptr, NANOARROW_TYPE_NA)); + child_ptr->length = child->size(); + child_ptr->null_count = child->size(); + } else { + 
NANOARROW_RETURN_NOT_OK(cudf::type_dispatcher( + child->type(), dispatch_to_arrow_device{}, std::move(*child), stream, mr, child_ptr)); + } + } + + ArrowArrayMove(tmp.get(), out); + return NANOARROW_OK; +} + +template <> +int dispatch_to_arrow_device::operator()(cudf::column&& column, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr, + ArrowArray* out) +{ + nanoarrow::UniqueArray tmp; + NANOARROW_RETURN_NOT_OK(initialize_array(tmp.get(), NANOARROW_TYPE_LIST, column)); + NANOARROW_RETURN_NOT_OK(ArrowArrayAllocateChildren(tmp.get(), 1)); + + auto contents = column.release(); + if (contents.null_mask) { + NANOARROW_RETURN_NOT_OK( + set_buffer(std::move(contents.null_mask), validity_buffer_idx, tmp.get())); + } + + auto offsets_contents = + contents.children[cudf::lists_column_view::offsets_column_index]->release(); + NANOARROW_RETURN_NOT_OK(set_buffer(std::move(offsets_contents.data), 1, tmp.get())); + + auto& child = contents.children[cudf::lists_column_view::child_column_index]; + if (child->type().id() == cudf::type_id::EMPTY) { + NANOARROW_RETURN_NOT_OK(ArrowArrayInitFromType(tmp->children[0], NANOARROW_TYPE_NA)); + tmp->children[0]->length = 0; + tmp->children[0]->null_count = 0; + } else { + NANOARROW_RETURN_NOT_OK(cudf::type_dispatcher( + child->type(), dispatch_to_arrow_device{}, std::move(*child), stream, mr, tmp->children[0])); + } + + ArrowArrayMove(tmp.get(), out); + return NANOARROW_OK; +} + +template <> +int dispatch_to_arrow_device::operator()(cudf::column&& column, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr, + ArrowArray* out) +{ + nanoarrow::UniqueArray tmp; + NANOARROW_RETURN_NOT_OK(initialize_array( + tmp.get(), + id_to_arrow_type(column.child(cudf::dictionary_column_view::indices_column_index).type().id()), + column)); + NANOARROW_RETURN_NOT_OK(ArrowArrayAllocateDictionary(tmp.get())); + + auto contents = column.release(); + if (contents.null_mask) { + NANOARROW_RETURN_NOT_OK( + 
set_buffer(std::move(contents.null_mask), validity_buffer_idx, tmp.get())); + } + + auto indices_contents = + contents.children[cudf::dictionary_column_view::indices_column_index]->release(); + NANOARROW_RETURN_NOT_OK( + set_buffer(std::move(indices_contents.data), fixed_width_data_buffer_idx, tmp.get())); + + auto& keys = contents.children[cudf::dictionary_column_view::keys_column_index]; + NANOARROW_RETURN_NOT_OK(cudf::type_dispatcher( + keys->type(), dispatch_to_arrow_device{}, std::move(*keys), stream, mr, tmp->dictionary)); + + ArrowArrayMove(tmp.get(), out); + return NANOARROW_OK; +} + +struct ArrowDeviceArrayPrivateData { + ArrowArray parent; + cudaEvent_t sync_event; +}; + +void ArrowDeviceArrayRelease(ArrowArray* array) +{ + auto private_data = reinterpret_cast(array->private_data); + cudaEventDestroy(private_data->sync_event); + ArrowArrayRelease(&private_data->parent); + delete private_data; + array->release = nullptr; +} + +} // namespace +} // namespace detail + +unique_schema_t to_arrow_schema(cudf::table_view const& input, + cudf::host_span metadata) +{ + CUDF_EXPECTS((metadata.size() == static_cast(input.num_columns())), + "columns' metadata should be equal to the number of columns in table"); + + nanoarrow::UniqueSchema result; + ArrowSchemaInit(result.get()); + NANOARROW_THROW_NOT_OK(ArrowSchemaSetTypeStruct(result.get(), input.num_columns())); + + for (int i = 0; i < input.num_columns(); ++i) { + auto child = result->children[i]; + auto col = input.column(i); + ArrowSchemaInit(child); + NANOARROW_THROW_NOT_OK(ArrowSchemaSetName(child, metadata[i].name.c_str())); + child->flags = col.has_nulls() ? 
ARROW_FLAG_NULLABLE : 0; + + if (col.type().id() == cudf::type_id::EMPTY) { + NANOARROW_THROW_NOT_OK(ArrowSchemaSetType(child, NANOARROW_TYPE_NA)); + continue; + } + + NANOARROW_THROW_NOT_OK( + cudf::type_dispatcher(col.type(), detail::dispatch_to_arrow_type{}, col, metadata[i], child)); + } + + unique_schema_t out(new ArrowSchema, [](ArrowSchema* schema) { + if (schema->release != nullptr) { ArrowSchemaRelease(schema); } + delete schema; + }); + result.move(out.get()); + return out; +} + +unique_device_array_t to_arrow_device(cudf::table&& table, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) +{ + nanoarrow::UniqueArray tmp; + NANOARROW_THROW_NOT_OK(ArrowArrayInitFromType(tmp.get(), NANOARROW_TYPE_STRUCT)); + + NANOARROW_THROW_NOT_OK(ArrowArrayAllocateChildren(tmp.get(), table.num_columns())); + tmp->length = table.num_rows(); + tmp->null_count = 0; + + auto cols = table.release(); + for (size_t i = 0; i < cols.size(); ++i) { + auto child = tmp->children[i]; + auto col = cols[i].get(); + + if (col->type().id() == cudf::type_id::EMPTY) { + NANOARROW_THROW_NOT_OK(ArrowArrayInitFromType(child, NANOARROW_TYPE_NA)); + child->length = col->size(); + child->null_count = col->size(); + continue; + } + + NANOARROW_THROW_NOT_OK(cudf::type_dispatcher( + col->type(), detail::dispatch_to_arrow_device{}, std::move(*col), stream, mr, child)); + } + + NANOARROW_THROW_NOT_OK( + ArrowArrayFinishBuilding(tmp.get(), NANOARROW_VALIDATION_LEVEL_MINIMAL, nullptr)); + + auto private_data = std::make_unique(); + cudaEventCreate(&private_data->sync_event); + + auto status = cudaEventRecord(private_data->sync_event, stream); + if (status != cudaSuccess) { CUDF_FAIL("could not create event to sync on"); } + + ArrowArrayMove(tmp.get(), &private_data->parent); + unique_device_array_t result(new ArrowDeviceArray, [](ArrowDeviceArray* arr) { + if (arr->array.release != nullptr) { ArrowArrayRelease(&arr->array); } + delete arr; + }); + result->device_id = 
rmm::get_current_cuda_device().value(); + result->device_type = ARROW_DEVICE_CUDA; + result->sync_event = &private_data->sync_event; + result->array = private_data->parent; + result->array.private_data = private_data.release(); + result->array.release = &detail::ArrowDeviceArrayRelease; + return result; +} + +unique_device_array_t to_arrow_device(cudf::column&& col, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) +{ + nanoarrow::UniqueArray tmp; + if (col.type().id() == cudf::type_id::EMPTY) { + NANOARROW_THROW_NOT_OK(ArrowArrayInitFromType(tmp.get(), NANOARROW_TYPE_NA)); + tmp->length = col.size(); + tmp->null_count = col.size(); + } + + NANOARROW_THROW_NOT_OK(cudf::type_dispatcher( + col.type(), detail::dispatch_to_arrow_device{}, std::move(col), stream, mr, tmp.get())); + + NANOARROW_THROW_NOT_OK( + ArrowArrayFinishBuilding(tmp.get(), NANOARROW_VALIDATION_LEVEL_MINIMAL, nullptr)); + + auto private_data = std::make_unique(); + cudaEventCreate(&private_data->sync_event); + + auto status = cudaEventRecord(private_data->sync_event, stream); + if (status != cudaSuccess) { CUDF_FAIL("could not create event to sync on"); } + + ArrowArrayMove(tmp.get(), &private_data->parent); + unique_device_array_t result(new ArrowDeviceArray, [](ArrowDeviceArray* arr) { + if (arr->array.release != nullptr) { ArrowArrayRelease(&arr->array); } + delete arr; + }); + result->device_id = rmm::get_current_cuda_device().value(); + result->device_type = ARROW_DEVICE_CUDA; + result->sync_event = &private_data->sync_event; + result->array = private_data->parent; + result->array.private_data = private_data.release(); + result->array.release = &detail::ArrowDeviceArrayRelease; + return result; +} + +} // namespace cudf diff --git a/cpp/tests/CMakeLists.txt b/cpp/tests/CMakeLists.txt index 9dbf278c71d..053fcc0989a 100644 --- a/cpp/tests/CMakeLists.txt +++ b/cpp/tests/CMakeLists.txt @@ -24,7 +24,7 @@ rapids_test_init() # properties and linking to build the test 
function(ConfigureTest CMAKE_TEST_NAME) set(options) - set(one_value GPUS PERCENT STREAM_MODE) + set(one_value GPUS PERCENT STREAM_MODE EXTRA_LIB) set(multi_value) cmake_parse_arguments(_CUDF_TEST "${options}" "${one_value}" "${multi_value}" ${ARGN}) if(NOT DEFINED _CUDF_TEST_GPUS AND NOT DEFINED _CUDF_TEST_PERCENT) @@ -56,7 +56,7 @@ function(ConfigureTest CMAKE_TEST_NAME) target_link_libraries( ${CMAKE_TEST_NAME} PRIVATE cudftestutil GTest::gmock_main GTest::gtest_main nvtx3-cpp - $ + $ "${_CUDF_TEST_EXTRA_LIB}" ) rapids_cuda_set_runtime(${CMAKE_TEST_NAME} USE_STATIC ${CUDA_STATIC_RUNTIME}) rapids_test_add( @@ -267,7 +267,8 @@ ConfigureTest( # ################################################################################################## # * interop tests ------------------------------------------------------------------------- ConfigureTest( - INTEROP_TEST interop/to_arrow_test.cpp interop/from_arrow_test.cpp interop/dlpack_test.cpp + INTEROP_TEST interop/to_arrow_device_test.cpp interop/to_arrow_test.cpp + interop/from_arrow_test.cpp interop/dlpack_test.cpp EXTRA_LIB nanoarrow ) # ################################################################################################## diff --git a/cpp/tests/interop/nanoarrow_utils.hpp b/cpp/tests/interop/nanoarrow_utils.hpp new file mode 100644 index 00000000000..e7ffa9e40f4 --- /dev/null +++ b/cpp/tests/interop/nanoarrow_utils.hpp @@ -0,0 +1,226 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include + +// no-op allocator/deallocator to set into ArrowArray buffers that we don't +// want to own their buffers. +static ArrowBufferAllocator noop_alloc = (struct ArrowBufferAllocator){ + .reallocate = [](ArrowBufferAllocator*, uint8_t* ptr, int64_t, int64_t) -> uint8_t* { + return ptr; + }, + .free = [](ArrowBufferAllocator*, uint8_t*, int64_t) {}, + .private_data = nullptr, +}; + +// populate the ArrowArray by copying host data buffers for fixed width types other +// than boolean. +template +std::enable_if_t() and !std::is_same_v, void> get_nanoarrow_array( + ArrowArray* arr, std::vector const& data, std::vector const& mask = {}) +{ + arr->length = data.size(); + NANOARROW_THROW_NOT_OK( + ArrowBufferAppend(ArrowArrayBuffer(arr, 1), data.data(), sizeof(T) * data.size())); + if (!mask.empty()) { + NANOARROW_THROW_NOT_OK(ArrowBitmapReserve(ArrowArrayValidityBitmap(arr), mask.size())); + ArrowBitmapAppendInt8Unsafe( + ArrowArrayValidityBitmap(arr), reinterpret_cast(mask.data()), mask.size()); + arr->null_count = ArrowBitCountSet(ArrowArrayValidityBitmap(arr)->buffer.data, 0, data.size()); + } else { + arr->null_count = 0; + } + + CUDF_EXPECTS(ArrowArrayFinishBuildingDefault(arr, nullptr) == NANOARROW_OK, + "failed to construct array"); +} + +// populate an ArrowArray with pointers to the raw device buffers of a cudf::column_view +// and use the no-op alloc so that the ArrowArray doesn't presume ownership of the data +template +std::enable_if_t() and !std::is_same_v, void> populate_from_col( + ArrowArray* arr, cudf::column_view view) +{ + arr->length = view.size(); + arr->null_count = view.null_count(); + ArrowBufferSetAllocator(ArrowArrayBuffer(arr, 0), noop_alloc); + ArrowArrayValidityBitmap(arr)->buffer.data = + const_cast(reinterpret_cast(view.null_mask())); + 
ArrowBufferSetAllocator(ArrowArrayBuffer(arr, 1), noop_alloc); + ArrowArrayBuffer(arr, 1)->data = const_cast(view.data()); +} + +// populate an ArrowArray with boolean data by generating the appropriate +// bitmaps to copy the data. +template +std::enable_if_t, void> get_nanoarrow_array( + ArrowArray* arr, std::vector const& data, std::vector const& mask = {}) +{ + ArrowBitmap bool_data; + ArrowBitmapInit(&bool_data); + NANOARROW_THROW_NOT_OK(ArrowBitmapReserve(&bool_data, data.size())); + std::for_each(data.begin(), data.end(), [&](const auto&& elem) { + NANOARROW_THROW_NOT_OK(ArrowBitmapAppend(&bool_data, (elem) ? 1 : 0, 1)); + }); + NANOARROW_THROW_NOT_OK(ArrowArraySetBuffer(arr, 1, &bool_data.buffer)); + + if (!mask.empty()) { + NANOARROW_THROW_NOT_OK(ArrowBitmapReserve(ArrowArrayValidityBitmap(arr), mask.size())); + std::for_each(mask.begin(), mask.end(), [&](const auto&& elem) { + NANOARROW_THROW_NOT_OK(ArrowBitmapAppend(ArrowArrayValidityBitmap(arr), (elem) ? 1 : 0, 1)); + }); + arr->null_count = ArrowBitCountSet(ArrowArrayValidityBitmap(arr)->buffer.data, 0, data.size()); + } else { + arr->null_count = 0; + } + + CUDF_EXPECTS(ArrowArrayFinishBuildingDefault(arr, nullptr) == NANOARROW_OK, + "failed to construct boolean array"); +} + +// populate an ArrowArray from a boolean cudf column. Since Arrow and cudf +// still represent boolean arrays differently, we have to use bools_to_mask +// and give the ArrowArray object ownership of the device data. 
+template +std::enable_if_t, void> populate_from_col(ArrowArray* arr, + cudf::column_view view) +{ + arr->length = view.size(); + arr->null_count = view.null_count(); + ArrowBufferSetAllocator(ArrowArrayBuffer(arr, 0), noop_alloc); + ArrowArrayValidityBitmap(arr)->buffer.data = + const_cast(reinterpret_cast(view.null_mask())); + + auto bitmask = cudf::bools_to_mask(view); + auto ptr = reinterpret_cast(bitmask.first->data()); + ArrowBufferSetAllocator( + ArrowArrayBuffer(arr, 1), + ArrowBufferDeallocator( + [](ArrowBufferAllocator* alloc, uint8_t*, int64_t) { + auto buf = reinterpret_cast*>(alloc->private_data); + delete buf; + }, + new std::unique_ptr(std::move(bitmask.first)))); + ArrowArrayBuffer(arr, 1)->data = ptr; +} + +// populate an ArrowArray by copying the string data and constructing the offsets +// buffer. +template +std::enable_if_t, void> get_nanoarrow_array( + ArrowArray* arr, std::vector const& data, std::vector const& mask = {}) +{ + NANOARROW_THROW_NOT_OK(ArrowArrayStartAppending(arr)); + for (auto& str : data) { + NANOARROW_THROW_NOT_OK(ArrowArrayAppendString(arr, ArrowCharView(str.c_str()))); + } + + if (!mask.empty()) { + ArrowBitmapReset(ArrowArrayValidityBitmap(arr)); + NANOARROW_THROW_NOT_OK(ArrowBitmapReserve(ArrowArrayValidityBitmap(arr), mask.size())); + ArrowBitmapAppendInt8Unsafe( + ArrowArrayValidityBitmap(arr), reinterpret_cast(mask.data()), mask.size()); + arr->null_count = ArrowBitCountSet(ArrowArrayValidityBitmap(arr)->buffer.data, 0, data.size()); + } else { + arr->null_count = 0; + } + + CUDF_EXPECTS(ArrowArrayFinishBuildingDefault(arr, nullptr) == NANOARROW_OK, + "failed to construct string array"); +} + +// populate an ArrowArray with the string data buffers of a cudf column_view +// using no-op allocator so the ArrowArray knows it doesn't have ownership +// of the device buffers. 
+template +std::enable_if_t, void> populate_from_col( + ArrowArray* arr, cudf::column_view view) +{ + arr->length = view.size(); + arr->null_count = view.null_count(); + ArrowBufferSetAllocator(ArrowArrayBuffer(arr, 0), noop_alloc); + ArrowArrayValidityBitmap(arr)->buffer.data = + const_cast(reinterpret_cast(view.null_mask())); + + cudf::strings_column_view sview{view}; + ArrowBufferSetAllocator(ArrowArrayBuffer(arr, 1), noop_alloc); + ArrowArrayBuffer(arr, 1)->data = const_cast(sview.offsets().data()); + ArrowBufferSetAllocator(ArrowArrayBuffer(arr, 2), noop_alloc); + ArrowArrayBuffer(arr, 2)->data = const_cast(view.data()); +} + +// populate a dictionary ArrowArray by delegating the copying of the indices +// and key arrays +template +void get_nanoarrow_dict_array(ArrowArray* arr, + std::vector const& keys, + std::vector const& ind, + std::vector const& validity = {}) +{ + get_nanoarrow_array(arr->dictionary, keys); + get_nanoarrow_array(arr, ind, validity); +} + +// populate a list ArrowArray by copying the offsets and data buffers +template +void get_nanoarrow_list_array(ArrowArray* arr, + std::vector data, + std::vector offsets, + std::vector data_validity = {}, + std::vector list_validity = {}) +{ + get_nanoarrow_array(arr->children[0], data, data_validity); + + arr->length = offsets.size() - 1; + NANOARROW_THROW_NOT_OK( + ArrowBufferAppend(ArrowArrayBuffer(arr, 1), offsets.data(), sizeof(int32_t) * offsets.size())); + if (!list_validity.empty()) { + NANOARROW_THROW_NOT_OK(ArrowBitmapReserve(ArrowArrayValidityBitmap(arr), list_validity.size())); + ArrowBitmapAppendInt8Unsafe(ArrowArrayValidityBitmap(arr), + reinterpret_cast(list_validity.data()), + arr->length); + arr->null_count = ArrowBitCountSet(ArrowArrayValidityBitmap(arr)->buffer.data, 0, arr->length); + } else { + arr->null_count = 0; + } + + CUDF_EXPECTS(ArrowArrayFinishBuildingDefault(arr, nullptr) == NANOARROW_OK, + "failed to construct list array"); +} + +// populate an ArrowArray list array from 
device buffers using a no-op +// allocator so that the ArrowArray doesn't have ownership of the buffers +void populate_list_from_col(ArrowArray* arr, cudf::lists_column_view view) +{ + arr->length = view.size(); + arr->null_count = view.null_count(); + + ArrowBufferSetAllocator(ArrowArrayBuffer(arr, 0), noop_alloc); + ArrowArrayValidityBitmap(arr)->buffer.data = + const_cast(reinterpret_cast(view.null_mask())); + + ArrowBufferSetAllocator(ArrowArrayBuffer(arr, 1), noop_alloc); + ArrowArrayBuffer(arr, 1)->data = const_cast(view.offsets().data()); +} diff --git a/cpp/tests/interop/to_arrow_device_test.cpp b/cpp/tests/interop/to_arrow_device_test.cpp new file mode 100644 index 00000000000..243aa4e81af --- /dev/null +++ b/cpp/tests/interop/to_arrow_device_test.cpp @@ -0,0 +1,739 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "nanoarrow_utils.hpp" + +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +using vector_of_columns = std::vector>; + +std::tuple, nanoarrow::UniqueSchema, nanoarrow::UniqueArray> +get_nanoarrow_tables(cudf::size_type length) +{ + std::vector int64_data(length); + std::vector bool_data(length); + std::vector string_data(length); + std::vector validity(length); + std::vector bool_validity(length); + std::vector bool_data_validity; + cudf::size_type length_of_individual_list = 3; + cudf::size_type length_of_list = length_of_individual_list * length; + std::vector list_int64_data(length_of_list); + std::vector list_int64_data_validity(length_of_list); + std::vector list_offsets(length + 1); + + std::vector> columns; + + columns.emplace_back(cudf::test::fixed_width_column_wrapper( + int64_data.begin(), int64_data.end(), validity.begin()) + .release()); + columns.emplace_back( + cudf::test::strings_column_wrapper(string_data.begin(), string_data.end(), validity.begin()) + .release()); + auto col4 = cudf::test::fixed_width_column_wrapper( + int64_data.begin(), int64_data.end(), validity.begin()); + auto dict_col = cudf::dictionary::encode(col4); + columns.emplace_back(std::move(cudf::dictionary::encode(col4))); + columns.emplace_back(cudf::test::fixed_width_column_wrapper( + bool_data.begin(), bool_data.end(), bool_validity.begin()) + .release()); + auto list_child_column = cudf::test::fixed_width_column_wrapper( + list_int64_data.begin(), list_int64_data.end(), list_int64_data_validity.begin()); + auto list_offsets_column = + cudf::test::fixed_width_column_wrapper(list_offsets.begin(), list_offsets.end()); + auto [list_mask, list_nulls] = cudf::bools_to_mask(cudf::test::fixed_width_column_wrapper( + bool_data_validity.begin(), bool_data_validity.end())); + columns.emplace_back(cudf::make_lists_column(length, + 
list_offsets_column.release(), + list_child_column.release(), + list_nulls, + std::move(*list_mask))); + auto int_column = cudf::test::fixed_width_column_wrapper( + int64_data.begin(), int64_data.end(), validity.begin()) + .release(); + auto str_column = + cudf::test::strings_column_wrapper(string_data.begin(), string_data.end(), validity.begin()) + .release(); + vector_of_columns cols; + cols.push_back(move(int_column)); + cols.push_back(move(str_column)); + auto [null_mask, null_count] = cudf::bools_to_mask(cudf::test::fixed_width_column_wrapper( + bool_data_validity.begin(), bool_data_validity.end())); + columns.emplace_back( + cudf::make_structs_column(length, std::move(cols), null_count, std::move(*null_mask))); + + nanoarrow::UniqueSchema schema; + ArrowSchemaInit(schema.get()); + NANOARROW_THROW_NOT_OK(ArrowSchemaSetTypeStruct(schema.get(), 6)); + + NANOARROW_THROW_NOT_OK(ArrowSchemaInitFromType(schema->children[0], NANOARROW_TYPE_INT64)); + NANOARROW_THROW_NOT_OK(ArrowSchemaSetName(schema->children[0], "a")); + if (columns[0]->null_count() > 0) { + schema->children[0]->flags |= ARROW_FLAG_NULLABLE; + } else { + schema->children[0]->flags = 0; + } + + NANOARROW_THROW_NOT_OK(ArrowSchemaInitFromType(schema->children[1], NANOARROW_TYPE_STRING)); + NANOARROW_THROW_NOT_OK(ArrowSchemaSetName(schema->children[1], "b")); + if (columns[1]->null_count() > 0) { + schema->children[1]->flags |= ARROW_FLAG_NULLABLE; + } else { + schema->children[1]->flags = 0; + } + + NANOARROW_THROW_NOT_OK(ArrowSchemaInitFromType(schema->children[2], NANOARROW_TYPE_UINT32)); + NANOARROW_THROW_NOT_OK(ArrowSchemaAllocateDictionary(schema->children[2])); + NANOARROW_THROW_NOT_OK( + ArrowSchemaInitFromType(schema->children[2]->dictionary, NANOARROW_TYPE_INT64)); + NANOARROW_THROW_NOT_OK(ArrowSchemaSetName(schema->children[2], "c")); + if (columns[2]->null_count() > 0) { + schema->children[2]->flags |= ARROW_FLAG_NULLABLE; + } else { + schema->children[2]->flags = 0; + } + + 
NANOARROW_THROW_NOT_OK(ArrowSchemaInitFromType(schema->children[3], NANOARROW_TYPE_BOOL)); + NANOARROW_THROW_NOT_OK(ArrowSchemaSetName(schema->children[3], "d")); + if (columns[3]->null_count() > 0) { + schema->children[3]->flags |= ARROW_FLAG_NULLABLE; + } else { + schema->children[3]->flags = 0; + } + + NANOARROW_THROW_NOT_OK(ArrowSchemaInitFromType(schema->children[4], NANOARROW_TYPE_LIST)); + NANOARROW_THROW_NOT_OK( + ArrowSchemaInitFromType(schema->children[4]->children[0], NANOARROW_TYPE_INT64)); + NANOARROW_THROW_NOT_OK(ArrowSchemaSetName(schema->children[4]->children[0], "element")); + if (columns[4]->child(1).null_count() > 0) { + schema->children[4]->children[0]->flags |= ARROW_FLAG_NULLABLE; + } else { + schema->children[4]->children[0]->flags = 0; + } + + NANOARROW_THROW_NOT_OK(ArrowSchemaSetName(schema->children[4], "e")); + if (columns[4]->has_nulls()) { + schema->children[4]->flags |= ARROW_FLAG_NULLABLE; + } else { + schema->children[4]->flags = 0; + } + + ArrowSchemaInit(schema->children[5]); + NANOARROW_THROW_NOT_OK(ArrowSchemaSetTypeStruct(schema->children[5], 2)); + NANOARROW_THROW_NOT_OK( + ArrowSchemaInitFromType(schema->children[5]->children[0], NANOARROW_TYPE_INT64)); + NANOARROW_THROW_NOT_OK(ArrowSchemaSetName(schema->children[5]->children[0], "integral")); + if (columns[5]->child(0).has_nulls()) { + schema->children[5]->children[0]->flags |= ARROW_FLAG_NULLABLE; + } else { + schema->children[5]->children[0]->flags = 0; + } + + NANOARROW_THROW_NOT_OK( + ArrowSchemaInitFromType(schema->children[5]->children[1], NANOARROW_TYPE_STRING)); + NANOARROW_THROW_NOT_OK(ArrowSchemaSetName(schema->children[5]->children[1], "string")); + if (columns[5]->child(1).has_nulls()) { + schema->children[5]->children[1]->flags |= ARROW_FLAG_NULLABLE; + } else { + schema->children[5]->children[1]->flags = 0; + } + + NANOARROW_THROW_NOT_OK(ArrowSchemaSetName(schema->children[5], "f")); + if (columns[5]->has_nulls()) { + schema->children[5]->flags |= 
ARROW_FLAG_NULLABLE; + } else { + schema->children[5]->flags = 0; + } + + nanoarrow::UniqueArray arrow; + NANOARROW_THROW_NOT_OK(ArrowArrayInitFromSchema(arrow.get(), schema.get(), nullptr)); + + get_nanoarrow_array(arrow->children[0], int64_data, validity); + get_nanoarrow_array(arrow->children[1], string_data, validity); + cudf::dictionary_column_view view(dict_col->view()); + auto keys = cudf::test::to_host(view.keys()).first; + auto indices = cudf::test::to_host(view.indices()).first; + get_nanoarrow_dict_array(arrow->children[2], + std::vector(keys.begin(), keys.end()), + std::vector(indices.begin(), indices.end()), + validity); + get_nanoarrow_array(arrow->children[3], bool_data, bool_validity); + get_nanoarrow_list_array(arrow->children[4], + list_int64_data, + list_offsets, + list_int64_data_validity, + bool_data_validity); + + get_nanoarrow_array(arrow->children[5]->children[0], int64_data, validity); + get_nanoarrow_array(arrow->children[5]->children[1], string_data, validity); + arrow->children[5]->length = length; + NANOARROW_THROW_NOT_OK(ArrowBitmapReserve(ArrowArrayValidityBitmap(arrow->children[5]), length)); + std::for_each(bool_data_validity.begin(), bool_data_validity.end(), [&](auto&& elem) { + NANOARROW_THROW_NOT_OK( + ArrowBitmapAppend(ArrowArrayValidityBitmap(arrow->children[5]), (elem) ? 
1 : 0, 1)); + }); + arrow->children[5]->null_count = + ArrowBitCountSet(ArrowArrayValidityBitmap(arrow->children[5])->buffer.data, 0, length); + + CUDF_EXPECTS(ArrowArrayFinishBuildingDefault(arrow.get(), nullptr) == NANOARROW_OK, + "failed to build example Arrays"); + + return std::make_tuple( + std::make_unique(std::move(columns)), std::move(schema), std::move(arrow)); +} + +struct BaseArrowFixture : public cudf::test::BaseFixture { + void compare_schemas(const ArrowSchema* expected, const ArrowSchema* actual) + { + EXPECT_STREQ(expected->format, actual->format); + EXPECT_STREQ(expected->name, actual->name); + EXPECT_STREQ(expected->metadata, actual->metadata); + EXPECT_EQ(expected->flags, actual->flags); + EXPECT_EQ(expected->n_children, actual->n_children); + + if (expected->n_children == 0) { + EXPECT_EQ(nullptr, actual->children); + } else { + for (int i = 0; i < expected->n_children; ++i) { + SCOPED_TRACE(expected->children[i]->name); + compare_schemas(expected->children[i], actual->children[i]); + } + } + + if (expected->dictionary != nullptr) { + EXPECT_NE(nullptr, actual->dictionary); + SCOPED_TRACE("dictionary"); + compare_schemas(expected->dictionary, actual->dictionary); + } else { + EXPECT_EQ(nullptr, actual->dictionary); + } + } + + void compare_device_buffers(const size_t nbytes, + const int buffer_idx, + const ArrowArray* expected, + const ArrowArray* actual) + { + std::vector actual_bytes; + std::vector expected_bytes; + expected_bytes.resize(nbytes); + actual_bytes.resize(nbytes); + + // synchronous copies so we don't have to worry about async weirdness + cudaMemcpy( + expected_bytes.data(), expected->buffers[buffer_idx], nbytes, cudaMemcpyDeviceToHost); + cudaMemcpy(actual_bytes.data(), actual->buffers[buffer_idx], nbytes, cudaMemcpyDeviceToHost); + + ASSERT_EQ(expected_bytes, actual_bytes); + } + + void compare_arrays(const ArrowSchema* schema, + const ArrowArray* expected, + const ArrowArray* actual) + { + ArrowSchemaView schema_view; + 
ArrowSchemaViewInit(&schema_view, schema, nullptr); + + EXPECT_EQ(expected->length, actual->length); + EXPECT_EQ(expected->null_count, actual->null_count); + EXPECT_EQ(expected->offset, actual->offset); + EXPECT_EQ(expected->n_buffers, actual->n_buffers); + EXPECT_EQ(expected->n_children, actual->n_children); + + if (expected->length > 0) { + EXPECT_EQ(expected->buffers[0], actual->buffers[0]); + if (schema_view.type == NANOARROW_TYPE_BOOL) { + const size_t nbytes = (expected->length + 7) >> 3; + compare_device_buffers(nbytes, 1, expected, actual); + } else if (schema_view.type == NANOARROW_TYPE_DECIMAL128) { + const size_t nbytes = (expected->length * sizeof(__int128_t)); + compare_device_buffers(nbytes, 1, expected, actual); + } else { + for (int i = 1; i < expected->n_buffers; ++i) { + EXPECT_EQ(expected->buffers[i], actual->buffers[i]); + } + } + } + + if (expected->n_children == 0) { + EXPECT_EQ(nullptr, actual->children); + } else { + for (int i = 0; i < expected->n_children; ++i) { + SCOPED_TRACE(schema->children[i]->name); + compare_arrays(schema->children[i], expected->children[i], actual->children[i]); + } + } + + if (expected->dictionary != nullptr) { + EXPECT_NE(nullptr, actual->dictionary); + SCOPED_TRACE("dictionary"); + compare_arrays(schema->dictionary, expected->dictionary, actual->dictionary); + } else { + EXPECT_EQ(nullptr, actual->dictionary); + } + } +}; + +struct ToArrowDeviceTest : public BaseArrowFixture {}; + +template +struct ToArrowDeviceTestDurationsTest : public BaseArrowFixture {}; + +TYPED_TEST_SUITE(ToArrowDeviceTestDurationsTest, cudf::test::DurationTypes); + +TEST_F(ToArrowDeviceTest, EmptyTable) +{ + const auto [table, schema, arr] = get_nanoarrow_tables(0); + + auto struct_meta = cudf::column_metadata{"f"}; + struct_meta.children_meta = {{"integral"}, {"string"}}; + + cudf::dictionary_column_view dview{table->view().column(2)}; + + std::vector meta{{"a"}, {"b"}, {"c"}, {"d"}, {"e"}, struct_meta}; + auto got_arrow_schema = 
cudf::to_arrow_schema(table->view(), meta); + + compare_schemas(schema.get(), got_arrow_schema.get()); + ArrowSchemaRelease(got_arrow_schema.get()); + + auto got_arrow_device = cudf::to_arrow_device(std::move(*table)); + EXPECT_EQ(rmm::get_current_cuda_device().value(), got_arrow_device->device_id); + EXPECT_EQ(ARROW_DEVICE_CUDA, got_arrow_device->device_type); + + compare_arrays(schema.get(), arr.get(), &got_arrow_device->array); + ArrowArrayRelease(&got_arrow_device->array); +} + +TEST_F(ToArrowDeviceTest, DateTimeTable) +{ + auto data = {1, 2, 3, 4, 5, 6}; + auto col = + cudf::test::fixed_width_column_wrapper(data); + std::vector> cols; + cols.emplace_back(col.release()); + cudf::table input(std::move(cols)); + + auto got_arrow_schema = + cudf::to_arrow_schema(input.view(), std::vector{{"a"}}); + nanoarrow::UniqueSchema expected_schema; + ArrowSchemaInit(expected_schema.get()); + ArrowSchemaSetTypeStruct(expected_schema.get(), 1); + ArrowSchemaInit(expected_schema->children[0]); + ArrowSchemaSetTypeDateTime( + expected_schema->children[0], NANOARROW_TYPE_TIMESTAMP, NANOARROW_TIME_UNIT_MILLI, nullptr); + ArrowSchemaSetName(expected_schema->children[0], "a"); + expected_schema->children[0]->flags = 0; + + compare_schemas(expected_schema.get(), got_arrow_schema.get()); + ArrowSchemaRelease(got_arrow_schema.get()); + + auto data_ptr = input.get_column(0).view().data(); + auto got_arrow_array = cudf::to_arrow_device(std::move(input)); + EXPECT_EQ(rmm::get_current_cuda_device().value(), got_arrow_array->device_id); + EXPECT_EQ(ARROW_DEVICE_CUDA, got_arrow_array->device_type); + + EXPECT_EQ(data.size(), got_arrow_array->array.length); + EXPECT_EQ(0, got_arrow_array->array.null_count); + EXPECT_EQ(0, got_arrow_array->array.offset); + EXPECT_EQ(1, got_arrow_array->array.n_children); + EXPECT_EQ(nullptr, got_arrow_array->array.buffers[0]); + + EXPECT_EQ(data.size(), got_arrow_array->array.children[0]->length); + EXPECT_EQ(0, 
got_arrow_array->array.children[0]->null_count); + EXPECT_EQ(0, got_arrow_array->array.children[0]->offset); + EXPECT_EQ(nullptr, got_arrow_array->array.children[0]->buffers[0]); + EXPECT_EQ(data_ptr, got_arrow_array->array.children[0]->buffers[1]); + + ArrowArrayRelease(&got_arrow_array->array); +} + +TYPED_TEST(ToArrowDeviceTestDurationsTest, DurationTable) +{ + using T = TypeParam; + + if (cudf::type_to_id() == cudf::type_id::DURATION_DAYS) { return; } + + auto data = {T{1}, T{2}, T{3}, T{4}, T{5}, T{6}}; + auto col = cudf::test::fixed_width_column_wrapper(data); + + std::vector> cols; + cols.emplace_back(col.release()); + cudf::table input(std::move(cols)); + + nanoarrow::UniqueSchema expected_schema; + ArrowSchemaInit(expected_schema.get()); + ArrowSchemaSetTypeStruct(expected_schema.get(), 1); + + ArrowSchemaInit(expected_schema->children[0]); + const ArrowTimeUnit arrow_unit = [&] { + switch (cudf::type_to_id()) { + case cudf::type_id::DURATION_SECONDS: return NANOARROW_TIME_UNIT_SECOND; + case cudf::type_id::DURATION_MILLISECONDS: return NANOARROW_TIME_UNIT_MILLI; + case cudf::type_id::DURATION_MICROSECONDS: return NANOARROW_TIME_UNIT_MICRO; + case cudf::type_id::DURATION_NANOSECONDS: return NANOARROW_TIME_UNIT_NANO; + default: CUDF_FAIL("Unsupported duration unit in arrow"); + } + }(); + ArrowSchemaSetTypeDateTime( + expected_schema->children[0], NANOARROW_TYPE_DURATION, arrow_unit, nullptr); + ArrowSchemaSetName(expected_schema->children[0], "a"); + expected_schema->children[0]->flags = 0; + + auto got_arrow_schema = + cudf::to_arrow_schema(input.view(), std::vector{{"a"}}); + BaseArrowFixture::compare_schemas(expected_schema.get(), got_arrow_schema.get()); + ArrowSchemaRelease(got_arrow_schema.get()); + + auto data_ptr = input.get_column(0).view().data(); + auto got_arrow_array = cudf::to_arrow_device(std::move(input)); + EXPECT_EQ(rmm::get_current_cuda_device().value(), got_arrow_array->device_id); + EXPECT_EQ(ARROW_DEVICE_CUDA, 
got_arrow_array->device_type); + + EXPECT_EQ(data.size(), got_arrow_array->array.length); + EXPECT_EQ(0, got_arrow_array->array.null_count); + EXPECT_EQ(0, got_arrow_array->array.offset); + EXPECT_EQ(1, got_arrow_array->array.n_children); + EXPECT_EQ(nullptr, got_arrow_array->array.buffers[0]); + + EXPECT_EQ(data.size(), got_arrow_array->array.children[0]->length); + EXPECT_EQ(0, got_arrow_array->array.children[0]->null_count); + EXPECT_EQ(0, got_arrow_array->array.children[0]->offset); + EXPECT_EQ(nullptr, got_arrow_array->array.children[0]->buffers[0]); + EXPECT_EQ(data_ptr, got_arrow_array->array.children[0]->buffers[1]); + + ArrowArrayRelease(&got_arrow_array->array); +} + +TEST_F(ToArrowDeviceTest, NestedList) +{ + auto valids = + cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i % 3 != 0; }); + auto col = cudf::test::lists_column_wrapper( + {{{{{1, 2}, valids}, {{3, 4}, valids}, {5}}, {{6}, {{7, 8, 9}, valids}}}, valids}); + + std::vector> cols; + cols.emplace_back(col.release()); + cudf::table input(std::move(cols)); + + nanoarrow::UniqueSchema expected_schema; + ArrowSchemaInit(expected_schema.get()); + ArrowSchemaSetTypeStruct(expected_schema.get(), 1); + + ArrowSchemaInitFromType(expected_schema->children[0], NANOARROW_TYPE_LIST); + ArrowSchemaSetName(expected_schema->children[0], "a"); + expected_schema->children[0]->flags = ARROW_FLAG_NULLABLE; + + ArrowSchemaInitFromType(expected_schema->children[0]->children[0], NANOARROW_TYPE_LIST); + ArrowSchemaSetName(expected_schema->children[0]->children[0], "element"); + expected_schema->children[0]->children[0]->flags = 0; + + ArrowSchemaInitFromType(expected_schema->children[0]->children[0]->children[0], + NANOARROW_TYPE_INT64); + ArrowSchemaSetName(expected_schema->children[0]->children[0]->children[0], "element"); + expected_schema->children[0]->children[0]->children[0]->flags = ARROW_FLAG_NULLABLE; + + auto got_arrow_schema = + cudf::to_arrow_schema(input.view(), std::vector{{"a"}}); + 
compare_schemas(expected_schema.get(), got_arrow_schema.get()); + ArrowSchemaRelease(got_arrow_schema.get()); + + nanoarrow::UniqueArray expected_array; + EXPECT_EQ(NANOARROW_OK, + ArrowArrayInitFromSchema(expected_array.get(), expected_schema.get(), nullptr)); + expected_array->length = input.num_rows(); + auto top_list = expected_array->children[0]; + cudf::lists_column_view lview{input.get_column(0).view()}; + populate_list_from_col(top_list, lview); + cudf::lists_column_view nested_view{lview.child()}; + populate_list_from_col(top_list->children[0], nested_view); + populate_from_col(top_list->children[0]->children[0], nested_view.child()); + + ArrowArrayFinishBuilding(expected_array.get(), NANOARROW_VALIDATION_LEVEL_NONE, nullptr); + + auto got_arrow_array = cudf::to_arrow_device(std::move(input)); + EXPECT_EQ(rmm::get_current_cuda_device().value(), got_arrow_array->device_id); + EXPECT_EQ(ARROW_DEVICE_CUDA, got_arrow_array->device_type); + + compare_arrays(expected_schema.get(), expected_array.get(), &got_arrow_array->array); + ArrowArrayRelease(&got_arrow_array->array); +} + +TEST_F(ToArrowDeviceTest, StructColumn) +{ + // Create cudf table + auto nested_type_field_names = + std::vector>{{"string", "integral", "bool", "nested_list", "struct"}}; + auto str_col = + cudf::test::strings_column_wrapper{ + "Samuel Vimes", "Carrot Ironfoundersson", "Angua von Überwald"} + .release(); + auto str_col2 = + cudf::test::strings_column_wrapper{{"CUDF", "ROCKS", "EVERYWHERE"}, {0, 1, 0}}.release(); + int num_rows{str_col->size()}; + auto int_col = cudf::test::fixed_width_column_wrapper{{48, 27, 25}}.release(); + auto int_col2 = + cudf::test::fixed_width_column_wrapper{{12, 24, 47}, {1, 0, 1}}.release(); + auto bool_col = cudf::test::fixed_width_column_wrapper{{true, true, false}}.release(); + auto list_col = + cudf::test::lists_column_wrapper({{{1, 2}, {3, 4}, {5}}, {{{6}}}, {{7}, {8, 9}}}) + .release(); + vector_of_columns cols2; + cols2.push_back(std::move(str_col2)); + 
cols2.push_back(std::move(int_col2)); + auto [null_mask, null_count] = + cudf::bools_to_mask(cudf::test::fixed_width_column_wrapper{{true, true, false}}); + auto sub_struct_col = + cudf::make_structs_column(num_rows, std::move(cols2), null_count, std::move(*null_mask)); + vector_of_columns cols; + cols.push_back(std::move(str_col)); + cols.push_back(std::move(int_col)); + cols.push_back(std::move(bool_col)); + cols.push_back(std::move(list_col)); + cols.push_back(std::move(sub_struct_col)); + + auto struct_col = cudf::make_structs_column(num_rows, std::move(cols), 0, {}); + std::vector> table_cols; + table_cols.emplace_back(struct_col.release()); + cudf::table input(std::move(table_cols)); + + // Create name metadata + auto sub_metadata = cudf::column_metadata{"struct"}; + sub_metadata.children_meta = {{"string2"}, {"integral2"}}; + auto metadata = cudf::column_metadata{"a"}; + metadata.children_meta = {{"string"}, {"integral"}, {"bool"}, {"nested_list"}, sub_metadata}; + + nanoarrow::UniqueSchema expected_schema; + ArrowSchemaInit(expected_schema.get()); + ArrowSchemaSetTypeStruct(expected_schema.get(), 1); + + ArrowSchemaInit(expected_schema->children[0]); + ArrowSchemaSetTypeStruct(expected_schema->children[0], 5); + ArrowSchemaSetName(expected_schema->children[0], "a"); + expected_schema->children[0]->flags = 0; + + auto child = expected_schema->children[0]; + ArrowSchemaInitFromType(child->children[0], NANOARROW_TYPE_STRING); + ArrowSchemaSetName(child->children[0], "string"); + child->children[0]->flags = 0; + + ArrowSchemaInitFromType(child->children[1], NANOARROW_TYPE_INT32); + ArrowSchemaSetName(child->children[1], "integral"); + child->children[1]->flags = 0; + + ArrowSchemaInitFromType(child->children[2], NANOARROW_TYPE_BOOL); + ArrowSchemaSetName(child->children[2], "bool"); + child->children[2]->flags = 0; + + ArrowSchemaInitFromType(child->children[3], NANOARROW_TYPE_LIST); + ArrowSchemaSetName(child->children[3], "nested_list"); + 
child->children[3]->flags = 0; + ArrowSchemaInitFromType(child->children[3]->children[0], NANOARROW_TYPE_LIST); + ArrowSchemaSetName(child->children[3]->children[0], "element"); + child->children[3]->children[0]->flags = 0; + ArrowSchemaInitFromType(child->children[3]->children[0]->children[0], NANOARROW_TYPE_INT64); + ArrowSchemaSetName(child->children[3]->children[0]->children[0], "element"); + child->children[3]->children[0]->children[0]->flags = 0; + + ArrowSchemaInit(child->children[4]); + ArrowSchemaSetTypeStruct(child->children[4], 2); + ArrowSchemaSetName(child->children[4], "struct"); + + ArrowSchemaInitFromType(child->children[4]->children[0], NANOARROW_TYPE_STRING); + ArrowSchemaSetName(child->children[4]->children[0], "string2"); + ArrowSchemaInitFromType(child->children[4]->children[1], NANOARROW_TYPE_INT32); + ArrowSchemaSetName(child->children[4]->children[1], "integral2"); + + auto got_arrow_schema = + cudf::to_arrow_schema(input.view(), std::vector<cudf::column_metadata>{metadata}); + compare_schemas(expected_schema.get(), got_arrow_schema.get()); + ArrowSchemaRelease(got_arrow_schema.get()); + + nanoarrow::UniqueArray expected_array; + ArrowArrayInitFromSchema(expected_array.get(), expected_schema.get(), nullptr); + + expected_array->length = input.num_rows(); + + auto array_a = expected_array->children[0]; + auto view_a = input.view().column(0); + array_a->length = view_a.size(); + array_a->null_count = view_a.null_count(); + + ArrowBufferSetAllocator(ArrowArrayBuffer(array_a, 0), noop_alloc); + ArrowArrayValidityBitmap(array_a)->buffer.data = + const_cast<uint8_t*>(reinterpret_cast<uint8_t const*>(view_a.null_mask())); + + populate_from_col<cudf::string_view>(array_a->children[0], view_a.child(0)); + populate_from_col<int32_t>(array_a->children[1], view_a.child(1)); + populate_from_col<bool>(array_a->children[2], view_a.child(2)); + populate_list_from_col(array_a->children[3], cudf::lists_column_view{view_a.child(3)}); + populate_list_from_col(array_a->children[3]->children[0], + 
cudf::lists_column_view{view_a.child(3).child(1)}); + populate_from_col<int64_t>(array_a->children[3]->children[0]->children[0], + view_a.child(3).child(1).child(1)); + + auto array_struct = array_a->children[4]; + auto view_struct = view_a.child(4); + array_struct->length = view_struct.size(); + array_struct->null_count = view_struct.null_count(); + + ArrowBufferSetAllocator(ArrowArrayBuffer(array_struct, 0), noop_alloc); + ArrowArrayValidityBitmap(array_struct)->buffer.data = + const_cast<uint8_t*>(reinterpret_cast<uint8_t const*>(view_struct.null_mask())); + + populate_from_col<cudf::string_view>(array_struct->children[0], view_struct.child(0)); + populate_from_col<int32_t>(array_struct->children[1], view_struct.child(1)); + + ArrowArrayFinishBuilding(expected_array.get(), NANOARROW_VALIDATION_LEVEL_NONE, nullptr); + + auto got_arrow_array = cudf::to_arrow_device(std::move(input)); + EXPECT_EQ(rmm::get_current_cuda_device().value(), got_arrow_array->device_id); + EXPECT_EQ(ARROW_DEVICE_CUDA, got_arrow_array->device_type); + + compare_arrays(expected_schema.get(), expected_array.get(), &got_arrow_array->array); + ArrowArrayRelease(&got_arrow_array->array); +} + +template <typename T> +using fp_wrapper = cudf::test::fixed_point_column_wrapper<T>; + +TEST_F(ToArrowDeviceTest, FixedPoint64Table) +{ + using namespace numeric; + + for (auto const scale : {3, 2, 1, 0, -1, -2, -3}) { + auto const expect_data = std::vector<int64_t>{-1, -1, 2, 0, 3, 0, 4, 0, 5, 0, 6, 0}; + auto col = fp_wrapper<int64_t>({-1, 2, 3, 4, 5, 6}, scale_type{scale}); + std::vector<std::unique_ptr<cudf::column>> table_cols; + table_cols.emplace_back(col.release()); + auto input = cudf::table(std::move(table_cols)); + + nanoarrow::UniqueSchema expected_schema; + ArrowSchemaInit(expected_schema.get()); + ArrowSchemaSetTypeStruct(expected_schema.get(), 1); + ArrowSchemaInit(expected_schema->children[0]); + ArrowSchemaSetTypeDecimal(expected_schema->children[0], + NANOARROW_TYPE_DECIMAL128, + cudf::detail::max_precision<int64_t>(), + -scale); + ArrowSchemaSetName(expected_schema->children[0], "a"); + 
expected_schema->children[0]->flags = 0; + + auto got_arrow_schema = + cudf::to_arrow_schema(input.view(), std::vector<cudf::column_metadata>{{"a"}}); + compare_schemas(expected_schema.get(), got_arrow_schema.get()); + ArrowSchemaRelease(got_arrow_schema.get()); + + auto result_dev_data = std::make_unique<rmm::device_uvector<int64_t>>( + expect_data.size(), cudf::get_default_stream()); + cudaMemcpy(result_dev_data->data(), + expect_data.data(), + sizeof(int64_t) * expect_data.size(), + cudaMemcpyHostToDevice); + + cudf::get_default_stream().synchronize(); + nanoarrow::UniqueArray expected_array; + ArrowArrayInitFromSchema(expected_array.get(), expected_schema.get(), nullptr); + expected_array->length = input.num_rows(); + + expected_array->children[0]->length = input.num_rows(); + ArrowBufferSetAllocator(ArrowArrayBuffer(expected_array->children[0], 0), noop_alloc); + ArrowArrayValidityBitmap(expected_array->children[0])->buffer.data = + const_cast<uint8_t*>(reinterpret_cast<uint8_t const*>(input.view().column(0).null_mask())); + + auto data_ptr = reinterpret_cast<uint8_t*>(result_dev_data->data()); + ArrowBufferSetAllocator( + ArrowArrayBuffer(expected_array->children[0], 1), + ArrowBufferDeallocator( + [](ArrowBufferAllocator* alloc, uint8_t*, int64_t) { + auto buf = + reinterpret_cast<std::unique_ptr<rmm::device_uvector<int64_t>>*>(alloc->private_data); + delete buf; + }, + new std::unique_ptr<rmm::device_uvector<int64_t>>(std::move(result_dev_data)))); + ArrowArrayBuffer(expected_array->children[0], 1)->data = data_ptr; + ArrowArrayFinishBuilding(expected_array.get(), NANOARROW_VALIDATION_LEVEL_NONE, nullptr); + + auto got_arrow_array = cudf::to_arrow_device(std::move(input)); + ASSERT_EQ(rmm::get_current_cuda_device().value(), got_arrow_array->device_id); + ASSERT_EQ(ARROW_DEVICE_CUDA, got_arrow_array->device_type); + + compare_arrays(expected_schema.get(), expected_array.get(), &got_arrow_array->array); + ArrowArrayRelease(&got_arrow_array->array); + } +} + +TEST_F(ToArrowDeviceTest, FixedPoint128Table) +{ + using namespace numeric; + + for (auto const scale : {3, 2, 1, 0, -1, -2, -3}) { + auto const expect_data = 
std::vector<__int128_t>{-1, 2, 3, 4, 5, 6}; + auto col = fp_wrapper<__int128_t>({-1, 2, 3, 4, 5, 6}, scale_type{scale}); + std::vector<std::unique_ptr<cudf::column>> table_cols; + table_cols.emplace_back(col.release()); + auto input = cudf::table(std::move(table_cols)); + + nanoarrow::UniqueSchema expected_schema; + ArrowSchemaInit(expected_schema.get()); + ArrowSchemaSetTypeStruct(expected_schema.get(), 1); + ArrowSchemaInit(expected_schema->children[0]); + ArrowSchemaSetTypeDecimal(expected_schema->children[0], + NANOARROW_TYPE_DECIMAL128, + cudf::detail::max_precision<__int128_t>(), + -scale); + ArrowSchemaSetName(expected_schema->children[0], "a"); + expected_schema->children[0]->flags = 0; + + auto got_arrow_schema = + cudf::to_arrow_schema(input.view(), std::vector<cudf::column_metadata>{{"a"}}); + compare_schemas(expected_schema.get(), got_arrow_schema.get()); + ArrowSchemaRelease(got_arrow_schema.get()); + + nanoarrow::UniqueArray expected_array; + ArrowArrayInitFromSchema(expected_array.get(), expected_schema.get(), nullptr); + expected_array->length = input.num_rows(); + + populate_from_col<__int128_t>(expected_array->children[0], input.view().column(0)); + ArrowArrayFinishBuilding(expected_array.get(), NANOARROW_VALIDATION_LEVEL_NONE, nullptr); + + auto got_arrow_array = cudf::to_arrow_device(std::move(input)); + EXPECT_EQ(rmm::get_current_cuda_device().value(), got_arrow_array->device_id); + EXPECT_EQ(ARROW_DEVICE_CUDA, got_arrow_array->device_type); + + compare_arrays(expected_schema.get(), expected_array.get(), &got_arrow_array->array); + ArrowArrayRelease(&got_arrow_array->array); + } +} diff --git a/docs/cudf/source/conf.py b/docs/cudf/source/conf.py index 7afc8fe19bf..b891ff99d47 100644 --- a/docs/cudf/source/conf.py +++ b/docs/cudf/source/conf.py @@ -306,6 +306,7 @@ def clean_all_xml_files(path): intersphinx_mapping = { "cupy": ("https://docs.cupy.dev/en/stable/", None), "dlpack": ("https://dmlc.github.io/dlpack/latest/", None), + "nanoarrow": ("https://arrow.apache.org/nanoarrow/latest", None), 
"numpy": ("https://numpy.org/doc/stable", None), "pandas": ("https://pandas.pydata.org/docs/", None), "pyarrow": ("https://arrow.apache.org/docs/", None),