Skip to content

Commit

Permalink
Add to_arrow_device function to cudf interop using nanoarrow (#15047)
Browse files Browse the repository at this point in the history
Introduce new `to_arrow_device` and `to_arrow_schema` functions to utilize the `ArrowDeviceArray` structure for zero-copy passing of libcudf::table.

Add nanoarrow as a vendored lib and a script to update it.

Initial step towards addressing #14926

Authors:
  - Matt Topol (https://github.com/zeroshade)
  - Vyas Ramasubramani (https://github.com/vyasr)
  - David Wendt (https://github.com/davidwendt)

Approvers:
  - Vyas Ramasubramani (https://github.com/vyasr)
  - David Wendt (https://github.com/davidwendt)

URL: #15047
  • Loading branch information
zeroshade authored Apr 1, 2024
1 parent 09f8c8a commit 268996a
Show file tree
Hide file tree
Showing 9 changed files with 1,882 additions and 6 deletions.
8 changes: 6 additions & 2 deletions cpp/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -210,12 +210,14 @@ include(cmake/thirdparty/get_kvikio.cmake)
include(cmake/thirdparty/get_fmt.cmake)
# find spdlog
include(cmake/thirdparty/get_spdlog.cmake)
# find nanoarrow
include(cmake/thirdparty/get_nanoarrow.cmake)

# Workaround until https://github.com/rapidsai/rapids-cmake/issues/176 is resolved
if(NOT BUILD_SHARED_LIBS)
include("${rapids-cmake-dir}/export/find_package_file.cmake")
list(APPEND METADATA_KINDS BUILD INSTALL)
list(APPEND dependencies KvikIO ZLIB nvcomp)
list(APPEND dependencies KvikIO ZLIB nvcomp nanoarrow)
if(TARGET cufile::cuFile_interface)
list(APPEND dependencies cuFile)
endif()
Expand Down Expand Up @@ -358,6 +360,7 @@ add_library(
src/interop/dlpack.cpp
src/interop/from_arrow.cu
src/interop/to_arrow.cu
src/interop/to_arrow_device.cu
src/interop/detail/arrow_allocator.cpp
src/io/avro/avro.cpp
src/io/avro/avro_gpu.cu
Expand Down Expand Up @@ -735,6 +738,7 @@ target_include_directories(
"$<BUILD_INTERFACE:${CUDF_SOURCE_DIR}/include>"
"$<BUILD_INTERFACE:${CUDF_GENERATED_INCLUDE_DIR}/include>"
PRIVATE "$<BUILD_INTERFACE:${CUDF_SOURCE_DIR}/src>"
"$<BUILD_INTERFACE:${nanoarrow_SOURCE_DIR}/src>"
INTERFACE "$<INSTALL_INTERFACE:include>"
)

Expand Down Expand Up @@ -783,7 +787,7 @@ target_link_libraries(
cudf
PUBLIC ${ARROW_LIBRARIES} CCCL::CCCL rmm::rmm
PRIVATE $<BUILD_LOCAL_INTERFACE:nvtx3-cpp> cuco::cuco ZLIB::ZLIB nvcomp::nvcomp kvikio::kvikio
$<TARGET_NAME_IF_EXISTS:cuFile_interface>
$<TARGET_NAME_IF_EXISTS:cuFile_interface> nanoarrow
)

# Add Conda library, and include paths if specified
Expand Down
36 changes: 36 additions & 0 deletions cpp/cmake/thirdparty/get_nanoarrow.cmake
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
# =============================================================================
# Copyright (c) 2024, NVIDIA CORPORATION.
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
# in compliance with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under the License
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
# or implied. See the License for the specific language governing permissions and limitations under
# the License.
# =============================================================================

# Find nanoarrow through rapids_cpm_find (cloning it from GitHub via the CPM arguments below),
# requesting a non-shared build with NANOARROW_NAMESPACE set to `cudf`, then mark the `nanoarrow`
# target as position-independent code.
#
# Keyword arguments (all single-valued):
#   VERSION    - nanoarrow version passed to rapids_cpm_find
#   FORK       - GitHub organisation/user that hosts the arrow-nanoarrow repository
#   PINNED_TAG - git tag or commit hash to check out
function(find_and_configure_nanoarrow)
  # A function scope starts out with a copy of the caller's variables, so every keyword list is
  # defined explicitly here; otherwise a stray `options` or `multiValueArgs` in the calling scope
  # would silently change how the arguments are parsed.
  set(options "")
  set(oneValueArgs VERSION FORK PINNED_TAG)
  set(multiValueArgs "")
  cmake_parse_arguments(PKG "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})

  rapids_cpm_find(
    nanoarrow ${PKG_VERSION}
    GLOBAL_TARGETS nanoarrow
    CPM_ARGS
    GIT_REPOSITORY https://github.com/${PKG_FORK}/arrow-nanoarrow.git
    GIT_TAG ${PKG_PINNED_TAG}
    # TODO: Commit hashes are not supported with shallow clones. Can switch this if and when we pin
    # to an actual tag.
    GIT_SHALLOW FALSE
    OPTIONS "BUILD_SHARED_LIBS OFF" "NANOARROW_NAMESPACE cudf"
  )
  set_target_properties(nanoarrow PROPERTIES POSITION_INDEPENDENT_CODE ON)
endfunction()

# Request nanoarrow 0.4.0 from the `apache` GitHub organisation, pinned to an exact commit hash
# rather than a release tag (which is why the function above disables shallow cloning).
find_and_configure_nanoarrow(
VERSION 0.4.0 FORK apache PINNED_TAG c97720003ff863b81805bcdb9f7c91306ab6b6a8
)
96 changes: 95 additions & 1 deletion cpp/include/cudf/interop.hpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2020-2023, NVIDIA CORPORATION.
* Copyright (c) 2020-2024, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -34,11 +34,16 @@
#include <cudf/table/table.hpp>
#include <cudf/table/table_view.hpp>
#include <cudf/types.hpp>
#include <cudf/utilities/span.hpp>

#include <rmm/mr/device/per_device_resource.hpp>

struct DLManagedTensor;

// Forward declaration of the Arrow C Device Data Interface array struct; it is used below as the
// pointee type of `unique_device_array_t`.
struct ArrowDeviceArray;

// Forward declaration of the Arrow schema struct; it is used below as the pointee type of
// `unique_schema_t`.
struct ArrowSchema;

namespace cudf {
/**
* @addtogroup interop_dlpack
Expand Down Expand Up @@ -162,6 +167,95 @@ std::shared_ptr<arrow::Scalar> to_arrow(cudf::scalar const& input,
column_metadata const& metadata = {},
rmm::cuda_stream_view stream = cudf::get_default_stream(),
arrow::MemoryPool* ar_mr = arrow::default_memory_pool());

/**
* @brief Alias for a `std::unique_ptr` that owns an `ArrowSchema` and destroys it through a
* custom deleter supplied as a plain function pointer (`void (*)(ArrowSchema*)`)
*
*/
using unique_schema_t = std::unique_ptr<ArrowSchema, void (*)(ArrowSchema*)>;

/**
* @brief Alias for a `std::unique_ptr` that owns an `ArrowDeviceArray` and destroys it through a
* custom deleter supplied as a plain function pointer (`void (*)(ArrowDeviceArray*)`)
*
*/
using unique_device_array_t = std::unique_ptr<ArrowDeviceArray, void (*)(ArrowDeviceArray*)>;

/**
* @brief Create ArrowSchema from cudf table and metadata
*
* Populates and returns an ArrowSchema C struct using a table and metadata.
*
* @note For decimals, since the precision is not stored for them in libcudf,
* decimals will be converted to an Arrow decimal128 which has the widest precision that cudf
* decimal type supports. For example, `numeric::decimal32` will be converted to Arrow decimal128
* with the precision of 9 which is the maximum precision for 32-bit types. Similarly,
* `numeric::decimal128` will be converted to Arrow decimal128 with the precision of 38.
*
* @param input Table to create a schema from
* @param metadata Contains the hierarchy of names of columns and children
* @return ArrowSchema generated from `input`, owned by the returned `unique_schema_t`
*/
unique_schema_t to_arrow_schema(cudf::table_view const& input,
cudf::host_span<column_metadata const> metadata);

/**
* @brief Create `ArrowDeviceArray` from a cudf table
*
* Populates the C struct ArrowDeviceArray without performing copies if possible.
* This maintains the data on the GPU device and gives ownership of the table
* and its buffers to the ArrowDeviceArray struct.
*
* After calling this function, the release callback on the returned ArrowDeviceArray
* must be called to clean up the memory.
*
* @note For decimals, since the precision is not stored for them in libcudf
* it will be converted to an Arrow decimal128 with the widest-precision the cudf decimal type
* supports. For example, numeric::decimal32 will be converted to Arrow decimal128 of the precision
* 9 which is the maximum precision for 32-bit types. Similarly, numeric::decimal128 will be
* converted to Arrow decimal128 of the precision 38.
*
* @note Copies will be performed in the cases where cudf differs from Arrow
* such as in the representation of bools (Arrow uses a bitmap, cudf uses 1-byte per value).
*
* @param table Input table, ownership of the data will be moved to the result
* @param stream CUDA stream used for device memory operations and kernel launches
* @param mr Device memory resource used for any allocations during conversion
* @return ArrowDeviceArray which will have ownership of the GPU data, consumer must call release
*/
unique_device_array_t to_arrow_device(
cudf::table&& table,
rmm::cuda_stream_view stream = cudf::get_default_stream(),
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());

/**
* @brief Create `ArrowDeviceArray` from a cudf column
*
* Populates the C struct ArrowDeviceArray without performing copies if possible.
* This maintains the data on the GPU device and gives ownership of the column
* and its buffers to the ArrowDeviceArray struct.
*
* After calling this function, the release callback on the returned ArrowDeviceArray
* must be called to clean up the memory.
*
* @note For decimals, since the precision is not stored for them in libcudf
* it will be converted to an Arrow decimal128 with the widest-precision the cudf decimal type
* supports. For example, numeric::decimal32 will be converted to Arrow decimal128 of the precision
* 9 which is the maximum precision for 32-bit types. Similarly, numeric::decimal128 will be
* converted to Arrow decimal128 of the precision 38.
*
* @note Copies will be performed in the cases where cudf differs from Arrow such as
* in the representation of bools (Arrow uses a bitmap, cudf uses 1 byte per value).
*
* @param col Input column, ownership of the data will be moved to the result
* @param stream CUDA stream used for device memory operations and kernel launches
* @param mr Device memory resource used for any allocations during conversion
* @return ArrowDeviceArray which will have ownership of the GPU data, consumer must call release
*/
unique_device_array_t to_arrow_device(
cudf::column&& col,
rmm::cuda_stream_view stream = cudf::get_default_stream(),
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());

/**
* @brief Create `cudf::table` from given arrow Table input
*
Expand Down
48 changes: 48 additions & 0 deletions cpp/include/cudf/interop/detail/arrow.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
/*
* Copyright (c) 2024, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#pragma once

#include <nanoarrow/nanoarrow.hpp>

// from Arrow C Device Data Interface
// https://arrow.apache.org/docs/format/CDeviceDataInterface.html
#ifndef ARROW_C_DEVICE_DATA_INTERFACE
#define ARROW_C_DEVICE_DATA_INTERFACE

// Device type for the allocated memory.
// The numeric codes below come from the Arrow C Device Data Interface and are shared with every
// other producer/consumer of that interface, so they must not be renumbered. Only the device
// types this header needs are listed, which is why the values are not contiguous.
typedef int32_t ArrowDeviceType;

// CPU device, same as using ArrowArray directly
#define ARROW_DEVICE_CPU 1
// CUDA GPU Device
#define ARROW_DEVICE_CUDA 2
// Pinned CUDA CPU memory by cudaMallocHost
#define ARROW_DEVICE_CUDA_HOST 3
// CUDA managed/unified memory allocated by cudaMallocManaged
#define ARROW_DEVICE_CUDA_MANAGED 13

// Local copy of `struct ArrowDeviceArray` from the Arrow C Device Data Interface. It is only
// defined when ARROW_C_DEVICE_DATA_INTERFACE has not already been defined by another header.
// The member order and types are a C ABI shared with other implementations: do not reorder,
// resize or add members.
struct ArrowDeviceArray {
// The wrapped Arrow C Data Interface array
struct ArrowArray array;
// Identifies the device the data belongs to
// NOTE(review): presumably the CUDA device ordinal when device_type is a CUDA type - confirm
int64_t device_id;
// One of the ARROW_DEVICE_* values
ArrowDeviceType device_type;
// Opaque handle a consumer can synchronize on before reading the data
// NOTE(review): per the spec this is a `cudaEvent_t*` for CUDA devices and may be null - confirm
void* sync_event;

// reserved bytes for future expansion
int64_t reserved[3];
};

#endif // ARROW_C_DEVICE_DATA_INTERFACE
Loading

0 comments on commit 268996a

Please sign in to comment.