Skip to content

Commit 052da9c

Browse files
Compile Optimizations - batch 3 (#5119)
Next round of compile optimizations. Big focus on shuffle operations. Breaking since it deletes some deprecated functions. Aggregate reduction in size of libcugraph.so (with first two batches): 9% Authors: - Chuck Hastings (https://github.com/ChuckHastings) - Seunghwa Kang (https://github.com/seunghwak) Approvers: - Joseph Nke (https://github.com/jnke2016) - Seunghwa Kang (https://github.com/seunghwak) URL: #5119
1 parent 07202f4 commit 052da9c

File tree

80 files changed

+643
-1647
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

80 files changed

+643
-1647
lines changed

cpp/include/cugraph/mtmg/detail/per_device_edgelist.hpp

Lines changed: 30 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -16,8 +16,8 @@
1616

1717
#pragma once
1818

19-
#include <cugraph/detail/shuffle_wrappers.hpp>
2019
#include <cugraph/mtmg/handle.hpp>
20+
#include <cugraph/shuffle_functions.hpp>
2121

2222
// FIXME: Could use std::span once compiler supports C++20
2323
#include <raft/core/host_span.hpp>
@@ -301,38 +301,35 @@ class per_device_edgelist_t {
301301
if (edge_end_time_) resize_and_copy_buffers(*edge_end_time_, total_size, stream);
302302
}
303303

304-
auto tmp_wgt = wgt_ ? std::make_optional(std::move((*wgt_)[0])) : std::nullopt;
305-
auto tmp_edge_id = edge_id_ ? std::make_optional(std::move((*edge_id_)[0])) : std::nullopt;
306-
auto tmp_edge_type =
307-
edge_type_ ? std::make_optional(std::move((*edge_type_)[0])) : std::nullopt;
308-
auto tmp_edge_start_time =
309-
edge_start_time_ ? std::make_optional(std::move((*edge_start_time_)[0])) : std::nullopt;
310-
auto tmp_edge_end_time =
311-
edge_end_time_ ? std::make_optional(std::move((*edge_end_time_)[0])) : std::nullopt;
312-
313-
std::tie(store_transposed ? dst_[0] : src_[0],
314-
store_transposed ? src_[0] : dst_[0],
315-
tmp_wgt,
316-
tmp_edge_id,
317-
tmp_edge_type,
318-
tmp_edge_start_time,
319-
tmp_edge_end_time,
320-
std::ignore) =
321-
cugraph::detail::shuffle_ext_vertex_pairs_with_values_to_local_gpu_by_edge_partitioning(
322-
handle.raft_handle(),
323-
store_transposed ? std::move(dst_[0]) : std::move(src_[0]),
324-
store_transposed ? std::move(src_[0]) : std::move(dst_[0]),
325-
std::move(tmp_wgt),
326-
std::move(tmp_edge_id),
327-
std::move(tmp_edge_type),
328-
std::move(tmp_edge_start_time),
329-
std::move(tmp_edge_end_time));
330-
331-
if (tmp_wgt) ((*wgt_)[0]) = std::move(*tmp_wgt);
332-
if (tmp_edge_id) ((*edge_id_)[0]) = std::move(*tmp_edge_id);
333-
if (tmp_edge_type) ((*edge_type_)[0]) = std::move(*tmp_edge_type);
334-
if (tmp_edge_start_time) ((*edge_start_time_)[0]) = std::move(*tmp_edge_start_time);
335-
if (tmp_edge_end_time) ((*edge_end_time_)[0]) = std::move(*tmp_edge_end_time);
304+
std::vector<cugraph::arithmetic_device_uvector_t> tmp_edge_properties{};
305+
if (wgt_) tmp_edge_properties.push_back(std::move((*wgt_)[0]));
306+
if (edge_id_) tmp_edge_properties.push_back(std::move((*edge_id_)[0]));
307+
if (edge_type_) tmp_edge_properties.push_back(std::move((*edge_type_)[0]));
308+
if (edge_start_time_) tmp_edge_properties.push_back(std::move((*edge_start_time_)[0]));
309+
if (edge_end_time_) tmp_edge_properties.push_back(std::move((*edge_end_time_)[0]));
310+
311+
std::tie(src_[0], dst_[0], tmp_edge_properties, std::ignore) =
312+
cugraph::shuffle_ext_edges(handle.raft_handle(),
313+
std::move(src_[0]),
314+
std::move(dst_[0]),
315+
std::move(tmp_edge_properties),
316+
store_transposed);
317+
318+
size_t pos{0};
319+
if (wgt_)
320+
((*wgt_)[0]) = std::move(std::get<rmm::device_uvector<weight_t>>(tmp_edge_properties[pos++]));
321+
if (edge_id_)
322+
((*edge_id_)[0]) =
323+
std::move(std::get<rmm::device_uvector<edge_t>>(tmp_edge_properties[pos++]));
324+
if (edge_type_)
325+
((*edge_type_)[0]) =
326+
std::move(std::get<rmm::device_uvector<edge_type_t>>(tmp_edge_properties[pos++]));
327+
if (edge_start_time_)
328+
((*edge_start_time_)[0]) =
329+
std::move(std::get<rmm::device_uvector<edge_time_t>>(tmp_edge_properties[pos++]));
330+
if (edge_end_time_)
331+
((*edge_end_time_)[0]) =
332+
std::move(std::get<rmm::device_uvector<edge_time_t>>(tmp_edge_properties[pos++]));
336333
}
337334

338335
private:

cpp/include/cugraph/shuffle_functions.hpp

Lines changed: 37 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,9 @@
1515
*/
1616
#pragma once
1717

18+
#include <cugraph/arithmetic_variant_types.hpp>
1819
#include <cugraph/large_buffer_manager.hpp>
20+
#include <cugraph/utilities/dataframe_buffer.hpp>
1921

2022
#include <raft/core/handle.hpp>
2123
#include <raft/core/host_span.hpp>
@@ -75,51 +77,56 @@ shuffle_ext_vertex_value_pairs(raft::handle_t const& handle,
7577
* @brief Shuffle external edges to the owning GPUs (by edge partitioning)
7678
*
7779
* @tparam vertex_t Type of vertex identifiers. Needs to be an integral type.
78-
* @tparam edge_t Type of edge identifiers. Needs to be an integral type.
79-
* @tparam weight_t Type of edge weight. Currently float and double are supported.
80-
* @tparam edge_type_t Type of edge type. Needs to be an integral type, currently only int32_t is
81-
* supported.
82-
* @tparam edge_time_t Type of edge time. Needs to be an integral type.
8380
*
8481
* @param handle RAFT handle object to encapsulate resources (e.g. CUDA stream, communicator, and
8582
* handles to various CUDA libraries) to run graph algorithms.
8683
* @param edge_srcs Vector of source vertex ids
8784
* @param edge_dsts Vector of destination vertex ids
88-
* @param edge_weights Optional vector of edge weights
89-
* @param edge_ids Optional vector of edge ids
90-
* @param edge_types Optional vector of edge types
91-
* @param edge_start_times Optional vector of edge start times
92-
* @param edge_end_times Optional vector of edge end times
85+
* @param edge_properties Vector of edge properties, each element is an arithmetic device vector
9386
* @param store_transposed Should be true if shuffled edges will be used with a cugraph::graph_t
9487
* object with store_transposed = true. Should be false otherwise.
95-
* @return Tuple of vectors storing edge sources, destinations, optional weights,
96-
* optional edge ids, optional edge types, optional edge start times, optional edge end
97-
* times mapped to this GPU and a vector storing the number of edges received from each GPU.
88+
* @return Tuple of vectors storing edge sources, destinations, and edge properties mapped to this GPU, and a vector storing the number of edges received from each GPU.
9889
*/
99-
template <typename vertex_t,
100-
typename edge_t,
101-
typename weight_t,
102-
typename edge_type_t,
103-
typename edge_time_t>
90+
template <typename vertex_t>
10491
std::tuple<rmm::device_uvector<vertex_t>,
10592
rmm::device_uvector<vertex_t>,
106-
std::optional<rmm::device_uvector<weight_t>>,
107-
std::optional<rmm::device_uvector<edge_t>>,
108-
std::optional<rmm::device_uvector<edge_type_t>>,
109-
std::optional<rmm::device_uvector<edge_time_t>>,
110-
std::optional<rmm::device_uvector<edge_time_t>>,
93+
std::vector<cugraph::arithmetic_device_uvector_t>,
11194
std::vector<size_t>>
11295
shuffle_ext_edges(raft::handle_t const& handle,
11396
rmm::device_uvector<vertex_t>&& edge_srcs,
11497
rmm::device_uvector<vertex_t>&& edge_dsts,
115-
std::optional<rmm::device_uvector<weight_t>>&& edge_weights,
116-
std::optional<rmm::device_uvector<edge_t>>&& edge_ids,
117-
std::optional<rmm::device_uvector<edge_type_t>>&& edge_types,
118-
std::optional<rmm::device_uvector<edge_time_t>>&& edge_start_times,
119-
std::optional<rmm::device_uvector<edge_time_t>>&& edge_end_times,
98+
std::vector<cugraph::arithmetic_device_uvector_t>&& edge_properties,
12099
bool store_transposed,
121100
std::optional<large_buffer_type_t> large_buffer_type = std::nullopt);
122101

102+
/**
103+
* @ingroup graph_functions_cpp
104+
* @brief Shuffle internal edges to the owning GPUs (by edge partitioning)
105+
*
106+
* @tparam vertex_t Type of vertex identifiers. Needs to be an integral type.
107+
*
108+
* @param handle RAFT handle object to encapsulate resources (e.g. CUDA stream, communicator, and
109+
* handles to various CUDA libraries) to run graph algorithms.
110+
* @param majors Vector of edge major vertex ids
111+
* @param minors Vector of edge minor vertex ids
112+
* @param edge_properties Vector of edge properties, each element is an arithmetic device vector
113+
* @param store_transposed Should be true if shuffled edges will be used with a cugraph::graph_t
114+
* object with store_transposed = true. Should be false otherwise.
115+
* @return Tuple of vectors storing edge sources, destinations, and edge properties
116+
*/
117+
template <typename vertex_t>
118+
std::tuple<rmm::device_uvector<vertex_t>,
119+
rmm::device_uvector<vertex_t>,
120+
std::vector<cugraph::arithmetic_device_uvector_t>,
121+
std::vector<size_t>>
122+
shuffle_int_edges(raft::handle_t const& handle,
123+
rmm::device_uvector<vertex_t>&& majors,
124+
rmm::device_uvector<vertex_t>&& minors,
125+
std::vector<cugraph::arithmetic_device_uvector_t>&& edge_properties,
126+
bool store_transposed,
127+
raft::host_span<vertex_t const> vertex_partition_range_lasts,
128+
std::optional<large_buffer_type_t> large_buffer_type = std::nullopt);
129+
123130
/**
124131
* @brief Shuffle local edge sources (already placed by edge partitioning) to the owning GPUs (by
125132
* vertex partitioning).
@@ -129,7 +136,8 @@ shuffle_ext_edges(raft::handle_t const& handle,
129136
* @param handle RAFT handle object to encapsulate resources (e.g. CUDA stream, communicator, and
130137
* handles to various CUDA libraries) to run graph algorithms.
131138
* @param edge_srcs Vector of local edge source IDs
132-
* @param vertex_partition_range_lasts Span of vertex partition range lasts (size = number of GPUs)
139+
* @param vertex_partition_range_lasts Span of vertex partition range lasts (size = number of
140+
* GPUs)
133141
* @param store_transposed Should be true if shuffled edges will be used with a cugraph::graph_t
134142
* object with store_transposed = true. Should be false otherwise.
135143
* @return Vector of shuffled edge source vertex IDs (shuffled by vertex partitioning).

cpp/src/c_api/allgather.cpp

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright (c) 2023-2024, NVIDIA CORPORATION.
2+
* Copyright (c) 2023-2025, NVIDIA CORPORATION.
33
*
44
* Licensed under the Apache License, Version 2.0 (the "License");
55
* you may not use this file except in compliance with the License.
@@ -25,7 +25,6 @@
2525

2626
#include <cugraph/algorithms.hpp>
2727
#include <cugraph/detail/collect_comm_wrapper.hpp>
28-
#include <cugraph/detail/shuffle_wrappers.hpp>
2928
#include <cugraph/detail/utility_wrappers.hpp>
3029
#include <cugraph/graph_functions.hpp>
3130

cpp/src/c_api/decompress_to_edgelist.cpp

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,6 @@
2424
#include <cugraph_c/algorithms.h>
2525

2626
#include <cugraph/algorithms.hpp>
27-
#include <cugraph/detail/shuffle_wrappers.hpp>
2827
#include <cugraph/detail/utility_wrappers.hpp>
2928
#include <cugraph/graph_functions.hpp>
3029

cpp/src/c_api/ecg.cpp

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,6 @@
1717
#include <cugraph_c/community_algorithms.h>
1818

1919
#include <cugraph/algorithms.hpp>
20-
#include <cugraph/detail/shuffle_wrappers.hpp>
2120
#include <cugraph/detail/utility_wrappers.hpp>
2221
#include <cugraph/graph_functions.hpp>
2322

cpp/src/c_api/extract_vertex_list.cpp

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,6 @@
2424
#include <cugraph_c/algorithms.h>
2525

2626
#include <cugraph/algorithms.hpp>
27-
#include <cugraph/detail/shuffle_wrappers.hpp>
2827
#include <cugraph/detail/utility_wrappers.hpp>
2928
#include <cugraph/graph_functions.hpp>
3029

cpp/src/c_api/graph_functions.cpp

Lines changed: 8 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,6 @@
2525
#include <cugraph_c/graph_functions.h>
2626

2727
#include <cugraph/algorithms.hpp>
28-
#include <cugraph/detail/shuffle_wrappers.hpp>
2928
#include <cugraph/detail/utility_wrappers.hpp>
3029
#include <cugraph/graph_functions.hpp>
3130
#include <cugraph/shuffle_functions.hpp>
@@ -73,27 +72,14 @@ struct create_vertex_pairs_functor : public cugraph::c_api::abstract_functor {
7372
second_copy.data(), second_->as_type<vertex_t>(), second_->size_, handle_.get_stream());
7473

7574
if constexpr (multi_gpu) {
76-
std::tie(first_copy,
77-
second_copy,
78-
std::ignore,
79-
std::ignore,
80-
std::ignore,
81-
std::ignore,
82-
std::ignore,
83-
std::ignore) =
84-
cugraph::detail::shuffle_ext_vertex_pairs_with_values_to_local_gpu_by_edge_partitioning<
85-
vertex_t,
86-
edge_t,
87-
weight_t,
88-
edge_type_type_t,
89-
int32_t>(handle_,
90-
std::move(first_copy),
91-
std::move(second_copy),
92-
std::nullopt,
93-
std::nullopt,
94-
std::nullopt,
95-
std::nullopt,
96-
std::nullopt);
75+
std::vector<cugraph::arithmetic_device_uvector_t> edge_properties{};
76+
77+
std::tie(first_copy, second_copy, std::ignore, std::ignore) =
78+
cugraph::shuffle_ext_edges(handle_,
79+
std::move(first_copy),
80+
std::move(second_copy),
81+
std::move(edge_properties),
82+
false);
9783
}
9884
// FIXME: use std::tuple (template) instead.
9985
result_ = new cugraph::c_api::cugraph_vertex_pairs_t{

cpp/src/c_api/graph_mg.cpp

Lines changed: 28 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -146,39 +146,40 @@ struct create_graph_functor : public cugraph::c_api::abstract_functor {
146146
rmm::device_uvector<vertex_t> edgelist_dsts =
147147
concatenate<vertex_t>(handle_, dst_, num_arrays_);
148148

149-
std::optional<rmm::device_uvector<weight_t>> edgelist_weights =
150-
weights_ ? std::make_optional(concatenate<weight_t>(handle_, weights_, num_arrays_))
149+
std::vector<cugraph::arithmetic_device_uvector_t> edgelist_edge_properties{};
150+
151+
if (weights_)
152+
edgelist_edge_properties.push_back(concatenate<weight_t>(handle_, weights_, num_arrays_));
153+
if (edge_ids_)
154+
edgelist_edge_properties.push_back(concatenate<edge_t>(handle_, edge_ids_, num_arrays_));
155+
if (edge_type_ids_)
156+
edgelist_edge_properties.push_back(
157+
concatenate<edge_type_t>(handle_, edge_type_ids_, num_arrays_));
158+
159+
std::tie(edgelist_srcs, edgelist_dsts, edgelist_edge_properties, std::ignore) =
160+
cugraph::shuffle_ext_edges(handle_,
161+
std::move(edgelist_srcs),
162+
std::move(edgelist_dsts),
163+
std::move(edgelist_edge_properties),
164+
store_transposed);
165+
166+
size_t pos{0};
167+
auto edgelist_weights =
168+
weights_ ? std::make_optional(std::move(
169+
std::get<rmm::device_uvector<weight_t>>(edgelist_edge_properties[pos++])))
151170
: std::nullopt;
152-
153-
std::optional<rmm::device_uvector<edge_t>> edgelist_edge_ids =
154-
edge_ids_ ? std::make_optional(concatenate<edge_t>(handle_, edge_ids_, num_arrays_))
171+
auto edgelist_edge_ids =
172+
edge_ids_ ? std::make_optional(std::move(
173+
std::get<rmm::device_uvector<edge_t>>(edgelist_edge_properties[pos++])))
155174
: std::nullopt;
156-
157-
std::optional<rmm::device_uvector<edge_type_t>> edgelist_edge_types =
158-
edge_type_ids_
159-
? std::make_optional(concatenate<edge_type_t>(handle_, edge_type_ids_, num_arrays_))
160-
: std::nullopt;
175+
auto edgelist_edge_types =
176+
edge_type_ids_ ? std::make_optional(std::move(std::get<rmm::device_uvector<edge_type_t>>(
177+
edgelist_edge_properties[pos++])))
178+
: std::nullopt;
161179

162180
std::optional<rmm::device_uvector<edge_time_t>> edgelist_edge_start_times{std::nullopt};
163181
std::optional<rmm::device_uvector<edge_time_t>> edgelist_edge_end_times{std::nullopt};
164182

165-
std::tie(edgelist_srcs,
166-
edgelist_dsts,
167-
edgelist_weights,
168-
edgelist_edge_ids,
169-
edgelist_edge_types,
170-
edgelist_edge_start_times,
171-
edgelist_edge_end_times,
172-
std::ignore) = cugraph::shuffle_ext_edges(handle_,
173-
std::move(edgelist_srcs),
174-
std::move(edgelist_dsts),
175-
std::move(edgelist_weights),
176-
std::move(edgelist_edge_ids),
177-
std::move(edgelist_edge_types),
178-
std::move(edgelist_edge_start_times),
179-
std::move(edgelist_edge_end_times),
180-
store_transposed);
181-
182183
if (vertex_list) {
183184
vertex_list = cugraph::shuffle_ext_vertices(handle_, std::move(*vertex_list));
184185
}

cpp/src/c_api/hits.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright (c) 2022-2024, NVIDIA CORPORATION.
2+
* Copyright (c) 2022-2025, NVIDIA CORPORATION.
33
*
44
* Licensed under the Apache License, Version 2.0 (the "License");
55
* you may not use this file except in compliance with the License.
@@ -18,11 +18,11 @@
1818
#include "c_api/graph.hpp"
1919
#include "c_api/resource_handle.hpp"
2020
#include "c_api/utils.hpp"
21+
#include "detail/shuffle_wrappers.hpp"
2122

2223
#include <cugraph_c/algorithms.h>
2324

2425
#include <cugraph/algorithms.hpp>
25-
#include <cugraph/detail/shuffle_wrappers.hpp>
2626
#include <cugraph/detail/utility_wrappers.hpp>
2727
#include <cugraph/graph_functions.hpp>
2828

cpp/src/c_api/induced_subgraph.cpp

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,6 @@
2424
#include <cugraph_c/algorithms.h>
2525

2626
#include <cugraph/algorithms.hpp>
27-
#include <cugraph/detail/shuffle_wrappers.hpp>
2827
#include <cugraph/detail/utility_wrappers.hpp>
2928
#include <cugraph/graph_functions.hpp>
3029

0 commit comments

Comments
 (0)