Skip to content

Commit e4d51b2

Browse files
authored
compute cosine similarity for vertex pairs (#4482)
compute cosine similarity for vertex pairs Authors: - Naim (https://github.com/naimnv) Approvers: - Seunghwa Kang (https://github.com/seunghwak) URL: #4482
1 parent f519ac1 commit e4d51b2

15 files changed

+764
-88
lines changed

cpp/CMakeLists.txt

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -193,9 +193,11 @@ set(CUGRAPH_SOURCES
193193
src/link_prediction/jaccard_sg.cu
194194
src/link_prediction/sorensen_sg.cu
195195
src/link_prediction/overlap_sg.cu
196+
src/link_prediction/cosine_sg.cu
196197
src/link_prediction/jaccard_mg.cu
197198
src/link_prediction/sorensen_mg.cu
198199
src/link_prediction/overlap_mg.cu
200+
src/link_prediction/cosine_mg.cu
199201
src/layout/legacy/force_atlas2.cu
200202
src/converters/legacy/COOtoCSR.cu
201203
src/community/legacy/spectral_clustering.cu

cpp/include/cugraph/algorithms.hpp

Lines changed: 87 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2082,6 +2082,37 @@ rmm::device_uvector<weight_t> jaccard_coefficients(
20822082
std::tuple<raft::device_span<vertex_t const>, raft::device_span<vertex_t const>> vertex_pairs,
20832083
bool do_expensive_check = false);
20842084

2085+
/**
2086+
* @brief Compute Cosine similarity coefficient
2087+
*
2088+
* Similarity is computed for every pair of vertices specified. Note that
2089+
* similarity algorithms expect a symmetric graph.
2090+
*
2091+
* @throws cugraph::logic_error when an error occurs.
2092+
*
2093+
* @tparam vertex_t Type of vertex identifiers. Needs to be an integral type.
2094+
* @tparam edge_t Type of edge identifiers. Needs to be an integral type.
2095+
* @tparam weight_t Type of edge weights. Needs to be a floating point type.
2096+
* @tparam multi_gpu Flag indicating whether template instantiation should target single-GPU (false) or multi-GPU (true).
2097+
* @param handle RAFT handle object to encapsulate resources (e.g. CUDA stream, communicator, and
2098+
* handles to various CUDA libraries) to run graph algorithms.
2099+
* @param graph_view Graph view object.
2100+
* @param edge_weight_view Optional view object holding edge weights for @p graph_view. If @p
2101+
* edge_weight_view.has_value() == true, use the weights associated with the graph. If false, assume
2102+
* a weight of 1 for all edges.
2103+
* @param vertex_pairs tuple of device spans defining the vertex pairs to compute similarity for.
2104+
* In a multi-gpu context each vertex pair should be local to this GPU.
2105+
* @param do_expensive_check A flag to run expensive checks for input arguments (if set to `true`).
2106+
* @return similarity coefficient for the corresponding @p vertex_pairs
2107+
*/
2108+
template <typename vertex_t, typename edge_t, typename weight_t, bool multi_gpu>
2109+
rmm::device_uvector<weight_t> cosine_similarity_coefficients(
2110+
raft::handle_t const& handle,
2111+
graph_view_t<vertex_t, edge_t, false, multi_gpu> const& graph_view,
2112+
std::optional<edge_property_view_t<edge_t, weight_t const*>> edge_weight_view,
2113+
std::tuple<raft::device_span<vertex_t const>, raft::device_span<vertex_t const>> vertex_pairs,
2114+
bool do_expensive_check = false);
2115+
20852116
/**
20862117
* @brief Compute Sorensen similarity coefficient
20872118
*
@@ -2202,6 +2233,62 @@ std::
22022233
std::optional<size_t> topk,
22032234
bool do_expensive_check = false);
22042235

2236+
/**
2237+
* @brief Compute Cosine all pairs similarity coefficient
2238+
*
2239+
* Similarity is computed for all pairs of vertices. Note that in a sparse
2240+
* graph, many of the vertex pairs will have a score of zero. We actually
2241+
* compute similarity only for vertices that are two hop neighbors within
2242+
* the graph, since vertices that are not two hop neighbors will have
2243+
* a score of 0.
2244+
*
2245+
* If @p vertices is specified we will compute similarity on two hop
2246+
* neighbors of the @p vertices. If @p vertices is not specified it will
2247+
* compute similarity on all two hop neighbors in the graph.
2248+
*
2249+
* If @p topk is specified only the top @p topk scoring vertex pairs
2250+
* will be returned, if not specified then scores for all computed vertex pairs
2251+
* will be returned.
2252+
*
2253+
* Note the list of two hop neighbors in the entire graph might be a large
2254+
* number of vertex pairs. If the graph is dense enough it could be as large
2255+
* as the number of vertices squared, which might run out of memory.
2256+
*
2257+
* @throws cugraph::logic_error when an error occurs.
2258+
*
2259+
* @tparam vertex_t Type of vertex identifiers. Needs to be an integral type.
2260+
* @tparam edge_t Type of edge identifiers. Needs to be an integral type.
2261+
* @tparam weight_t Type of edge weights. Needs to be a floating point type.
2262+
* @tparam multi_gpu Flag indicating whether template instantiation should target single-GPU (false) or multi-GPU (true).
2263+
* @param handle RAFT handle object to encapsulate resources (e.g. CUDA stream, communicator, and
2264+
* handles to various CUDA libraries) to run graph algorithms.
2265+
* @param graph_view Graph view object.
2266+
* @param edge_weight_view Optional view object holding edge weights for @p graph_view. If @p
2267+
* edge_weight_view.has_value() == true, use the weights associated with the graph. If false, assume
2268+
* a weight of 1 for all edges.
2269+
* @param vertices optional device span defining the seed vertices. In a multi-gpu context the
2270+
* vertices should be local to this GPU.
2271+
* @param topk optional specification of how many of the top scoring vertex pairs should be
2272+
* returned
2273+
* @param do_expensive_check A flag to run expensive checks for input arguments (if set to `true`).
2274+
* @return tuple containing three device vectors (v1, v2, score) of the same length. Corresponding
2275+
* elements in the vectors identify a result, v1 identifying a vertex in the graph, v2 identifying
2276+
* one of v1's two hop neighbors, and the score identifying the similarity score between v1 and v2.
2277+
* If @p topk was specified then the vectors will be no longer than @p topk elements. In a
2278+
* multi-gpu context, if @p topk is specified all results will return on GPU rank 0, otherwise they
2279+
* will be returned on the local GPU for vertex v1.
2280+
*/
2281+
template <typename vertex_t, typename edge_t, typename weight_t, bool multi_gpu>
2282+
std::
2283+
tuple<rmm::device_uvector<vertex_t>, rmm::device_uvector<vertex_t>, rmm::device_uvector<weight_t>>
2284+
cosine_similarity_all_pairs_coefficients(
2285+
raft::handle_t const& handle,
2286+
graph_view_t<vertex_t, edge_t, false, multi_gpu> const& graph_view,
2287+
std::optional<edge_property_view_t<edge_t, weight_t const*>> edge_weight_view,
2288+
std::optional<raft::device_span<vertex_t const>> vertices,
2289+
std::optional<size_t> topk,
2290+
bool do_expensive_check = false);
2291+
22052292
/**
22062293
* @brief Compute Sorensen similarity coefficient
22072294
*

cpp/src/link_prediction/cosine_mg.cu

Lines changed: 122 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,122 @@
1+
/*
2+
* Copyright (c) 2022-2024, NVIDIA CORPORATION.
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
#include "link_prediction/cosine_similarity_impl.cuh"
17+
18+
namespace cugraph {
19+
20+
template rmm::device_uvector<float> cosine_similarity_coefficients(
21+
raft::handle_t const& handle,
22+
graph_view_t<int32_t, int32_t, false, true> const& graph_view,
23+
std::optional<edge_property_view_t<int32_t, float const*>> edge_weight_view,
24+
std::tuple<raft::device_span<int32_t const>, raft::device_span<int32_t const>> vertex_pairs,
25+
bool do_expensive_check);
26+
27+
template rmm::device_uvector<float> cosine_similarity_coefficients(
28+
raft::handle_t const& handle,
29+
graph_view_t<int32_t, int64_t, false, true> const& graph_view,
30+
std::optional<edge_property_view_t<int64_t, float const*>> edge_weight_view,
31+
std::tuple<raft::device_span<int32_t const>, raft::device_span<int32_t const>> vertex_pairs,
32+
bool do_expensive_check);
33+
34+
template rmm::device_uvector<float> cosine_similarity_coefficients(
35+
raft::handle_t const& handle,
36+
graph_view_t<int64_t, int64_t, false, true> const& graph_view,
37+
std::optional<edge_property_view_t<int64_t, float const*>> edge_weight_view,
38+
std::tuple<raft::device_span<int64_t const>, raft::device_span<int64_t const>> vertex_pairs,
39+
bool do_expensive_check);
40+
41+
template rmm::device_uvector<double> cosine_similarity_coefficients(
42+
raft::handle_t const& handle,
43+
graph_view_t<int32_t, int32_t, false, true> const& graph_view,
44+
std::optional<edge_property_view_t<int32_t, double const*>> edge_weight_view,
45+
std::tuple<raft::device_span<int32_t const>, raft::device_span<int32_t const>> vertex_pairs,
46+
bool do_expensive_check);
47+
48+
template rmm::device_uvector<double> cosine_similarity_coefficients(
49+
raft::handle_t const& handle,
50+
graph_view_t<int32_t, int64_t, false, true> const& graph_view,
51+
std::optional<edge_property_view_t<int64_t, double const*>> edge_weight_view,
52+
std::tuple<raft::device_span<int32_t const>, raft::device_span<int32_t const>> vertex_pairs,
53+
bool do_expensive_check);
54+
55+
template rmm::device_uvector<double> cosine_similarity_coefficients(
56+
raft::handle_t const& handle,
57+
graph_view_t<int64_t, int64_t, false, true> const& graph_view,
58+
std::optional<edge_property_view_t<int64_t, double const*>> edge_weight_view,
59+
std::tuple<raft::device_span<int64_t const>, raft::device_span<int64_t const>> vertex_pairs,
60+
bool do_expensive_check);
61+
62+
template std::
63+
tuple<rmm::device_uvector<int32_t>, rmm::device_uvector<int32_t>, rmm::device_uvector<float>>
64+
cosine_similarity_all_pairs_coefficients(
65+
raft::handle_t const& handle,
66+
graph_view_t<int32_t, int32_t, false, true> const& graph_view,
67+
std::optional<edge_property_view_t<int32_t, float const*>> edge_weight_view,
68+
std::optional<raft::device_span<int32_t const>> vertices,
69+
std::optional<size_t> topk,
70+
bool do_expensive_check);
71+
72+
template std::
73+
tuple<rmm::device_uvector<int32_t>, rmm::device_uvector<int32_t>, rmm::device_uvector<float>>
74+
cosine_similarity_all_pairs_coefficients(
75+
raft::handle_t const& handle,
76+
graph_view_t<int32_t, int64_t, false, true> const& graph_view,
77+
std::optional<edge_property_view_t<int64_t, float const*>> edge_weight_view,
78+
std::optional<raft::device_span<int32_t const>> vertices,
79+
std::optional<size_t> topk,
80+
bool do_expensive_check);
81+
82+
template std::
83+
tuple<rmm::device_uvector<int64_t>, rmm::device_uvector<int64_t>, rmm::device_uvector<float>>
84+
cosine_similarity_all_pairs_coefficients(
85+
raft::handle_t const& handle,
86+
graph_view_t<int64_t, int64_t, false, true> const& graph_view,
87+
std::optional<edge_property_view_t<int64_t, float const*>> edge_weight_view,
88+
std::optional<raft::device_span<int64_t const>> vertices,
89+
std::optional<size_t> topk,
90+
bool do_expensive_check);
91+
92+
template std::
93+
tuple<rmm::device_uvector<int32_t>, rmm::device_uvector<int32_t>, rmm::device_uvector<double>>
94+
cosine_similarity_all_pairs_coefficients(
95+
raft::handle_t const& handle,
96+
graph_view_t<int32_t, int32_t, false, true> const& graph_view,
97+
std::optional<edge_property_view_t<int32_t, double const*>> edge_weight_view,
98+
std::optional<raft::device_span<int32_t const>> vertices,
99+
std::optional<size_t> topk,
100+
bool do_expensive_check);
101+
102+
template std::
103+
tuple<rmm::device_uvector<int32_t>, rmm::device_uvector<int32_t>, rmm::device_uvector<double>>
104+
cosine_similarity_all_pairs_coefficients(
105+
raft::handle_t const& handle,
106+
graph_view_t<int32_t, int64_t, false, true> const& graph_view,
107+
std::optional<edge_property_view_t<int64_t, double const*>> edge_weight_view,
108+
std::optional<raft::device_span<int32_t const>> vertices,
109+
std::optional<size_t> topk,
110+
bool do_expensive_check);
111+
112+
template std::
113+
tuple<rmm::device_uvector<int64_t>, rmm::device_uvector<int64_t>, rmm::device_uvector<double>>
114+
cosine_similarity_all_pairs_coefficients(
115+
raft::handle_t const& handle,
116+
graph_view_t<int64_t, int64_t, false, true> const& graph_view,
117+
std::optional<edge_property_view_t<int64_t, double const*>> edge_weight_view,
118+
std::optional<raft::device_span<int64_t const>> vertices,
119+
std::optional<size_t> topk,
120+
bool do_expensive_check);
121+
122+
} // namespace cugraph

cpp/src/link_prediction/cosine_sg.cu

Lines changed: 122 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,122 @@
1+
/*
2+
* Copyright (c) 2022-2024, NVIDIA CORPORATION.
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
#include "link_prediction/cosine_similarity_impl.cuh"
17+
18+
namespace cugraph {
19+
20+
template rmm::device_uvector<float> cosine_similarity_coefficients(
21+
raft::handle_t const& handle,
22+
graph_view_t<int32_t, int32_t, false, false> const& graph_view,
23+
std::optional<edge_property_view_t<int32_t, float const*>> edge_weight_view,
24+
std::tuple<raft::device_span<int32_t const>, raft::device_span<int32_t const>> vertex_pairs,
25+
bool do_expensive_check);
26+
27+
template rmm::device_uvector<float> cosine_similarity_coefficients(
28+
raft::handle_t const& handle,
29+
graph_view_t<int32_t, int64_t, false, false> const& graph_view,
30+
std::optional<edge_property_view_t<int64_t, float const*>> edge_weight_view,
31+
std::tuple<raft::device_span<int32_t const>, raft::device_span<int32_t const>> vertex_pairs,
32+
bool do_expensive_check);
33+
34+
template rmm::device_uvector<float> cosine_similarity_coefficients(
35+
raft::handle_t const& handle,
36+
graph_view_t<int64_t, int64_t, false, false> const& graph_view,
37+
std::optional<edge_property_view_t<int64_t, float const*>> edge_weight_view,
38+
std::tuple<raft::device_span<int64_t const>, raft::device_span<int64_t const>> vertex_pairs,
39+
bool do_expensive_check);
40+
41+
template rmm::device_uvector<double> cosine_similarity_coefficients(
42+
raft::handle_t const& handle,
43+
graph_view_t<int32_t, int32_t, false, false> const& graph_view,
44+
std::optional<edge_property_view_t<int32_t, double const*>> edge_weight_view,
45+
std::tuple<raft::device_span<int32_t const>, raft::device_span<int32_t const>> vertex_pairs,
46+
bool do_expensive_check);
47+
48+
template rmm::device_uvector<double> cosine_similarity_coefficients(
49+
raft::handle_t const& handle,
50+
graph_view_t<int32_t, int64_t, false, false> const& graph_view,
51+
std::optional<edge_property_view_t<int64_t, double const*>> edge_weight_view,
52+
std::tuple<raft::device_span<int32_t const>, raft::device_span<int32_t const>> vertex_pairs,
53+
bool do_expensive_check);
54+
55+
template rmm::device_uvector<double> cosine_similarity_coefficients(
56+
raft::handle_t const& handle,
57+
graph_view_t<int64_t, int64_t, false, false> const& graph_view,
58+
std::optional<edge_property_view_t<int64_t, double const*>> edge_weight_view,
59+
std::tuple<raft::device_span<int64_t const>, raft::device_span<int64_t const>> vertex_pairs,
60+
bool do_expensive_check);
61+
62+
template std::
63+
tuple<rmm::device_uvector<int32_t>, rmm::device_uvector<int32_t>, rmm::device_uvector<float>>
64+
cosine_similarity_all_pairs_coefficients(
65+
raft::handle_t const& handle,
66+
graph_view_t<int32_t, int32_t, false, false> const& graph_view,
67+
std::optional<edge_property_view_t<int32_t, float const*>> edge_weight_view,
68+
std::optional<raft::device_span<int32_t const>> vertices,
69+
std::optional<size_t> topk,
70+
bool do_expensive_check);
71+
72+
template std::
73+
tuple<rmm::device_uvector<int32_t>, rmm::device_uvector<int32_t>, rmm::device_uvector<float>>
74+
cosine_similarity_all_pairs_coefficients(
75+
raft::handle_t const& handle,
76+
graph_view_t<int32_t, int64_t, false, false> const& graph_view,
77+
std::optional<edge_property_view_t<int64_t, float const*>> edge_weight_view,
78+
std::optional<raft::device_span<int32_t const>> vertices,
79+
std::optional<size_t> topk,
80+
bool do_expensive_check);
81+
82+
template std::
83+
tuple<rmm::device_uvector<int64_t>, rmm::device_uvector<int64_t>, rmm::device_uvector<float>>
84+
cosine_similarity_all_pairs_coefficients(
85+
raft::handle_t const& handle,
86+
graph_view_t<int64_t, int64_t, false, false> const& graph_view,
87+
std::optional<edge_property_view_t<int64_t, float const*>> edge_weight_view,
88+
std::optional<raft::device_span<int64_t const>> vertices,
89+
std::optional<size_t> topk,
90+
bool do_expensive_check);
91+
92+
template std::
93+
tuple<rmm::device_uvector<int32_t>, rmm::device_uvector<int32_t>, rmm::device_uvector<double>>
94+
cosine_similarity_all_pairs_coefficients(
95+
raft::handle_t const& handle,
96+
graph_view_t<int32_t, int32_t, false, false> const& graph_view,
97+
std::optional<edge_property_view_t<int32_t, double const*>> edge_weight_view,
98+
std::optional<raft::device_span<int32_t const>> vertices,
99+
std::optional<size_t> topk,
100+
bool do_expensive_check);
101+
102+
template std::
103+
tuple<rmm::device_uvector<int32_t>, rmm::device_uvector<int32_t>, rmm::device_uvector<double>>
104+
cosine_similarity_all_pairs_coefficients(
105+
raft::handle_t const& handle,
106+
graph_view_t<int32_t, int64_t, false, false> const& graph_view,
107+
std::optional<edge_property_view_t<int64_t, double const*>> edge_weight_view,
108+
std::optional<raft::device_span<int32_t const>> vertices,
109+
std::optional<size_t> topk,
110+
bool do_expensive_check);
111+
112+
template std::
113+
tuple<rmm::device_uvector<int64_t>, rmm::device_uvector<int64_t>, rmm::device_uvector<double>>
114+
cosine_similarity_all_pairs_coefficients(
115+
raft::handle_t const& handle,
116+
graph_view_t<int64_t, int64_t, false, false> const& graph_view,
117+
std::optional<edge_property_view_t<int64_t, double const*>> edge_weight_view,
118+
std::optional<raft::device_span<int64_t const>> vertices,
119+
std::optional<size_t> topk,
120+
bool do_expensive_check);
121+
122+
} // namespace cugraph

0 commit comments

Comments
 (0)