Skip to content

Commit cf8deb0

Browse files
authored
Merge pull request #4576 from rapidsai/branch-24.08
Forward-merge branch-24.08 into branch-24.10
2 parents 940e34d + 8f7fec9 commit cf8deb0

31 files changed

+4631
-150
lines changed

cpp/include/cugraph_c/similarity_algorithms.h

Lines changed: 66 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -145,6 +145,34 @@ cugraph_error_code_t cugraph_overlap_coefficients(const cugraph_resource_handle_
145145
cugraph_similarity_result_t** result,
146146
cugraph_error_t** error);
147147

148+
/**
149+
* @brief Perform cosine similarity computation
150+
*
151+
* Compute the cosine similarity for the specified vertex_pairs.
152+
*
153+
* Note that cosine similarity must run on a symmetric graph.
154+
*
155+
* @param [in] handle Handle for accessing resources
156+
* @param [in] graph Pointer to graph
157+
* @param [in] vertex_pairs Vertex pairs for which the similarity is computed
158+
* @param [in] use_weight If true consider the edge weight in the graph, if false use an
159+
* edge weight of 1.
* When true, the graph must have edge weights: the C API
* implementation guards this with a CAPI_EXPECTS check that
* uses CUGRAPH_INVALID_INPUT and the message
* "use_weight is true but edge weights are not provided."
160+
* @param [in] do_expensive_check A flag to run expensive checks for input arguments (if set to
161+
* `true`).
162+
* @param [out] result Opaque pointer to similarity results
163+
* @param [out] error Pointer to an error object storing details of any error. Will
164+
* be populated if error code is not CUGRAPH_SUCCESS
165+
* @return error code (CUGRAPH_SUCCESS when no error was recorded)
166+
*/
167+
cugraph_error_code_t cugraph_cosine_similarity_coefficients(
168+
const cugraph_resource_handle_t* handle,
169+
cugraph_graph_t* graph,
170+
const cugraph_vertex_pairs_t* vertex_pairs,
171+
bool_t use_weight,
172+
bool_t do_expensive_check,
173+
cugraph_similarity_result_t** result,
174+
cugraph_error_t** error);
175+
148176
/**
149177
* @brief Perform All-Pairs Jaccard similarity computation
150178
*
@@ -259,6 +287,44 @@ cugraph_error_code_t cugraph_all_pairs_overlap_coefficients(
259287
cugraph_similarity_result_t** result,
260288
cugraph_error_t** error);
261289

290+
/**
291+
* @brief Perform All-Pairs cosine similarity computation
292+
*
293+
* Compute the similarity for all vertex pairs derived from the two-hop neighbors
294+
* of an optional specified vertex list. This function will identify the two-hop
295+
* neighbors of the specified vertices (all vertices in the graph if not specified)
296+
* and compute similarity for those vertices.
297+
*
298+
* If the topk parameter is specified then the result will only contain the top k
299+
* highest scoring results.
300+
*
301+
* Note that cosine similarity must run on a symmetric graph.
302+
*
303+
* @param [in] handle Handle for accessing resources
304+
* @param [in] graph Pointer to graph
305+
* @param [in] vertices Vertex list for input. If null then compute based on
306+
* all vertices in the graph.
307+
* @param [in] use_weight If true consider the edge weight in the graph, if false use an
308+
* edge weight of 1.
* When true, the graph must have edge weights: the C API
* implementation guards this with a CAPI_EXPECTS check that
* uses CUGRAPH_INVALID_INPUT and the message
* "use_weight is true but edge weights are not provided."
309+
* @param [in] topk Specify how many answers to return. Specifying SIZE_MAX
310+
* will return all values (SIZE_MAX is treated as "no limit"
* by the all-pairs functor in the C API implementation).
311+
* @param [in] do_expensive_check A flag to run expensive checks for input arguments (if set to
312+
* `true`).
313+
* @param [out] result Opaque pointer to similarity results
314+
* @param [out] error Pointer to an error object storing details of any error. Will
315+
* be populated if error code is not CUGRAPH_SUCCESS
316+
* @return error code (CUGRAPH_SUCCESS when no error was recorded)
317+
*/
318+
cugraph_error_code_t cugraph_all_pairs_cosine_similarity_coefficients(
319+
const cugraph_resource_handle_t* handle,
320+
cugraph_graph_t* graph,
321+
const cugraph_type_erased_device_array_view_t* vertices,
322+
bool_t use_weight,
323+
size_t topk,
324+
bool_t do_expensive_check,
325+
cugraph_similarity_result_t** result,
326+
cugraph_error_t** error);
327+
262328
#ifdef __cplusplus
263329
}
264330
#endif

cpp/src/c_api/similarity.cpp

Lines changed: 115 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -212,6 +212,22 @@ struct all_pairs_similarity_functor : public cugraph::c_api::abstract_functor {
212212
: std::nullopt,
213213
topk_ != SIZE_MAX ? std::make_optional(topk_) : std::nullopt);
214214

215+
cugraph::unrenumber_int_vertices<vertex_t, multi_gpu>(
216+
handle_,
217+
v1.data(),
218+
v1.size(),
219+
number_map->data(),
220+
graph_view.vertex_partition_range_lasts(),
221+
false);
222+
223+
cugraph::unrenumber_int_vertices<vertex_t, multi_gpu>(
224+
handle_,
225+
v2.data(),
226+
v2.size(),
227+
number_map->data(),
228+
graph_view.vertex_partition_range_lasts(),
229+
false);
230+
215231
result_ = new cugraph::c_api::cugraph_similarity_result_t{
216232
new cugraph::c_api::cugraph_type_erased_device_array_t(similarity_coefficients,
217233
graph_->weight_type_),
@@ -274,6 +290,33 @@ struct sorensen_functor {
274290
}
275291
};
276292

293+
struct cosine_functor {
294+
template <typename vertex_t, typename edge_t, typename weight_t, bool multi_gpu>
295+
rmm::device_uvector<weight_t> operator()(
296+
raft::handle_t const& handle,
297+
cugraph::graph_view_t<vertex_t, edge_t, false, multi_gpu> const& graph_view,
298+
std::optional<cugraph::edge_property_view_t<edge_t, weight_t const*>> edge_weight_view,
299+
std::tuple<raft::device_span<vertex_t const>, raft::device_span<vertex_t const>> vertex_pairs)
300+
{
301+
return cugraph::cosine_similarity_coefficients(
302+
handle, graph_view, edge_weight_view, vertex_pairs);
303+
}
304+
305+
template <typename vertex_t, typename edge_t, typename weight_t, bool multi_gpu>
306+
std::tuple<rmm::device_uvector<vertex_t>,
307+
rmm::device_uvector<vertex_t>,
308+
rmm::device_uvector<weight_t>>
309+
operator()(raft::handle_t const& handle,
310+
cugraph::graph_view_t<vertex_t, edge_t, false, multi_gpu> const& graph_view,
311+
std::optional<cugraph::edge_property_view_t<edge_t, weight_t const*>> edge_weight_view,
312+
std::optional<raft::device_span<vertex_t const>> vertices,
313+
std::optional<size_t> topk)
314+
{
315+
return cugraph::cosine_similarity_all_pairs_coefficients(
316+
handle, graph_view, edge_weight_view, vertices, topk);
317+
}
318+
};
319+
277320
struct overlap_functor {
278321
template <typename vertex_t, typename edge_t, typename weight_t, bool multi_gpu>
279322
rmm::device_uvector<weight_t> operator()(
@@ -300,6 +343,33 @@ struct overlap_functor {
300343
}
301344
};
302345

346+
struct cosine_similarity_functor {
347+
template <typename vertex_t, typename edge_t, typename weight_t, bool multi_gpu>
348+
rmm::device_uvector<weight_t> operator()(
349+
raft::handle_t const& handle,
350+
cugraph::graph_view_t<vertex_t, edge_t, false, multi_gpu> const& graph_view,
351+
std::optional<cugraph::edge_property_view_t<edge_t, weight_t const*>> edge_weight_view,
352+
std::tuple<raft::device_span<vertex_t const>, raft::device_span<vertex_t const>> vertex_pairs)
353+
{
354+
return cugraph::cosine_similarity_coefficients(
355+
handle, graph_view, edge_weight_view, vertex_pairs);
356+
}
357+
358+
template <typename vertex_t, typename edge_t, typename weight_t, bool multi_gpu>
359+
std::tuple<rmm::device_uvector<vertex_t>,
360+
rmm::device_uvector<vertex_t>,
361+
rmm::device_uvector<weight_t>>
362+
operator()(raft::handle_t const& handle,
363+
cugraph::graph_view_t<vertex_t, edge_t, false, multi_gpu> const& graph_view,
364+
std::optional<cugraph::edge_property_view_t<edge_t, weight_t const*>> edge_weight_view,
365+
std::optional<raft::device_span<vertex_t const>> vertices,
366+
std::optional<size_t> topk)
367+
{
368+
return cugraph::cosine_similarity_all_pairs_coefficients(
369+
handle, graph_view, edge_weight_view, vertices, topk);
370+
}
371+
};
372+
303373
} // namespace
304374

305375
extern "C" cugraph_type_erased_device_array_view_t* cugraph_similarity_result_get_similarity(
@@ -391,6 +461,28 @@ extern "C" cugraph_error_code_t cugraph_overlap_coefficients(
391461
return cugraph::c_api::run_algorithm(graph, functor, result, error);
392462
}
393463

464+
/**
 * C API entry point: cosine similarity for caller-supplied vertex pairs.
 *
 * Checks that a weighted request is made against a graph that has edge weights, then
 * builds a similarity_functor configured with cosine_similarity_functor and hands it to
 * cugraph::c_api::run_algorithm, whose return value is returned to the caller.
 */
extern "C" cugraph_error_code_t cugraph_cosine_similarity_coefficients(
  const cugraph_resource_handle_t* handle,
  cugraph_graph_t* graph,
  const cugraph_vertex_pairs_t* vertex_pairs,
  bool_t use_weight,
  bool_t do_expensive_check,
  cugraph_similarity_result_t** result,
  cugraph_error_t** error)
{
  // View the opaque C handle as the internal graph type so edge_weights_ can be inspected.
  auto* const internal_graph = reinterpret_cast<cugraph::c_api::cugraph_graph_t*>(graph);

  // The weight pointer is only examined when weights were requested (short-circuit ||).
  CAPI_EXPECTS(!use_weight || (internal_graph->edge_weights_ != nullptr),
               CUGRAPH_INVALID_INPUT,
               "use_weight is true but edge weights are not provided.",
               *error);

  similarity_functor cosine_task(
    handle, graph, vertex_pairs, cosine_similarity_functor{}, use_weight, do_expensive_check);

  return cugraph::c_api::run_algorithm(graph, cosine_task, result, error);
}
485+
394486
extern "C" cugraph_error_code_t cugraph_all_pairs_jaccard_coefficients(
395487
const cugraph_resource_handle_t* handle,
396488
cugraph_graph_t* graph,
@@ -459,3 +551,26 @@ extern "C" cugraph_error_code_t cugraph_all_pairs_overlap_coefficients(
459551

460552
return cugraph::c_api::run_algorithm(graph, functor, result, error);
461553
}
554+
555+
/**
 * C API entry point: all-pairs cosine similarity.
 *
 * Checks that a weighted request is made against a graph that has edge weights, then
 * builds an all_pairs_similarity_functor configured with cosine_similarity_functor and
 * hands it to cugraph::c_api::run_algorithm, whose return value is returned to the caller.
 *
 * @param handle              Resource handle, forwarded to the functor.
 * @param graph               Graph to operate on.
 * @param vertices            Vertex list, forwarded to the functor.
 * @param use_weight          When true the graph must have edge weights; otherwise the
 *                            CAPI_EXPECTS check below fires with CUGRAPH_INVALID_INPUT.
 * @param topk                Result limit, forwarded to the functor.
 * @param do_expensive_check  Forwarded to the functor.
 * @param result              Output pointer passed to run_algorithm.
 * @param error               Output error object; also the target of the CAPI_EXPECTS check.
 */
extern "C" cugraph_error_code_t cugraph_all_pairs_cosine_similarity_coefficients(
  const cugraph_resource_handle_t* handle,
  cugraph_graph_t* graph,
  const cugraph_type_erased_device_array_view_t* vertices,
  bool_t use_weight,
  size_t topk,
  bool_t do_expensive_check,
  cugraph_similarity_result_t** result,
  cugraph_error_t** error)
{
  if (use_weight) {
    CAPI_EXPECTS(
      reinterpret_cast<cugraph::c_api::cugraph_graph_t*>(graph)->edge_weights_ != nullptr,
      CUGRAPH_INVALID_INPUT,
      "use_weight is true but edge weights are not provided.",
      *error);
  }

  // Must select the cosine functor here: the previous code passed overlap_functor{},
  // which made this entry point compute overlap coefficients instead of cosine similarity.
  all_pairs_similarity_functor functor(
    handle, graph, vertices, cosine_similarity_functor{}, use_weight, topk, do_expensive_check);

  return cugraph::c_api::run_algorithm(graph, functor, result, error);
}

0 commit comments

Comments
 (0)