Skip to content

Commit

Permalink
compute cosine similarity for vertex pairs (#4482)
Browse files Browse the repository at this point in the history
compute cosine similarity for vertex pairs

Authors:
  - Naim (https://github.com/naimnv)

Approvers:
  - Seunghwa Kang (https://github.com/seunghwak)

URL: #4482
  • Loading branch information
naimnv authored Jun 19, 2024
1 parent f519ac1 commit e4d51b2
Show file tree
Hide file tree
Showing 15 changed files with 764 additions and 88 deletions.
2 changes: 2 additions & 0 deletions cpp/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -193,9 +193,11 @@ set(CUGRAPH_SOURCES
src/link_prediction/jaccard_sg.cu
src/link_prediction/sorensen_sg.cu
src/link_prediction/overlap_sg.cu
src/link_prediction/cosine_sg.cu
src/link_prediction/jaccard_mg.cu
src/link_prediction/sorensen_mg.cu
src/link_prediction/overlap_mg.cu
src/link_prediction/cosine_mg.cu
src/layout/legacy/force_atlas2.cu
src/converters/legacy/COOtoCSR.cu
src/community/legacy/spectral_clustering.cu
Expand Down
87 changes: 87 additions & 0 deletions cpp/include/cugraph/algorithms.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -2082,6 +2082,37 @@ rmm::device_uvector<weight_t> jaccard_coefficients(
std::tuple<raft::device_span<vertex_t const>, raft::device_span<vertex_t const>> vertex_pairs,
bool do_expensive_check = false);

/**
* @brief Compute Cosine similarity coefficient
*
* Similarity is computed for every pair of vertices specified. Note that
* similarity algorithms expect a symmetric graph.
*
* @throws cugraph::logic_error when an error occurs.
*
* @tparam vertex_t Type of vertex identifiers. Needs to be an integral type.
* @tparam edge_t Type of edge identifiers. Needs to be an integral type.
* @tparam weight_t Type of edge weights. Needs to be a floating point type.
* @tparam multi_gpu Flag indicating whether template instantiation should target single-GPU (false)
* or multi-GPU (true).
* @param handle RAFT handle object to encapsulate resources (e.g. CUDA stream, communicator, and
* handles to various CUDA libraries) to run graph algorithms.
* @param graph_view Graph view object.
* @param edge_weight_view Optional view object holding edge weights for @p graph_view. If @p
* edge_weight_view.has_value() == true, use the weights associated with the graph. If false, assume
* a weight of 1 for all edges.
* @param vertex_pairs tuple of device spans defining the vertex pairs to compute similarity for.
* In a multi-gpu context each vertex pair should be local to this GPU.
* @param do_expensive_check A flag to run expensive checks for input arguments (if set to `true`).
* @return similarity coefficient for the corresponding @p vertex_pairs
*/
template <typename vertex_t, typename edge_t, typename weight_t, bool multi_gpu>
rmm::device_uvector<weight_t> cosine_similarity_coefficients(
raft::handle_t const& handle,
graph_view_t<vertex_t, edge_t, false, multi_gpu> const& graph_view,
std::optional<edge_property_view_t<edge_t, weight_t const*>> edge_weight_view,
std::tuple<raft::device_span<vertex_t const>, raft::device_span<vertex_t const>> vertex_pairs,
bool do_expensive_check = false);

/**
* @brief Compute Sorensen similarity coefficient
*
Expand Down Expand Up @@ -2202,6 +2233,62 @@ std::
std::optional<size_t> topk,
bool do_expensive_check = false);

/**
* @brief Compute Cosine all pairs similarity coefficient
*
* Similarity is computed for all pairs of vertices. Note that in a sparse
* graph, many of the vertex pairs will have a score of zero. We actually
* compute similarity only for vertices that are two hop neighbors within
* the graph, since vertices that are not two hop neighbors will have
* a score of 0.
*
* If @p vertices is specified we will compute similarity on two hop
* neighbors of the @p vertices. If @p vertices is not specified it will
* compute similarity on all two hop neighbors in the graph.
*
* If @p topk is specified only the top @p topk scoring vertex pairs
* will be returned, if not specified then scores for all computed vertex pairs
* will be returned.
*
* Note the list of two hop neighbors in the entire graph might be a large
* number of vertex pairs. If the graph is dense enough it could be as large
* as the number of vertices squared, which might run out of memory.
*
* @throws cugraph::logic_error when an error occurs.
*
* @tparam vertex_t Type of vertex identifiers. Needs to be an integral type.
* @tparam edge_t Type of edge identifiers. Needs to be an integral type.
* @tparam weight_t Type of edge weights. Needs to be a floating point type.
* @tparam multi_gpu Flag indicating whether template instantiation should target single-GPU (false)
* or multi-GPU (true).
* @param handle RAFT handle object to encapsulate resources (e.g. CUDA stream, communicator, and
* handles to various CUDA libraries) to run graph algorithms.
* @param graph_view Graph view object.
* @param edge_weight_view Optional view object holding edge weights for @p graph_view. If @p
* edge_weight_view.has_value() == true, use the weights associated with the graph. If false, assume
* a weight of 1 for all edges.
* @param vertices optional device span defining the seed vertices. In a multi-gpu context the
* vertices should be local to this GPU.
* @param topk optional specification of how many of the top scoring vertex pairs should be
* returned
* @param do_expensive_check A flag to run expensive checks for input arguments (if set to `true`).
* @return tuple containing three device vectors (v1, v2, score) of the same length. Corresponding
* elements in the vectors identify a result, v1 identifying a vertex in the graph, v2 identifying
* one of v1's two hop neighbors, and the score identifying the similarity score between v1 and v2.
* If @p topk was specified then the vectors will be no longer than @p topk elements. In a
* multi-gpu context, if @p topk is specified all results will return on GPU rank 0, otherwise they
* will be returned on the local GPU for vertex v1.
*/
template <typename vertex_t, typename edge_t, typename weight_t, bool multi_gpu>
std::
tuple<rmm::device_uvector<vertex_t>, rmm::device_uvector<vertex_t>, rmm::device_uvector<weight_t>>
cosine_similarity_all_pairs_coefficients(
raft::handle_t const& handle,
graph_view_t<vertex_t, edge_t, false, multi_gpu> const& graph_view,
std::optional<edge_property_view_t<edge_t, weight_t const*>> edge_weight_view,
std::optional<raft::device_span<vertex_t const>> vertices,
std::optional<size_t> topk,
bool do_expensive_check = false);

/**
* @brief Compute Sorensen similarity coefficient
*
Expand Down
122 changes: 122 additions & 0 deletions cpp/src/link_prediction/cosine_mg.cu
Original file line number Diff line number Diff line change
@@ -0,0 +1,122 @@
/*
* Copyright (c) 2022-2024, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "link_prediction/cosine_similarity_impl.cuh"

namespace cugraph {

// Explicit instantiations of the cosine similarity entry points with the last
// graph_view_t template argument (multi_gpu) set to true, i.e. the multi-GPU
// variants. Each instantiation is labelled with its (vertex_t, edge_t, weight_t).
// NOTE(review): the template definitions are presumably supplied by
// link_prediction/cosine_similarity_impl.cuh -- confirm.

// ---- cosine_similarity_coefficients: similarity for caller-supplied vertex pairs ----

// vertex_t = int32_t, edge_t = int32_t, weight_t = float
template rmm::device_uvector<float> cosine_similarity_coefficients(
raft::handle_t const& handle,
graph_view_t<int32_t, int32_t, false, true> const& graph_view,
std::optional<edge_property_view_t<int32_t, float const*>> edge_weight_view,
std::tuple<raft::device_span<int32_t const>, raft::device_span<int32_t const>> vertex_pairs,
bool do_expensive_check);

// vertex_t = int32_t, edge_t = int64_t, weight_t = float
template rmm::device_uvector<float> cosine_similarity_coefficients(
raft::handle_t const& handle,
graph_view_t<int32_t, int64_t, false, true> const& graph_view,
std::optional<edge_property_view_t<int64_t, float const*>> edge_weight_view,
std::tuple<raft::device_span<int32_t const>, raft::device_span<int32_t const>> vertex_pairs,
bool do_expensive_check);

// vertex_t = int64_t, edge_t = int64_t, weight_t = float
template rmm::device_uvector<float> cosine_similarity_coefficients(
raft::handle_t const& handle,
graph_view_t<int64_t, int64_t, false, true> const& graph_view,
std::optional<edge_property_view_t<int64_t, float const*>> edge_weight_view,
std::tuple<raft::device_span<int64_t const>, raft::device_span<int64_t const>> vertex_pairs,
bool do_expensive_check);

// vertex_t = int32_t, edge_t = int32_t, weight_t = double
template rmm::device_uvector<double> cosine_similarity_coefficients(
raft::handle_t const& handle,
graph_view_t<int32_t, int32_t, false, true> const& graph_view,
std::optional<edge_property_view_t<int32_t, double const*>> edge_weight_view,
std::tuple<raft::device_span<int32_t const>, raft::device_span<int32_t const>> vertex_pairs,
bool do_expensive_check);

// vertex_t = int32_t, edge_t = int64_t, weight_t = double
template rmm::device_uvector<double> cosine_similarity_coefficients(
raft::handle_t const& handle,
graph_view_t<int32_t, int64_t, false, true> const& graph_view,
std::optional<edge_property_view_t<int64_t, double const*>> edge_weight_view,
std::tuple<raft::device_span<int32_t const>, raft::device_span<int32_t const>> vertex_pairs,
bool do_expensive_check);

// vertex_t = int64_t, edge_t = int64_t, weight_t = double
template rmm::device_uvector<double> cosine_similarity_coefficients(
raft::handle_t const& handle,
graph_view_t<int64_t, int64_t, false, true> const& graph_view,
std::optional<edge_property_view_t<int64_t, double const*>> edge_weight_view,
std::tuple<raft::device_span<int64_t const>, raft::device_span<int64_t const>> vertex_pairs,
bool do_expensive_check);

// ---- cosine_similarity_all_pairs_coefficients: optional seed vertices and optional topk ----

// vertex_t = int32_t, edge_t = int32_t, weight_t = float
template std::
tuple<rmm::device_uvector<int32_t>, rmm::device_uvector<int32_t>, rmm::device_uvector<float>>
cosine_similarity_all_pairs_coefficients(
raft::handle_t const& handle,
graph_view_t<int32_t, int32_t, false, true> const& graph_view,
std::optional<edge_property_view_t<int32_t, float const*>> edge_weight_view,
std::optional<raft::device_span<int32_t const>> vertices,
std::optional<size_t> topk,
bool do_expensive_check);

// vertex_t = int32_t, edge_t = int64_t, weight_t = float
template std::
tuple<rmm::device_uvector<int32_t>, rmm::device_uvector<int32_t>, rmm::device_uvector<float>>
cosine_similarity_all_pairs_coefficients(
raft::handle_t const& handle,
graph_view_t<int32_t, int64_t, false, true> const& graph_view,
std::optional<edge_property_view_t<int64_t, float const*>> edge_weight_view,
std::optional<raft::device_span<int32_t const>> vertices,
std::optional<size_t> topk,
bool do_expensive_check);

// vertex_t = int64_t, edge_t = int64_t, weight_t = float
template std::
tuple<rmm::device_uvector<int64_t>, rmm::device_uvector<int64_t>, rmm::device_uvector<float>>
cosine_similarity_all_pairs_coefficients(
raft::handle_t const& handle,
graph_view_t<int64_t, int64_t, false, true> const& graph_view,
std::optional<edge_property_view_t<int64_t, float const*>> edge_weight_view,
std::optional<raft::device_span<int64_t const>> vertices,
std::optional<size_t> topk,
bool do_expensive_check);

// vertex_t = int32_t, edge_t = int32_t, weight_t = double
template std::
tuple<rmm::device_uvector<int32_t>, rmm::device_uvector<int32_t>, rmm::device_uvector<double>>
cosine_similarity_all_pairs_coefficients(
raft::handle_t const& handle,
graph_view_t<int32_t, int32_t, false, true> const& graph_view,
std::optional<edge_property_view_t<int32_t, double const*>> edge_weight_view,
std::optional<raft::device_span<int32_t const>> vertices,
std::optional<size_t> topk,
bool do_expensive_check);

// vertex_t = int32_t, edge_t = int64_t, weight_t = double
template std::
tuple<rmm::device_uvector<int32_t>, rmm::device_uvector<int32_t>, rmm::device_uvector<double>>
cosine_similarity_all_pairs_coefficients(
raft::handle_t const& handle,
graph_view_t<int32_t, int64_t, false, true> const& graph_view,
std::optional<edge_property_view_t<int64_t, double const*>> edge_weight_view,
std::optional<raft::device_span<int32_t const>> vertices,
std::optional<size_t> topk,
bool do_expensive_check);

// vertex_t = int64_t, edge_t = int64_t, weight_t = double
template std::
tuple<rmm::device_uvector<int64_t>, rmm::device_uvector<int64_t>, rmm::device_uvector<double>>
cosine_similarity_all_pairs_coefficients(
raft::handle_t const& handle,
graph_view_t<int64_t, int64_t, false, true> const& graph_view,
std::optional<edge_property_view_t<int64_t, double const*>> edge_weight_view,
std::optional<raft::device_span<int64_t const>> vertices,
std::optional<size_t> topk,
bool do_expensive_check);

} // namespace cugraph
122 changes: 122 additions & 0 deletions cpp/src/link_prediction/cosine_sg.cu
Original file line number Diff line number Diff line change
@@ -0,0 +1,122 @@
/*
* Copyright (c) 2022-2024, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "link_prediction/cosine_similarity_impl.cuh"

namespace cugraph {

// Explicit instantiations of the cosine similarity entry points with the last
// graph_view_t template argument (multi_gpu) set to false, i.e. the single-GPU
// variants. Each instantiation is labelled with its (vertex_t, edge_t, weight_t).
// NOTE(review): the template definitions are presumably supplied by
// link_prediction/cosine_similarity_impl.cuh -- confirm.

// ---- cosine_similarity_coefficients: similarity for caller-supplied vertex pairs ----

// vertex_t = int32_t, edge_t = int32_t, weight_t = float
template rmm::device_uvector<float> cosine_similarity_coefficients(
raft::handle_t const& handle,
graph_view_t<int32_t, int32_t, false, false> const& graph_view,
std::optional<edge_property_view_t<int32_t, float const*>> edge_weight_view,
std::tuple<raft::device_span<int32_t const>, raft::device_span<int32_t const>> vertex_pairs,
bool do_expensive_check);

// vertex_t = int32_t, edge_t = int64_t, weight_t = float
template rmm::device_uvector<float> cosine_similarity_coefficients(
raft::handle_t const& handle,
graph_view_t<int32_t, int64_t, false, false> const& graph_view,
std::optional<edge_property_view_t<int64_t, float const*>> edge_weight_view,
std::tuple<raft::device_span<int32_t const>, raft::device_span<int32_t const>> vertex_pairs,
bool do_expensive_check);

// vertex_t = int64_t, edge_t = int64_t, weight_t = float
template rmm::device_uvector<float> cosine_similarity_coefficients(
raft::handle_t const& handle,
graph_view_t<int64_t, int64_t, false, false> const& graph_view,
std::optional<edge_property_view_t<int64_t, float const*>> edge_weight_view,
std::tuple<raft::device_span<int64_t const>, raft::device_span<int64_t const>> vertex_pairs,
bool do_expensive_check);

// vertex_t = int32_t, edge_t = int32_t, weight_t = double
template rmm::device_uvector<double> cosine_similarity_coefficients(
raft::handle_t const& handle,
graph_view_t<int32_t, int32_t, false, false> const& graph_view,
std::optional<edge_property_view_t<int32_t, double const*>> edge_weight_view,
std::tuple<raft::device_span<int32_t const>, raft::device_span<int32_t const>> vertex_pairs,
bool do_expensive_check);

// vertex_t = int32_t, edge_t = int64_t, weight_t = double
template rmm::device_uvector<double> cosine_similarity_coefficients(
raft::handle_t const& handle,
graph_view_t<int32_t, int64_t, false, false> const& graph_view,
std::optional<edge_property_view_t<int64_t, double const*>> edge_weight_view,
std::tuple<raft::device_span<int32_t const>, raft::device_span<int32_t const>> vertex_pairs,
bool do_expensive_check);

// vertex_t = int64_t, edge_t = int64_t, weight_t = double
template rmm::device_uvector<double> cosine_similarity_coefficients(
raft::handle_t const& handle,
graph_view_t<int64_t, int64_t, false, false> const& graph_view,
std::optional<edge_property_view_t<int64_t, double const*>> edge_weight_view,
std::tuple<raft::device_span<int64_t const>, raft::device_span<int64_t const>> vertex_pairs,
bool do_expensive_check);

// ---- cosine_similarity_all_pairs_coefficients: optional seed vertices and optional topk ----

// vertex_t = int32_t, edge_t = int32_t, weight_t = float
template std::
tuple<rmm::device_uvector<int32_t>, rmm::device_uvector<int32_t>, rmm::device_uvector<float>>
cosine_similarity_all_pairs_coefficients(
raft::handle_t const& handle,
graph_view_t<int32_t, int32_t, false, false> const& graph_view,
std::optional<edge_property_view_t<int32_t, float const*>> edge_weight_view,
std::optional<raft::device_span<int32_t const>> vertices,
std::optional<size_t> topk,
bool do_expensive_check);

// vertex_t = int32_t, edge_t = int64_t, weight_t = float
template std::
tuple<rmm::device_uvector<int32_t>, rmm::device_uvector<int32_t>, rmm::device_uvector<float>>
cosine_similarity_all_pairs_coefficients(
raft::handle_t const& handle,
graph_view_t<int32_t, int64_t, false, false> const& graph_view,
std::optional<edge_property_view_t<int64_t, float const*>> edge_weight_view,
std::optional<raft::device_span<int32_t const>> vertices,
std::optional<size_t> topk,
bool do_expensive_check);

// vertex_t = int64_t, edge_t = int64_t, weight_t = float
template std::
tuple<rmm::device_uvector<int64_t>, rmm::device_uvector<int64_t>, rmm::device_uvector<float>>
cosine_similarity_all_pairs_coefficients(
raft::handle_t const& handle,
graph_view_t<int64_t, int64_t, false, false> const& graph_view,
std::optional<edge_property_view_t<int64_t, float const*>> edge_weight_view,
std::optional<raft::device_span<int64_t const>> vertices,
std::optional<size_t> topk,
bool do_expensive_check);

// vertex_t = int32_t, edge_t = int32_t, weight_t = double
template std::
tuple<rmm::device_uvector<int32_t>, rmm::device_uvector<int32_t>, rmm::device_uvector<double>>
cosine_similarity_all_pairs_coefficients(
raft::handle_t const& handle,
graph_view_t<int32_t, int32_t, false, false> const& graph_view,
std::optional<edge_property_view_t<int32_t, double const*>> edge_weight_view,
std::optional<raft::device_span<int32_t const>> vertices,
std::optional<size_t> topk,
bool do_expensive_check);

// vertex_t = int32_t, edge_t = int64_t, weight_t = double
template std::
tuple<rmm::device_uvector<int32_t>, rmm::device_uvector<int32_t>, rmm::device_uvector<double>>
cosine_similarity_all_pairs_coefficients(
raft::handle_t const& handle,
graph_view_t<int32_t, int64_t, false, false> const& graph_view,
std::optional<edge_property_view_t<int64_t, double const*>> edge_weight_view,
std::optional<raft::device_span<int32_t const>> vertices,
std::optional<size_t> topk,
bool do_expensive_check);

// vertex_t = int64_t, edge_t = int64_t, weight_t = double
template std::
tuple<rmm::device_uvector<int64_t>, rmm::device_uvector<int64_t>, rmm::device_uvector<double>>
cosine_similarity_all_pairs_coefficients(
raft::handle_t const& handle,
graph_view_t<int64_t, int64_t, false, false> const& graph_view,
std::optional<edge_property_view_t<int64_t, double const*>> edge_weight_view,
std::optional<raft::device_span<int64_t const>> vertices,
std::optional<size_t> topk,
bool do_expensive_check);

} // namespace cugraph
Loading

0 comments on commit e4d51b2

Please sign in to comment.