diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index 9a9c445ed54..cf511b1e08a 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -275,8 +275,6 @@ set(CUGRAPH_SOURCES src/community/ecg_mg_v64_e64.cu src/community/ecg_mg_v32_e32.cu src/community/ecg_mg_v32_e64.cu - src/community/legacy/louvain.cu - src/community/legacy/ecg.cu src/community/egonet_sg_v64_e64.cu src/community/egonet_sg_v32_e32.cu src/community/egonet_sg_v32_e64.cu @@ -639,7 +637,6 @@ add_library(cugraph_c src/c_api/induced_subgraph.cpp src/c_api/capi_helper.cu src/c_api/legacy_spectral.cpp - src/c_api/legacy_ecg.cpp src/c_api/graph_helper_sg.cu src/c_api/graph_helper_mg.cu src/c_api/graph_generators.cpp diff --git a/cpp/include/cugraph/algorithms.hpp b/cpp/include/cugraph/algorithms.hpp index 4cf18f01310..8ba39fa2328 100644 --- a/cpp/include/cugraph/algorithms.hpp +++ b/cpp/include/cugraph/algorithms.hpp @@ -544,14 +544,6 @@ std::pair louvain( weight_t threshold = weight_t{1e-7}, weight_t resolution = weight_t{1}); -template -std::pair louvain( - raft::handle_t const& handle, - legacy::GraphCSRView const& graph_view, - vertex_t* clustering, - size_t max_level = 100, - weight_t resolution = weight_t{1}); - /** * @brief Louvain implementation, returning dendrogram * @@ -727,38 +719,6 @@ std::pair leiden( weight_t resolution = weight_t{1}, weight_t theta = weight_t{1}); -/** - * @brief Computes the ecg clustering of the given graph. - * - * ECG runs truncated Louvain on an ensemble of permutations of the input graph, - * then uses the ensemble partitions to determine weights for the input graph. - * The final result is found by running full Louvain on the input graph using - * the determined weights. See https://arxiv.org/abs/1809.05578 for further - * information. - * - * @throws cugraph::logic_error when an error occurs. - * - * @tparam vertex_t Type of vertex identifiers. Supported value : int (signed, - * 32-bit) - * @tparam edge_t Type of edge identifiers. 
Supported value : int (signed, - * 32-bit) - * @tparam weight_t Type of edge weights. Supported values : float or double. - * - * @param[in] handle Library handle (RAFT). If a communicator is set in the handle, - * @param[in] graph_coo input graph object (COO) - * @param[in] graph_csr input graph object (CSR) - * @param[in] min_weight The minimum weight parameter - * @param[in] ensemble_size The ensemble size parameter - * @param[out] clustering A device pointer to array where the partitioning should be - * written - */ -template -void ecg(raft::handle_t const& handle, - legacy::GraphCSRView const& graph, - weight_t min_weight, - vertex_t ensemble_size, - vertex_t* clustering); - /** * @brief Computes the ecg clustering of the given graph. * diff --git a/cpp/include/cugraph_c/community_algorithms.h b/cpp/include/cugraph_c/community_algorithms.h index b6f59333805..67514958cef 100644 --- a/cpp/include/cugraph_c/community_algorithms.h +++ b/cpp/include/cugraph_c/community_algorithms.h @@ -212,37 +212,6 @@ cugraph_error_code_t cugraph_ecg(const cugraph_resource_handle_t* handle, cugraph_hierarchical_clustering_result_t** result, cugraph_error_t** error); -/** - * @brief Compute ECG clustering of the given graph - * - * ECG runs truncated Louvain on an ensemble of permutations of the input graph, - * then uses the ensemble partitions to determine weights for the input graph. - * The final result is found by running full Louvain on the input graph using - * the determined weights. See https://arxiv.org/abs/1809.05578 for further - * information. - * - * NOTE: This currently wraps the legacy ECG clustering implementation which is only - * available in Single GPU implementation. - * - * @param [in] handle Handle for accessing resources - * @param [in] graph Pointer to graph. 
NOTE: Graph might be modified if the storage - * @param [in] min_weight The minimum weight parameter - * @param [in] ensemble_size The ensemble size parameter - * @param [in] do_expensive_check - * A flag to run expensive checks for input arguments (if set to true) - * @param [out] result The result from the clustering algorithm - * @param [out] error Pointer to an error object storing details of any error. Will - * be populated if error code is not CUGRAPH_SUCCESS - * @return error code - */ -cugraph_error_code_t cugraph_legacy_ecg(const cugraph_resource_handle_t* handle, - cugraph_graph_t* graph, - double min_weight, - size_t ensemble_size, - bool_t do_expensive_check, - cugraph_hierarchical_clustering_result_t** result, - cugraph_error_t** error); - /** * @brief Extract ego graphs * diff --git a/cpp/src/c_api/legacy_ecg.cpp b/cpp/src/c_api/legacy_ecg.cpp deleted file mode 100644 index 6fee219f303..00000000000 --- a/cpp/src/c_api/legacy_ecg.cpp +++ /dev/null @@ -1,135 +0,0 @@ -/* - * Copyright (c) 2023-2024, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "c_api/abstract_functor.hpp" -#include "c_api/graph.hpp" -#include "c_api/hierarchical_clustering_result.hpp" -#include "c_api/resource_handle.hpp" -#include "c_api/utils.hpp" - -#include - -#include -#include -#include -#include - -#include - -namespace { - -struct legacy_ecg_functor : public cugraph::c_api::abstract_functor { - raft::handle_t const& handle_; - cugraph::c_api::cugraph_graph_t* graph_; - double min_weight_; - size_t ensemble_size_; - bool do_expensive_check_; - cugraph::c_api::cugraph_hierarchical_clustering_result_t* result_{}; - - legacy_ecg_functor(::cugraph_resource_handle_t const* handle, - ::cugraph_graph_t* graph, - double min_weight, - size_t ensemble_size, - bool do_expensive_check) - : abstract_functor(), - handle_(*reinterpret_cast(handle)->handle_), - graph_(reinterpret_cast(graph)), - min_weight_(min_weight), - ensemble_size_(ensemble_size), - do_expensive_check_(do_expensive_check) - { - } - - template - void operator()() - { - if constexpr (!cugraph::is_candidate::value) { - unsupported(); - } else if constexpr (multi_gpu) { - unsupported(); - } else if constexpr (!std::is_same_v) { - unsupported(); - } else { - // ecg expects store_transposed == false - if constexpr (store_transposed) { - error_code_ = - cugraph::c_api::transpose_storage( - handle_, graph_, error_.get()); - if (error_code_ != CUGRAPH_SUCCESS) return; - } - - auto graph = - reinterpret_cast*>(graph_->graph_); - - auto edge_weights = reinterpret_cast< - cugraph::edge_property_t, weight_t>*>( - graph_->edge_weights_); - - auto number_map = reinterpret_cast*>(graph_->number_map_); - - auto graph_view = graph->view(); - - auto edge_partition_view = graph_view.local_edge_partition_view(); - - cugraph::legacy::GraphCSRView legacy_graph_view( - const_cast(edge_partition_view.offsets().data()), - const_cast(edge_partition_view.indices().data()), - const_cast(edge_weights->view().value_firsts().front()), - edge_partition_view.offsets().size() - 1, - 
edge_partition_view.indices().size()); - - rmm::device_uvector clusters(graph_view.local_vertex_partition_range_size(), - handle_.get_stream()); - - // FIXME: Need modularity..., although currently not used - cugraph::ecg(handle_, - legacy_graph_view, - static_cast(min_weight_), - static_cast(ensemble_size_), - clusters.data()); - - rmm::device_uvector vertices(graph_view.local_vertex_partition_range_size(), - handle_.get_stream()); - raft::copy(vertices.data(), number_map->data(), vertices.size(), handle_.get_stream()); - - result_ = new cugraph::c_api::cugraph_hierarchical_clustering_result_t{ - weight_t{0}, - new cugraph::c_api::cugraph_type_erased_device_array_t(vertices, graph_->vertex_type_), - new cugraph::c_api::cugraph_type_erased_device_array_t(clusters, graph_->vertex_type_)}; - } - } -}; - -} // namespace - -extern "C" cugraph_error_code_t cugraph_legacy_ecg( - const cugraph_resource_handle_t* handle, - cugraph_graph_t* graph, - double min_weight, - size_t ensemble_size, - bool_t do_expensive_check, - cugraph_hierarchical_clustering_result_t** result, - cugraph_error_t** error) -{ - legacy_ecg_functor functor(handle, graph, min_weight, ensemble_size, do_expensive_check); - - return cugraph::c_api::run_algorithm(graph, functor, result, error); -} diff --git a/cpp/src/community/legacy/ecg.cu b/cpp/src/community/legacy/ecg.cu deleted file mode 100644 index b2ad79204ed..00000000000 --- a/cpp/src/community/legacy/ecg.cu +++ /dev/null @@ -1,220 +0,0 @@ -/* - * Copyright (c) 2019-2024, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "community/legacy/louvain.cuh" -#include "utilities/graph_utils.cuh" - -#include -#include - -#include - -#include -#include -#include -#include -#include -#include - -#include - -namespace { -template -__device__ IndexType -binsearch_maxle(const IndexType* vec, const IndexType val, IndexType low, IndexType high) -{ - while (true) { - if (low == high) return low; // we know it exists - if ((low + 1) == high) return (vec[high] <= val) ? high : low; - - IndexType mid = low + (high - low) / 2; - - if (vec[mid] > val) - high = mid - 1; - else - low = mid; - } -} - -// FIXME: This shouldn't need to be a custom kernel, this -// seems like it should just be a thrust::transform -template -__global__ static void match_check_kernel( - IdxT size, IdxT num_verts, IdxT* offsets, IdxT* indices, IdxT* parts, ValT* weights) -{ - IdxT tid = blockIdx.x * blockDim.x + threadIdx.x; - while (tid < size) { - IdxT source = binsearch_maxle(offsets, tid, (IdxT)0, num_verts); - IdxT dest = indices[tid]; - if (parts[source] == parts[dest]) weights[tid] += 1; - tid += gridDim.x * blockDim.x; - } -} - -struct prg { - __device__ float operator()(int n) - { - thrust::default_random_engine rng; - thrust::uniform_real_distribution dist(0.0, 1.0); - rng.discard(n); - return dist(rng); - } -}; - -template -struct update_functor { - ValT min_value; - ValT ensemble_size; - update_functor(ValT minv, ValT es) : min_value(minv), ensemble_size(es) {} - __host__ __device__ ValT operator()(ValT input) - { - return min_value + (1 - min_value) * (input / ensemble_size); - } -}; - -/** - * Computes a random permutation vector of length size. A permutation vector of length n - * contains all values [0..n-1] exactly once. 
- * @param size The length of the permutation vector to generate - * @param seed A seed value for the random number generator, the generator will discard this many - * values before using values. Calling this method with the same seed will result in the same - * permutation vector. - * @return A pointer to memory containing the requested permutation vector. The caller is - * responsible for freeing the allocated memory using ALLOC_FREE_TRY(). - */ -template -void get_permutation_vector(T size, T seed, T* permutation, rmm::cuda_stream_view stream_view) -{ - rmm::device_uvector randoms_v(size, stream_view); - - thrust::counting_iterator index(seed); - thrust::transform(rmm::exec_policy(stream_view), index, index + size, randoms_v.begin(), prg()); - thrust::sequence(rmm::exec_policy(stream_view), permutation, permutation + size, 0); - thrust::sort_by_key( - rmm::exec_policy(stream_view), randoms_v.begin(), randoms_v.end(), permutation); -} - -template -class EcgLouvain : public cugraph::legacy::Louvain { - public: - using graph_t = graph_type; - using vertex_t = typename graph_type::vertex_type; - using edge_t = typename graph_type::edge_type; - using weight_t = typename graph_type::weight_type; - - EcgLouvain(raft::handle_t const& handle, graph_type const& graph, vertex_t seed) - : cugraph::legacy::Louvain(handle, graph), seed_(seed) - { - } - - void initialize_dendrogram_level(vertex_t num_vertices) override - { - this->dendrogram_->add_level(0, num_vertices, this->handle_.get_stream()); - - get_permutation_vector( - num_vertices, seed_, this->dendrogram_->current_level_begin(), this->handle_.get_stream()); - } - - private: - vertex_t seed_; -}; - -} // anonymous namespace - -namespace cugraph { - -template -void ecg(raft::handle_t const& handle, - legacy::GraphCSRView const& graph, - weight_t min_weight, - vertex_t ensemble_size, - vertex_t* clustering) -{ - using graph_type = legacy::GraphCSRView; - - CUGRAPH_EXPECTS(graph.edge_data != nullptr, - "Invalid input 
argument: ecg expects a weighted graph"); - CUGRAPH_EXPECTS(clustering != nullptr, - "Invalid input argument: clustering is NULL, should be a device pointer to " - "memory for storing the result"); - - rmm::device_uvector ecg_weights_v(graph.number_of_edges, handle.get_stream()); - - thrust::copy(handle.get_thrust_policy(), - graph.edge_data, - graph.edge_data + graph.number_of_edges, - ecg_weights_v.data()); - - vertex_t size{graph.number_of_vertices}; - - // FIXME: This seed should be a parameter - vertex_t seed{1}; - - // Iterate over each member of the ensemble - for (vertex_t i = 0; i < ensemble_size; i++) { - EcgLouvain runner(handle, graph, seed); - seed += size; - - weight_t wt = runner(size_t{1}, weight_t{1}); - - // For each edge in the graph determine whether the endpoints are in the same partition - // Keep a sum for each edge of the total number of times its endpoints are in the same partition - dim3 grid, block; - block.x = 512; - grid.x = min(vertex_t{CUDA_MAX_BLOCKS}, (graph.number_of_edges / 512 + 1)); - match_check_kernel<<>>( - graph.number_of_edges, - graph.number_of_vertices, - graph.offsets, - graph.indices, - runner.get_dendrogram().get_level_ptr_nocheck(0), - ecg_weights_v.data()); - } - - // Set weights = min_weight + (1 - min-weight)*sum/ensemble_size - update_functor uf(min_weight, ensemble_size); - thrust::transform(handle.get_thrust_policy(), - ecg_weights_v.begin(), - ecg_weights_v.end(), - ecg_weights_v.begin(), - uf); - - // Run Louvain on the original graph using the computed weights - // (pass max_level = 100 for a "full run") - legacy::GraphCSRView louvain_graph; - louvain_graph.indices = graph.indices; - louvain_graph.offsets = graph.offsets; - louvain_graph.edge_data = ecg_weights_v.data(); - louvain_graph.number_of_vertices = graph.number_of_vertices; - louvain_graph.number_of_edges = graph.number_of_edges; - - cugraph::louvain(handle, louvain_graph, clustering, size_t{100}); -} - -// Explicit template instantiations. 
-template void ecg( - raft::handle_t const&, - legacy::GraphCSRView const& graph, - float min_weight, - int32_t ensemble_size, - int32_t* clustering); -template void ecg( - raft::handle_t const&, - legacy::GraphCSRView const& graph, - double min_weight, - int32_t ensemble_size, - int32_t* clustering); -} // namespace cugraph diff --git a/cpp/src/community/legacy/louvain.cu b/cpp/src/community/legacy/louvain.cu deleted file mode 100644 index d725ecc3dd1..00000000000 --- a/cpp/src/community/legacy/louvain.cu +++ /dev/null @@ -1,102 +0,0 @@ -/* - * Copyright (c) 2021-2024, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "community/flatten_dendrogram.hpp" -#include "community/legacy/louvain.cuh" - -#include - -#include - -#include - -namespace cugraph { - -namespace detail { - -template -void check_clustering(legacy::GraphCSRView const& graph_view, - vertex_t* clustering) -{ - CUGRAPH_EXPECTS(clustering != nullptr, "Invalid input argument: clustering is null"); -} - -template -std::pair>, weight_t> louvain( - raft::handle_t const& handle, - legacy::GraphCSRView const& graph_view, - size_t max_level, - weight_t resolution) -{ - CUGRAPH_EXPECTS(graph_view.edge_data != nullptr, - "Invalid input argument: louvain expects a weighted graph"); - - legacy::Louvain> runner(handle, graph_view); - weight_t wt = runner(max_level, resolution); - - return std::make_pair(runner.move_dendrogram(), wt); -} - -template -void flatten_dendrogram(raft::handle_t const& handle, - legacy::GraphCSRView const& graph_view, - Dendrogram const& dendrogram, - vertex_t* clustering) -{ - rmm::device_uvector vertex_ids_v(graph_view.number_of_vertices, handle.get_stream()); - - thrust::sequence( - handle.get_thrust_policy(), vertex_ids_v.begin(), vertex_ids_v.end(), vertex_t{0}); - - partition_at_level( - handle, dendrogram, vertex_ids_v.data(), clustering, dendrogram.num_levels()); -} - -} // namespace detail - -template -std::pair louvain( - raft::handle_t const& handle, - legacy::GraphCSRView const& graph_view, - vertex_t* clustering, - size_t max_level, - weight_t resolution) -{ - CUGRAPH_EXPECTS(graph_view.has_data(), "Graph must be weighted"); - detail::check_clustering(graph_view, clustering); - - std::unique_ptr> dendrogram; - weight_t modularity; - - std::tie(dendrogram, modularity) = detail::louvain(handle, graph_view, max_level, resolution); - - detail::flatten_dendrogram(handle, graph_view, *dendrogram, clustering); - - return std::make_pair(dendrogram->num_levels(), modularity); -} - -// Explicit template instantations -template std::pair louvain(raft::handle_t const&, - 
legacy::GraphCSRView const&, - int32_t*, - size_t, - float); -template std::pair louvain(raft::handle_t const&, - legacy::GraphCSRView const&, - int32_t*, - size_t, - double); -} // namespace cugraph diff --git a/cpp/src/community/legacy/louvain.cuh b/cpp/src/community/legacy/louvain.cuh deleted file mode 100644 index 53d0b231c03..00000000000 --- a/cpp/src/community/legacy/louvain.cuh +++ /dev/null @@ -1,666 +0,0 @@ -/* - * Copyright (c) 2020-2024, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -#pragma once - -// #define TIMING - -#include "converters/legacy/COOtoCSR.cuh" -#include "utilities/graph_utils.cuh" - -#include -#include - -#ifdef TIMING -#include -#endif - -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -namespace cugraph { -namespace legacy { - -template -class Louvain { - public: - using graph_t = graph_type; - using vertex_t = typename graph_type::vertex_type; - using edge_t = typename graph_type::edge_type; - using weight_t = typename graph_type::weight_type; - - Louvain(raft::handle_t const& handle, graph_type const& graph) - : -#ifdef TIMING - hr_timer_(), -#endif - handle_(handle), - dendrogram_(std::make_unique>()), - - // FIXME: Don't really need to copy here but would need - // to change the logic to populate this properly - // in generate_superverticies_graph. 
- // - offsets_v_(graph.number_of_vertices + 1, handle.get_stream()), - indices_v_(graph.number_of_edges, handle.get_stream()), - weights_v_(graph.number_of_edges, handle.get_stream()), - src_indices_v_(graph.number_of_edges, handle.get_stream()), - vertex_weights_v_(graph.number_of_vertices, handle.get_stream()), - cluster_weights_v_(graph.number_of_vertices, handle.get_stream()), - tmp_arr_v_(graph.number_of_vertices, handle.get_stream()), - cluster_inverse_v_(graph.number_of_vertices, handle.get_stream()), - number_of_vertices_(graph.number_of_vertices), - number_of_edges_(graph.number_of_edges) - { - thrust::copy(handle.get_thrust_policy(), - graph.offsets, - graph.offsets + graph.number_of_vertices + 1, - offsets_v_.begin()); - - thrust::copy(handle.get_thrust_policy(), - graph.indices, - graph.indices + graph.number_of_edges, - indices_v_.begin()); - - thrust::copy(handle.get_thrust_policy(), - graph.edge_data, - graph.edge_data + graph.number_of_edges, - weights_v_.begin()); - } - - virtual ~Louvain() {} - - weight_t modularity(weight_t total_edge_weight, - weight_t resolution, - graph_t const& graph, - vertex_t const* d_cluster) - { - vertex_t n_verts = graph.number_of_vertices; - - rmm::device_uvector inc(n_verts, handle_.get_stream()); - rmm::device_uvector deg(n_verts, handle_.get_stream()); - - thrust::fill(handle_.get_thrust_policy(), inc.begin(), inc.end(), weight_t{0.0}); - thrust::fill(handle_.get_thrust_policy(), deg.begin(), deg.end(), weight_t{0.0}); - - // FIXME: Already have weighted degree computed in main loop, - // could pass that in rather than computing d_deg... 
which - // would save an atomicAdd (synchronization) - // - thrust::for_each(handle_.get_thrust_policy(), - thrust::make_counting_iterator(0), - thrust::make_counting_iterator(graph.number_of_vertices), - [d_inc = inc.data(), - d_deg = deg.data(), - d_offsets = graph.offsets, - d_indices = graph.indices, - d_weights = graph.edge_data, - d_cluster] __device__(vertex_t v) { - vertex_t community = d_cluster[v]; - weight_t increase{0.0}; - weight_t degree{0.0}; - - for (edge_t loc = d_offsets[v]; loc < d_offsets[v + 1]; ++loc) { - vertex_t neighbor = d_indices[loc]; - degree += d_weights[loc]; - if (d_cluster[neighbor] == community) { increase += d_weights[loc]; } - } - - if (degree > weight_t{0.0}) atomicAdd(d_deg + community, degree); - if (increase > weight_t{0.0}) atomicAdd(d_inc + community, increase); - }); - - weight_t Q = thrust::transform_reduce( - handle_.get_thrust_policy(), - thrust::make_counting_iterator(0), - thrust::make_counting_iterator(graph.number_of_vertices), - cuda::proclaim_return_type( - [d_deg = deg.data(), d_inc = inc.data(), total_edge_weight, resolution] __device__( - vertex_t community) -> weight_t { - return ((d_inc[community] / total_edge_weight) - - resolution * (d_deg[community] * d_deg[community]) / - (total_edge_weight * total_edge_weight)); - }), - weight_t{0.0}, - thrust::plus()); - - return Q; - } - - Dendrogram const& get_dendrogram() const { return *dendrogram_; } - - Dendrogram& get_dendrogram() { return *dendrogram_; } - - std::unique_ptr> move_dendrogram() { return std::move(dendrogram_); } - - virtual weight_t operator()(size_t max_level, weight_t resolution) - { - weight_t total_edge_weight = - thrust::reduce(handle_.get_thrust_policy(), weights_v_.begin(), weights_v_.end()); - - weight_t best_modularity = weight_t{-1}; - - // - // Our copy of the graph. Each iteration of the outer loop will - // shrink this copy of the graph. 
- // - legacy::GraphCSRView current_graph(offsets_v_.data(), - indices_v_.data(), - weights_v_.data(), - number_of_vertices_, - number_of_edges_); - - current_graph.get_source_indices(src_indices_v_.data()); - - while (dendrogram_->num_levels() < max_level) { - // - // Initialize every cluster to reference each vertex to itself - // - initialize_dendrogram_level(current_graph.number_of_vertices); - - compute_vertex_and_cluster_weights(current_graph); - - weight_t new_Q = update_clustering(total_edge_weight, resolution, current_graph); - - if (new_Q <= best_modularity) { break; } - - best_modularity = new_Q; - - shrink_graph(current_graph); - } - - timer_display_and_clear(std::cout); - - return best_modularity; - } - - protected: - void timer_start(std::string const& region) - { -#ifdef TIMING - hr_timer_.start(region); -#endif - } - - void timer_stop(rmm::cuda_stream_view stream_view) - { -#ifdef TIMING - stream_view.synchronize(); - hr_timer_.stop(); -#endif - } - - void timer_display_and_clear(std::ostream& os) - { -#ifdef TIMING - hr_timer_.display_and_clear(os); -#endif - } - - virtual void initialize_dendrogram_level(vertex_t num_vertices) - { - dendrogram_->add_level(0, num_vertices, handle_.get_stream()); - - thrust::sequence(handle_.get_thrust_policy(), - dendrogram_->current_level_begin(), - dendrogram_->current_level_end()); - } - - public: - void compute_vertex_and_cluster_weights(graph_type const& graph) - { - timer_start("compute_vertex_and_cluster_weights"); - - edge_t const* d_offsets = graph.offsets; - vertex_t const* d_indices = graph.indices; - weight_t const* d_weights = graph.edge_data; - weight_t* d_vertex_weights = vertex_weights_v_.data(); - weight_t* d_cluster_weights = cluster_weights_v_.data(); - - // - // MNMG: per_v_transform_reduce_outgoing_e, then copy - // - thrust::for_each( - handle_.get_thrust_policy(), - thrust::make_counting_iterator(0), - thrust::make_counting_iterator(graph.number_of_vertices), - [d_offsets, d_indices, 
d_weights, d_vertex_weights, d_cluster_weights] __device__( - vertex_t src) { - weight_t sum = - thrust::reduce(thrust::seq, d_weights + d_offsets[src], d_weights + d_offsets[src + 1]); - - d_vertex_weights[src] = sum; - d_cluster_weights[src] = sum; - }); - - timer_stop(handle_.get_stream()); - } - - virtual weight_t update_clustering(weight_t total_edge_weight, - weight_t resolution, - graph_type const& graph) - { - timer_start("update_clustering"); - - rmm::device_uvector next_cluster_v(dendrogram_->current_level_size(), - handle_.get_stream()); - rmm::device_uvector delta_Q_v(graph.number_of_edges, handle_.get_stream()); - rmm::device_uvector cluster_hash_v(graph.number_of_edges, handle_.get_stream()); - rmm::device_uvector old_cluster_sum_v(graph.number_of_vertices, handle_.get_stream()); - - vertex_t* d_cluster = dendrogram_->current_level_begin(); - weight_t const* d_vertex_weights = vertex_weights_v_.data(); - weight_t* d_cluster_weights = cluster_weights_v_.data(); - weight_t* d_delta_Q = delta_Q_v.data(); - - thrust::copy(handle_.get_thrust_policy(), - dendrogram_->current_level_begin(), - dendrogram_->current_level_end(), - next_cluster_v.data()); - - weight_t new_Q = - modularity(total_edge_weight, resolution, graph, dendrogram_->current_level_begin()); - - weight_t cur_Q = new_Q - 1; - - // To avoid the potential of having two vertices swap clusters - // we will only allow vertices to move up (true) or down (false) - // during each iteration of the loop - bool up_down = true; - - while (new_Q > (cur_Q + 0.0001)) { - cur_Q = new_Q; - - compute_delta_modularity( - total_edge_weight, resolution, graph, cluster_hash_v, old_cluster_sum_v, delta_Q_v); - - assign_nodes(graph, cluster_hash_v, next_cluster_v, delta_Q_v, up_down); - - up_down = !up_down; - - new_Q = modularity(total_edge_weight, resolution, graph, next_cluster_v.data()); - - if (new_Q > cur_Q) { - thrust::copy(handle_.get_thrust_policy(), - next_cluster_v.begin(), - next_cluster_v.end(), - 
dendrogram_->current_level_begin()); - } - } - - timer_stop(handle_.get_stream()); - return cur_Q; - } - - void compute_delta_modularity(weight_t total_edge_weight, - weight_t resolution, - graph_type const& graph, - rmm::device_uvector& cluster_hash_v, - rmm::device_uvector& old_cluster_sum_v, - rmm::device_uvector& delta_Q_v) - { - edge_t const* d_offsets = graph.offsets; - weight_t const* d_weights = graph.edge_data; - vertex_t const* d_cluster = dendrogram_->current_level_begin(); - weight_t const* d_vertex_weights = vertex_weights_v_.data(); - weight_t const* d_cluster_weights = cluster_weights_v_.data(); - - vertex_t* d_cluster_hash = cluster_hash_v.data(); - weight_t* d_delta_Q = delta_Q_v.data(); - weight_t* d_old_cluster_sum = old_cluster_sum_v.data(); - weight_t* d_new_cluster_sum = d_delta_Q; - - thrust::fill( - handle_.get_thrust_policy(), cluster_hash_v.begin(), cluster_hash_v.end(), vertex_t{-1}); - thrust::fill(handle_.get_thrust_policy(), delta_Q_v.begin(), delta_Q_v.end(), weight_t{0.0}); - thrust::fill(handle_.get_thrust_policy(), - old_cluster_sum_v.begin(), - old_cluster_sum_v.end(), - weight_t{0.0}); - - thrust::for_each(handle_.get_thrust_policy(), - thrust::make_counting_iterator(0), - thrust::make_counting_iterator(graph.number_of_edges), - [d_src_indices = src_indices_v_.data(), - d_dst_indices = graph.indices, - d_cluster, - d_offsets, - d_cluster_hash, - d_new_cluster_sum, - d_weights, - d_old_cluster_sum] __device__(edge_t loc) { - vertex_t src = d_src_indices[loc]; - vertex_t dst = d_dst_indices[loc]; - - if (src != dst) { - vertex_t old_cluster = d_cluster[src]; - vertex_t new_cluster = d_cluster[dst]; - edge_t hash_base = d_offsets[src]; - edge_t n_edges = d_offsets[src + 1] - hash_base; - - int h = (new_cluster % n_edges); - edge_t offset = hash_base + h; - while (d_cluster_hash[offset] != new_cluster) { - if (d_cluster_hash[offset] == -1) { - atomicCAS(d_cluster_hash + offset, -1, new_cluster); - } else { - h = (h + 1) % n_edges; - 
offset = hash_base + h; - } - } - - atomicAdd(d_new_cluster_sum + offset, d_weights[loc]); - - if (old_cluster == new_cluster) - atomicAdd(d_old_cluster_sum + src, d_weights[loc]); - } - }); - - thrust::for_each( - handle_.get_thrust_policy(), - thrust::make_counting_iterator(0), - thrust::make_counting_iterator(graph.number_of_edges), - [total_edge_weight, - resolution, - d_cluster_hash, - d_src_indices = src_indices_v_.data(), - d_cluster, - d_vertex_weights, - d_delta_Q, - d_new_cluster_sum, - d_old_cluster_sum, - d_cluster_weights] __device__(edge_t loc) { - vertex_t new_cluster = d_cluster_hash[loc]; - if (new_cluster >= 0) { - vertex_t src = d_src_indices[loc]; - vertex_t old_cluster = d_cluster[src]; - weight_t k_k = d_vertex_weights[src]; - weight_t a_old = d_cluster_weights[old_cluster]; - weight_t a_new = d_cluster_weights[new_cluster]; - - // NOTE: d_delta_Q and d_new_cluster_sum are aliases - // for same device array to save memory - d_delta_Q[loc] = - 2 * (((d_new_cluster_sum[loc] - d_old_cluster_sum[src]) / total_edge_weight) - - resolution * (a_new * k_k - a_old * k_k + k_k * k_k) / - (total_edge_weight * total_edge_weight)); - } else { - d_delta_Q[loc] = weight_t{0.0}; - } - }); - } - - void assign_nodes(graph_type const& graph, - rmm::device_uvector& cluster_hash_v, - rmm::device_uvector& next_cluster_v, - rmm::device_uvector& delta_Q_v, - bool up_down) - { - rmm::device_uvector temp_vertices_v(graph.number_of_vertices, handle_.get_stream()); - rmm::device_uvector temp_cluster_v(graph.number_of_vertices, handle_.get_stream()); - rmm::device_uvector temp_delta_Q_v(graph.number_of_vertices, handle_.get_stream()); - - thrust::fill( - handle_.get_thrust_policy(), temp_cluster_v.begin(), temp_cluster_v.end(), vertex_t{-1}); - - thrust::fill( - handle_.get_thrust_policy(), temp_delta_Q_v.begin(), temp_delta_Q_v.end(), weight_t{0}); - - auto cluster_reduce_iterator = - thrust::make_zip_iterator(thrust::make_tuple(cluster_hash_v.begin(), 
delta_Q_v.begin())); - - auto output_edge_iterator2 = - thrust::make_zip_iterator(thrust::make_tuple(temp_cluster_v.begin(), temp_delta_Q_v.begin())); - - auto cluster_reduce_end = - thrust::reduce_by_key(handle_.get_thrust_policy(), - src_indices_v_.begin(), - src_indices_v_.end(), - cluster_reduce_iterator, - temp_vertices_v.data(), - output_edge_iterator2, - thrust::equal_to(), - [] __device__(auto pair1, auto pair2) { - if (thrust::get<1>(pair1) > thrust::get<1>(pair2)) - return pair1; - else if ((thrust::get<1>(pair1) == thrust::get<1>(pair2)) && - (thrust::get<0>(pair1) < thrust::get<0>(pair2))) - return pair1; - else - return pair2; - }); - - vertex_t final_size = thrust::distance(temp_vertices_v.data(), cluster_reduce_end.first); - - thrust::for_each(handle_.get_thrust_policy(), - thrust::make_counting_iterator(0), - thrust::make_counting_iterator(final_size), - [up_down, - d_temp_delta_Q = temp_delta_Q_v.data(), - d_next_cluster = next_cluster_v.data(), - d_temp_vertices = temp_vertices_v.data(), - d_vertex_weights = vertex_weights_v_.data(), - d_temp_clusters = temp_cluster_v.data(), - d_cluster_weights = cluster_weights_v_.data()] __device__(vertex_t id) { - if ((d_temp_clusters[id] >= 0) && (d_temp_delta_Q[id] > weight_t{0.0})) { - vertex_t new_cluster = d_temp_clusters[id]; - vertex_t old_cluster = d_next_cluster[d_temp_vertices[id]]; - - if ((new_cluster > old_cluster) == up_down) { - weight_t src_weight = d_vertex_weights[d_temp_vertices[id]]; - d_next_cluster[d_temp_vertices[id]] = d_temp_clusters[id]; - - atomicAdd(d_cluster_weights + new_cluster, src_weight); - atomicAdd(d_cluster_weights + old_cluster, -src_weight); - } - } - }); - } - - void shrink_graph(graph_t& graph) - { - timer_start("shrinking graph"); - - // renumber the clusters to the range 0..(num_clusters-1) - vertex_t num_clusters = renumber_clusters(); - cluster_weights_v_.resize(num_clusters, handle_.get_stream()); - - // shrink our graph to represent the graph of supervertices - 
generate_superverticies_graph(graph, num_clusters); - - timer_stop(handle_.get_stream()); - } - - vertex_t renumber_clusters() - { - vertex_t* d_tmp_array = tmp_arr_v_.data(); - vertex_t* d_cluster_inverse = cluster_inverse_v_.data(); - vertex_t* d_cluster = dendrogram_->current_level_begin(); - - vertex_t old_num_clusters = dendrogram_->current_level_size(); - - // - // New technique. Initialize cluster_inverse_v_ to 0 - // - thrust::fill(handle_.get_thrust_policy(), - cluster_inverse_v_.begin(), - cluster_inverse_v_.end(), - vertex_t{0}); - - // - // Iterate over every element c in the current clustering and set cluster_inverse_v to 1 - // - auto first_1 = thrust::make_constant_iterator(1); - auto last_1 = first_1 + old_num_clusters; - - thrust::scatter(handle_.get_thrust_policy(), - first_1, - last_1, - dendrogram_->current_level_begin(), - cluster_inverse_v_.begin()); - - // - // Now we'll copy all of the clusters that have a value of 1 into a temporary array - // - auto copy_end = thrust::copy_if( - handle_.get_thrust_policy(), - thrust::make_counting_iterator(0), - thrust::make_counting_iterator(old_num_clusters), - tmp_arr_v_.begin(), - [d_cluster_inverse] __device__(const vertex_t idx) { return d_cluster_inverse[idx] == 1; }); - - vertex_t new_num_clusters = thrust::distance(tmp_arr_v_.begin(), copy_end); - tmp_arr_v_.resize(new_num_clusters, handle_.get_stream()); - - // - // Now we can set each value in cluster_inverse of a cluster to its index - // - thrust::for_each(handle_.get_thrust_policy(), - thrust::make_counting_iterator(0), - thrust::make_counting_iterator(new_num_clusters), - [d_cluster_inverse, d_tmp_array] __device__(const vertex_t idx) { - d_cluster_inverse[d_tmp_array[idx]] = idx; - }); - - thrust::for_each(handle_.get_thrust_policy(), - thrust::make_counting_iterator(0), - thrust::make_counting_iterator(old_num_clusters), - [d_cluster, d_cluster_inverse] __device__(vertex_t i) { - d_cluster[i] = d_cluster_inverse[d_cluster[i]]; - }); - - 
cluster_inverse_v_.resize(new_num_clusters, handle_.get_stream()); - - return new_num_clusters; - } - - void generate_superverticies_graph(graph_t& graph, vertex_t num_clusters) - { - rmm::device_uvector new_src_v(graph.number_of_edges, handle_.get_stream()); - rmm::device_uvector new_dst_v(graph.number_of_edges, handle_.get_stream()); - rmm::device_uvector new_weight_v(graph.number_of_edges, handle_.get_stream()); - - // - // Renumber the COO - // - thrust::for_each(handle_.get_thrust_policy(), - thrust::make_counting_iterator(0), - thrust::make_counting_iterator(graph.number_of_edges), - [d_old_src = src_indices_v_.data(), - d_old_dst = graph.indices, - d_old_weight = graph.edge_data, - d_new_src = new_src_v.data(), - d_new_dst = new_dst_v.data(), - d_new_weight = new_weight_v.data(), - d_clusters = dendrogram_->current_level_begin()] __device__(edge_t e) { - d_new_src[e] = d_clusters[d_old_src[e]]; - d_new_dst[e] = d_clusters[d_old_dst[e]]; - d_new_weight[e] = d_old_weight[e]; - }); - - thrust::stable_sort_by_key( - handle_.get_thrust_policy(), - new_dst_v.begin(), - new_dst_v.end(), - thrust::make_zip_iterator(thrust::make_tuple(new_src_v.begin(), new_weight_v.begin()))); - thrust::stable_sort_by_key( - handle_.get_thrust_policy(), - new_src_v.begin(), - new_src_v.end(), - thrust::make_zip_iterator(thrust::make_tuple(new_dst_v.begin(), new_weight_v.begin()))); - - // - // Now we reduce by key to combine the weights of duplicate - // edges. 
- // - auto start = - thrust::make_zip_iterator(thrust::make_tuple(new_src_v.begin(), new_dst_v.begin())); - auto new_start = - thrust::make_zip_iterator(thrust::make_tuple(src_indices_v_.data(), graph.indices)); - auto new_end = thrust::reduce_by_key(handle_.get_thrust_policy(), - start, - start + graph.number_of_edges, - new_weight_v.begin(), - new_start, - graph.edge_data, - thrust::equal_to>(), - thrust::plus()); - - graph.number_of_edges = thrust::distance(new_start, new_end.first); - graph.number_of_vertices = num_clusters; - - detail::fill_offset(src_indices_v_.data(), - graph.offsets, - num_clusters, - graph.number_of_edges, - handle_.get_stream()); - - src_indices_v_.resize(graph.number_of_edges, handle_.get_stream()); - indices_v_.resize(graph.number_of_edges, handle_.get_stream()); - weights_v_.resize(graph.number_of_edges, handle_.get_stream()); - } - - protected: - raft::handle_t const& handle_; - vertex_t number_of_vertices_; - edge_t number_of_edges_; - - std::unique_ptr> dendrogram_; - - // - // Copy of graph - // - rmm::device_uvector offsets_v_; - rmm::device_uvector indices_v_; - rmm::device_uvector weights_v_; - rmm::device_uvector src_indices_v_; - - // - // Weights and clustering across iterations of algorithm - // - rmm::device_uvector vertex_weights_v_; - rmm::device_uvector cluster_weights_v_; - - // - // Temporaries used within kernels. 
Each iteration uses less - // of this memory - // - rmm::device_uvector tmp_arr_v_; - rmm::device_uvector cluster_inverse_v_; - -#ifdef TIMING - HighResTimer hr_timer_; -#endif -}; - -} // namespace legacy -} // namespace cugraph diff --git a/cpp/tests/CMakeLists.txt b/cpp/tests/CMakeLists.txt index 3ad27b503a4..103953dd5d2 100644 --- a/cpp/tests/CMakeLists.txt +++ b/cpp/tests/CMakeLists.txt @@ -330,10 +330,6 @@ ConfigureTest(LEIDEN_TEST community/leiden_test.cpp) # - WEIGHTED MATCHING tests ----------------------------------------------------------------------- ConfigureTest(WEIGHTED_MATCHING_TEST community/weighted_matching_test.cpp) -################################################################################################### -# - Legacy ECG tests ------------------------------------------------------------------------------ -ConfigureTest(LEGACY_ECG_TEST community/legacy_ecg_test.cpp) - ################################################################################################### # - Balanced cut clustering tests ----------------------------------------------------------------- ConfigureTest(BALANCED_TEST community/balanced_edge_test.cpp) @@ -822,7 +818,6 @@ ConfigureCTest(CAPI_ECG_TEST c_api/ecg_test.c) # xref: https://github.com/rapidsai/raft/issues/2186 # ############################################################################# # ConfigureCTest(CAPI_LEGACY_SPECTRAL_TEST c_api/legacy_spectral_test.c) -ConfigureCTest(CAPI_LEGACY_ECG_TEST c_api/legacy_ecg_test.c) ConfigureCTest(CAPI_CORE_NUMBER_TEST c_api/core_number_test.c) ConfigureCTest(CAPI_SIMILARITY_TEST c_api/similarity_test.c) ConfigureCTest(CAPI_K_CORE_TEST c_api/k_core_test.c) diff --git a/cpp/tests/c_api/legacy_ecg_test.c b/cpp/tests/c_api/legacy_ecg_test.c deleted file mode 100644 index b702426b0aa..00000000000 --- a/cpp/tests/c_api/legacy_ecg_test.c +++ /dev/null @@ -1,120 +0,0 @@ -/* - * Copyright (c) 2022-2024, NVIDIA CORPORATION. 
- * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "c_test_utils.h" /* RUN_TEST */ - -#include -#include - -#include - -typedef int32_t vertex_t; -typedef int32_t edge_t; -typedef float weight_t; - -int generic_ecg_test(vertex_t* h_src, - vertex_t* h_dst, - weight_t* h_wgt, - vertex_t* h_result, - size_t num_vertices, - size_t num_edges, - double minimum_weight, - size_t ensemble_size, - bool_t store_transposed) -{ - int test_ret_value = 0; - - cugraph_error_code_t ret_code = CUGRAPH_SUCCESS; - cugraph_error_t* ret_error; - - cugraph_resource_handle_t* p_handle = NULL; - cugraph_graph_t* p_graph = NULL; - cugraph_hierarchical_clustering_result_t* p_result = NULL; - - p_handle = cugraph_create_resource_handle(NULL); - TEST_ASSERT(test_ret_value, p_handle != NULL, "resource handle creation failed."); - - ret_code = create_test_graph( - p_handle, h_src, h_dst, h_wgt, num_edges, store_transposed, FALSE, FALSE, &p_graph, &ret_error); - - TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "create_test_graph failed."); - TEST_ALWAYS_ASSERT(ret_code == CUGRAPH_SUCCESS, cugraph_error_message(ret_error)); - - ret_code = cugraph_legacy_ecg( - p_handle, p_graph, minimum_weight, ensemble_size, FALSE, &p_result, &ret_error); - - TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, cugraph_error_message(ret_error)); - TEST_ALWAYS_ASSERT(ret_code == CUGRAPH_SUCCESS, "cugraph_ecg failed."); - - if (test_ret_value == 0) { - 
cugraph_type_erased_device_array_view_t* vertices; - cugraph_type_erased_device_array_view_t* clusters; - - vertices = cugraph_hierarchical_clustering_result_get_vertices(p_result); - clusters = cugraph_hierarchical_clustering_result_get_clusters(p_result); - - vertex_t h_vertices[num_vertices]; - edge_t h_clusters[num_vertices]; - - ret_code = cugraph_type_erased_device_array_view_copy_to_host( - p_handle, (byte_t*)h_vertices, vertices, &ret_error); - TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "copy_to_host failed."); - - ret_code = cugraph_type_erased_device_array_view_copy_to_host( - p_handle, (byte_t*)h_clusters, clusters, &ret_error); - TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "copy_to_host failed."); - - for (int i = 0; (i < num_vertices) && (test_ret_value == 0); ++i) { - TEST_ASSERT( - test_ret_value, h_result[h_vertices[i]] == h_clusters[i], "cluster results don't match"); - } - - cugraph_hierarchical_clustering_result_free(p_result); - } - - cugraph_sg_graph_free(p_graph); - cugraph_free_resource_handle(p_handle); - cugraph_error_free(ret_error); - - return test_ret_value; -} - -int test_ecg() -{ - size_t num_edges = 8; - size_t num_vertices = 6; - weight_t min_weight = 0.05; - size_t ensemble_size = 16; - - vertex_t h_src[] = {0, 1, 1, 2, 2, 2, 3, 4, 1, 3, 4, 0, 1, 3, 5, 5}; - vertex_t h_dst[] = {1, 3, 4, 0, 1, 3, 5, 5, 0, 1, 1, 2, 2, 2, 3, 4}; - weight_t h_wgt[] = { - 0.1f, 2.1f, 1.1f, 5.1f, 3.1f, 4.1f, 7.2f, 3.2f, 0.1f, 2.1f, 1.1f, 5.1f, 3.1f, 4.1f, 7.2f, 3.2f}; - vertex_t h_result[] = {0, 1, 0, 1, 1, 1}; - - // Louvain wants store_transposed = FALSE - return generic_ecg_test( - h_src, h_dst, h_wgt, h_result, num_vertices, num_edges, min_weight, ensemble_size, FALSE); -} - -/******************************************************************************/ - -int main(int argc, char** argv) -{ - int result = 0; - result |= RUN_TEST(test_ecg); - return result; -} diff --git a/cpp/tests/community/legacy_ecg_test.cpp 
b/cpp/tests/community/legacy_ecg_test.cpp deleted file mode 100644 index c1b35cb975b..00000000000 --- a/cpp/tests/community/legacy_ecg_test.cpp +++ /dev/null @@ -1,151 +0,0 @@ -/* - * Copyright (c) 2019-2024, NVIDIA CORPORATION. All rights reserved. - * - * NVIDIA CORPORATION and its licensors retain all intellectual property - * and proprietary rights in and to this software, related documentation - * and any modifications thereto. Any use, reproduction, disclosure or - * distribution of this software and related documentation without an express - * license agreement from NVIDIA CORPORATION is strictly prohibited. - * - */ -#include "utilities/conversion_utilities.hpp" - -#include -#include - -#include - -#include - -#include - -// FIXME: Temporarily disable this test. Something is wrong with -// ECG, or the expectation of this test. If I run ensemble size -// of 24 this fails. It also fails with the SG Louvain change -// for PR 1271 -#if 0 -TEST(legacy_ecg, success) -{ - // FIXME: verify that this is the karate dataset - std::vector off_h = {0, 16, 25, 35, 41, 44, 48, 52, 56, 61, 63, 66, - 67, 69, 74, 76, 78, 80, 82, 84, 87, 89, 91, 93, - 98, 101, 104, 106, 110, 113, 117, 121, 127, 139, 156}; - std::vector ind_h = { - 1, 2, 3, 4, 5, 6, 7, 8, 10, 11, 12, 13, 17, 19, 21, 31, 0, 2, 3, 7, 13, 17, 19, - 21, 30, 0, 1, 3, 7, 8, 9, 13, 27, 28, 32, 0, 1, 2, 7, 12, 13, 0, 6, 10, 0, 6, - 10, 16, 0, 4, 5, 16, 0, 1, 2, 3, 0, 2, 30, 32, 33, 2, 33, 0, 4, 5, 0, 0, 3, - 0, 1, 2, 3, 33, 32, 33, 32, 33, 5, 6, 0, 1, 32, 33, 0, 1, 33, 32, 33, 0, 1, 32, - 33, 25, 27, 29, 32, 33, 25, 27, 31, 23, 24, 31, 29, 33, 2, 23, 24, 33, 2, 31, 33, 23, 26, - 32, 33, 1, 8, 32, 33, 0, 24, 25, 28, 32, 33, 2, 8, 14, 15, 18, 20, 22, 23, 29, 30, 31, - 33, 8, 9, 13, 14, 15, 18, 19, 20, 22, 23, 26, 27, 28, 29, 30, 31, 32}; - - std::vector w_h(ind_h.size(), float{1.0}); - - int num_verts = off_h.size() - 1; - int num_edges = ind_h.size(); - - thrust::host_vector cluster_id(num_verts, -1); - - 
rmm::device_vector offsets_v(off_h); - rmm::device_vector indices_v(ind_h); - rmm::device_vector weights_v(w_h); - rmm::device_vector result_v(cluster_id); - - cugraph::legacy::GraphCSRView graph_csr( - offsets_v.data().get(), indices_v.data().get(), weights_v.data().get(), num_verts, num_edges); - - raft::handle_t handle; - cugraph::ecg(handle, graph_csr, .05, 16, result_v.data().get()); - - cluster_id = result_v; - int max = *max_element(cluster_id.begin(), cluster_id.end()); - int min = *min_element(cluster_id.begin(), cluster_id.end()); - - ASSERT_EQ((min >= 0), 1); - - std::set cluster_ids; - for (auto c : cluster_id) { cluster_ids.insert(c); } - - ASSERT_EQ(cluster_ids.size(), size_t(max + 1)); - - float modularity{0.0}; - - cugraph::ext_raft::analyzeClustering_modularity( - graph_csr, max + 1, result_v.data().get(), &modularity); - - // 0.399 is 5% below the reference value returned in - // /python/utils/ECG_Golden.ipynb on the same dataset - ASSERT_GT(modularity, 0.399); -} -#endif - -TEST(legacy_ecg, dolphin) -{ - raft::handle_t handle; - - auto stream = handle.get_stream(); - - std::vector off_h = {0, 6, 14, 18, 21, 22, 26, 32, 37, 43, 50, 55, 56, - 57, 65, 77, 84, 90, 99, 106, 110, 119, 125, 126, 129, 135, - 138, 141, 146, 151, 160, 165, 166, 169, 179, 184, 185, 192, 203, - 211, 213, 221, 226, 232, 239, 243, 254, 256, 262, 263, 265, 272, - 282, 286, 288, 295, 297, 299, 308, 309, 314, 315, 318}; - std::vector ind_h = { - 10, 14, 15, 40, 42, 47, 17, 19, 26, 27, 28, 36, 41, 54, 10, 42, 44, 61, 8, 14, 59, 51, 9, - 13, 56, 57, 9, 13, 17, 54, 56, 57, 19, 27, 30, 40, 54, 3, 20, 28, 37, 45, 59, 5, 6, 13, - 17, 32, 41, 57, 0, 2, 29, 42, 47, 51, 33, 5, 6, 9, 17, 32, 41, 54, 57, 0, 3, 16, 24, - 33, 34, 37, 38, 40, 43, 50, 52, 0, 18, 24, 40, 45, 55, 59, 14, 20, 33, 37, 38, 50, 1, 6, - 9, 13, 22, 25, 27, 31, 57, 15, 20, 21, 24, 29, 45, 51, 1, 7, 30, 54, 8, 16, 18, 28, 36, - 38, 44, 47, 50, 18, 29, 33, 37, 45, 51, 17, 36, 45, 51, 14, 15, 18, 29, 45, 51, 17, 26, 27, - 
1, 25, 27, 1, 7, 17, 25, 26, 1, 8, 20, 30, 47, 10, 18, 21, 24, 35, 43, 45, 51, 52, 7, - 19, 28, 42, 47, 17, 9, 13, 60, 12, 14, 16, 21, 34, 37, 38, 40, 43, 50, 14, 33, 37, 44, 49, - 29, 1, 20, 23, 37, 39, 40, 59, 8, 14, 16, 21, 33, 34, 36, 40, 43, 45, 61, 14, 16, 20, 33, - 43, 44, 52, 58, 36, 57, 0, 7, 14, 15, 33, 36, 37, 52, 1, 9, 13, 54, 57, 0, 2, 10, 30, - 47, 50, 14, 29, 33, 37, 38, 46, 53, 2, 20, 34, 38, 8, 15, 18, 21, 23, 24, 29, 37, 50, 51, - 59, 43, 49, 0, 10, 20, 28, 30, 42, 57, 34, 46, 14, 16, 20, 33, 42, 45, 51, 4, 11, 18, 21, - 23, 24, 29, 45, 50, 55, 14, 29, 38, 40, 43, 61, 1, 6, 7, 13, 19, 41, 57, 15, 51, 5, 6, - 5, 6, 9, 13, 17, 39, 41, 48, 54, 38, 3, 8, 15, 36, 45, 32, 2, 37, 53}; - - std::vector w_h(ind_h.size(), float{1.0}); - - int num_verts = off_h.size() - 1; - int num_edges = ind_h.size(); - - rmm::device_uvector offsets_v(num_verts + 1, stream); - rmm::device_uvector indices_v(num_edges, stream); - rmm::device_uvector weights_v(num_edges, stream); - rmm::device_uvector result_v(num_verts, stream); - - raft::update_device(offsets_v.data(), off_h.data(), off_h.size(), stream); - raft::update_device(indices_v.data(), ind_h.data(), ind_h.size(), stream); - raft::update_device(weights_v.data(), w_h.data(), w_h.size(), stream); - - cugraph::legacy::GraphCSRView graph_csr( - offsets_v.data(), indices_v.data(), weights_v.data(), num_verts, num_edges); - - cugraph::ecg(handle, graph_csr, .05, 16, result_v.data()); - - auto cluster_id = cugraph::test::to_host(handle, result_v); - - int max = *max_element(cluster_id.begin(), cluster_id.end()); - int min = *min_element(cluster_id.begin(), cluster_id.end()); - - ASSERT_EQ((min >= 0), 1); - - std::set cluster_ids; - for (auto c : cluster_id) { - cluster_ids.insert(c); - } - - ASSERT_EQ(cluster_ids.size(), size_t(max + 1)); - - float modularity{0.0}; - - cugraph::ext_raft::analyzeClustering_modularity(graph_csr, max + 1, result_v.data(), &modularity); - - float random_modularity{0.95 * 0.4962422251701355}; - 
- ASSERT_GT(modularity, random_modularity); -} - -CUGRAPH_TEST_PROGRAM_MAIN() diff --git a/cpp/tests/community/louvain_test.cpp b/cpp/tests/community/louvain_test.cpp index 0f30e0a7f12..d3fee7e3dd9 100644 --- a/cpp/tests/community/louvain_test.cpp +++ b/cpp/tests/community/louvain_test.cpp @@ -51,32 +51,6 @@ class Tests_Louvain virtual void SetUp() {} virtual void TearDown() {} - template - void run_legacy_test(std::tuple const& param) - { - auto [louvain_usecase, input_usecase] = param; - - // Legacy implementation does not support resolution parameter, - // defaulting it to 1. If the test case is not resolution - // 1 then skip it. - if (louvain_usecase.resolution_) - if (louvain_usecase.resolution_ != double{1}) return; - - raft::handle_t handle{}; - - bool directed{false}; - - auto graph = cugraph::test::legacy::construct_graph_csr( - handle, input_usecase, true); - auto graph_view = graph->view(); - - louvain_legacy(graph_view, - graph_view.get_number_of_vertices(), - louvain_usecase.check_correctness_, - louvain_usecase.expected_level_, - louvain_usecase.expected_modularity_); - } - template void run_current_test(std::tuple const& param) { @@ -130,32 +104,6 @@ class Tests_Louvain } } - template - void louvain_legacy(cugraph::legacy::GraphCSRView const& graph_view, - vertex_t num_vertices, - bool check_correctness, - int expected_level, - float expected_modularity) - { - raft::handle_t handle{}; - - rmm::device_uvector clustering_v(num_vertices, handle.get_stream()); - size_t level; - weight_t modularity; - - std::tie(level, modularity) = - cugraph::louvain(handle, graph_view, clustering_v.data(), size_t{100}, weight_t{1}); - - RAFT_CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement - - float compare_modularity = static_cast(modularity); - - if (check_correctness) { - ASSERT_FLOAT_EQ(compare_modularity, expected_modularity); - ASSERT_EQ(level, expected_level); - } - } - template void louvain( cugraph::graph_view_t const& graph_view, 
@@ -223,75 +171,6 @@ class Tests_Louvain // FIXME: add tests for type combinations -TEST(louvain_legacy, success) -{ - raft::handle_t handle; - - auto stream = handle.get_stream(); - - std::vector off_h = {0, 16, 25, 35, 41, 44, 48, 52, 56, 61, 63, 66, - 67, 69, 74, 76, 78, 80, 82, 84, 87, 89, 91, 93, - 98, 101, 104, 106, 110, 113, 117, 121, 127, 139, 156}; - std::vector ind_h = { - 1, 2, 3, 4, 5, 6, 7, 8, 10, 11, 12, 13, 17, 19, 21, 31, 0, 2, 3, 7, 13, 17, 19, - 21, 30, 0, 1, 3, 7, 8, 9, 13, 27, 28, 32, 0, 1, 2, 7, 12, 13, 0, 6, 10, 0, 6, - 10, 16, 0, 4, 5, 16, 0, 1, 2, 3, 0, 2, 30, 32, 33, 2, 33, 0, 4, 5, 0, 0, 3, - 0, 1, 2, 3, 33, 32, 33, 32, 33, 5, 6, 0, 1, 32, 33, 0, 1, 33, 32, 33, 0, 1, 32, - 33, 25, 27, 29, 32, 33, 25, 27, 31, 23, 24, 31, 29, 33, 2, 23, 24, 33, 2, 31, 33, 23, 26, - 32, 33, 1, 8, 32, 33, 0, 24, 25, 28, 32, 33, 2, 8, 14, 15, 18, 20, 22, 23, 29, 30, 31, - 33, 8, 9, 13, 14, 15, 18, 19, 20, 22, 23, 26, 27, 28, 29, 30, 31, 32}; - std::vector w_h = { - 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, - 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, - 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, - 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, - 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, - 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, - 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, - 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, - 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0}; - - std::vector result_h = {1, 1, 1, 1, 0, 0, 0, 1, 3, 1, 0, 1, 1, 1, 3, 3, 0, - 1, 3, 1, 3, 1, 3, 2, 2, 2, 3, 2, 1, 3, 3, 2, 3, 3}; - - int num_verts = off_h.size() - 1; - int num_edges = ind_h.size(); - - 
rmm::device_uvector offsets_v(num_verts + 1, stream); - rmm::device_uvector indices_v(num_edges, stream); - rmm::device_uvector weights_v(num_edges, stream); - rmm::device_uvector result_v(num_verts, stream); - - raft::update_device(offsets_v.data(), off_h.data(), off_h.size(), stream); - raft::update_device(indices_v.data(), ind_h.data(), ind_h.size(), stream); - raft::update_device(weights_v.data(), w_h.data(), w_h.size(), stream); - - cugraph::legacy::GraphCSRView G( - offsets_v.data(), indices_v.data(), weights_v.data(), num_verts, num_edges); - - float modularity{0.0}; - size_t num_level = 40; - - // "FIXME": remove this check once we drop support for Pascal - // - // Calling louvain on Pascal will throw an exception, we'll check that - // this is the behavior while we still support Pascal (device_prop.major < 7) - // - if (handle.get_device_properties().major < 7) { - EXPECT_THROW(cugraph::louvain(handle, G, result_v.data()), cugraph::logic_error); - } else { - std::tie(num_level, modularity) = cugraph::louvain(handle, G, result_v.data()); - - auto cluster_id = cugraph::test::to_host(handle, result_v); - - int min = *min_element(cluster_id.begin(), cluster_id.end()); - - ASSERT_GE(min, 0); - ASSERT_FLOAT_EQ(modularity, 0.408695); - ASSERT_EQ(cluster_id, result_h); - } -} - using Tests_Louvain_File = Tests_Louvain; using Tests_Louvain_File32 = Tests_Louvain; using Tests_Louvain_File64 = Tests_Louvain; @@ -299,16 +178,6 @@ using Tests_Louvain_Rmat = Tests_Louvain; using Tests_Louvain_Rmat32 = Tests_Louvain; using Tests_Louvain_Rmat64 = Tests_Louvain; -#if 0 -// FIXME: Reenable legacy tests once threshold parameter is exposed -// by louvain legacy API. 
-TEST_P(Tests_Louvain_File, CheckInt32Int32FloatFloatLegacy) -{ - run_legacy_test( - override_File_Usecase_with_cmd_line_arguments(GetParam())); -} -#endif - TEST_P(Tests_Louvain_File, CheckInt32Int32FloatFloat) { run_current_test( @@ -336,11 +205,6 @@ TEST_P(Tests_Louvain_File64, CheckInt64Int64FloatFloat) #if 0 // FIXME: We should use these tests, gtest-1.11.0 makes it a runtime error // to define and not instantiate these. -TEST_P(Tests_Louvain_Rmat, CheckInt32Int32FloatFloatLegacy) -{ - run_legacy_test( - override_Rmat_Usecase_with_cmd_line_arguments(GetParam())); -} TEST_P(Tests_Louvain_Rmat, CheckInt32Int32FloatFloat) { diff --git a/cpp/tests/lookup/lookup_src_dst_test.cpp b/cpp/tests/lookup/lookup_src_dst_test.cpp index b2e4355dff4..87f4958f740 100644 --- a/cpp/tests/lookup/lookup_src_dst_test.cpp +++ b/cpp/tests/lookup/lookup_src_dst_test.cpp @@ -301,6 +301,6 @@ INSTANTIATE_TEST_SUITE_P( Tests_SGLookupEdgeSrcDst_Rmat, ::testing::Combine( ::testing::Values(EdgeSrcDstLookup_UseCase{false}), - ::testing::Values(cugraph::test::Rmat_Usecase(20, 32, 0.57, 0.19, 0.19, 0, false, false)))); + ::testing::Values(cugraph::test::Rmat_Usecase(10, 16, 0.57, 0.19, 0.19, 0, false, false)))); CUGRAPH_TEST_PROGRAM_MAIN() diff --git a/cpp/tests/lookup/mg_lookup_src_dst_test.cpp b/cpp/tests/lookup/mg_lookup_src_dst_test.cpp index 26119801b76..8d31b0ca157 100644 --- a/cpp/tests/lookup/mg_lookup_src_dst_test.cpp +++ b/cpp/tests/lookup/mg_lookup_src_dst_test.cpp @@ -344,6 +344,6 @@ INSTANTIATE_TEST_SUITE_P( Tests_MGLookupEdgeSrcDst_Rmat, ::testing::Combine( ::testing::Values(EdgeSrcDstLookup_UseCase{false}), - ::testing::Values(cugraph::test::Rmat_Usecase(5, 32, 0.57, 0.19, 0.19, 0, true, false)))); + ::testing::Values(cugraph::test::Rmat_Usecase(10, 16, 0.57, 0.19, 0.19, 0, true, false)))); CUGRAPH_MG_TEST_PROGRAM_MAIN() diff --git a/notebooks/algorithms/community/Community-Clustering.ipynb b/notebooks/algorithms/community/Community-Clustering.ipynb index 8ff14c66c95..4560073b1aa 
100644 --- a/notebooks/algorithms/community/Community-Clustering.ipynb +++ b/notebooks/algorithms/community/Community-Clustering.ipynb @@ -78,7 +78,8 @@ "def compute_clusters(_graph) :\n", "\n", " # Compute ECG Clusters and normalize the column names\n", - " _e = cugraph.ecg(_graph).rename(columns={'partition': 'cluster'})\n", + " _e, _ = cugraph.ecg(_graph)\n", + " _e = _e.rename(columns={'partition': 'cluster'})\n", " \n", " # Compute Louvain Clusters \n", " _l, modularity = cugraph.louvain(_graph)\n", @@ -354,7 +355,7 @@ "metadata": {}, "source": [ "___\n", - "Copyright (c) 2022, NVIDIA CORPORATION.\n", + "Copyright (c) 2022-2024, NVIDIA CORPORATION.\n", "\n", "Licensed under the Apache License, Version 2.0 (the \"License\"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0\n", "\n", @@ -379,7 +380,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.8.13" + "version": "3.11.9" }, "orig_nbformat": 4, "vscode": { diff --git a/notebooks/algorithms/community/ECG.ipynb b/notebooks/algorithms/community/ECG.ipynb index c6c5a7b1b03..966cd0f42c9 100644 --- a/notebooks/algorithms/community/ECG.ipynb +++ b/notebooks/algorithms/community/ECG.ipynb @@ -162,16 +162,7 @@ "outputs": [], "source": [ "# Call ecg on the graph\n", - "df = cugraph.ecg(G) " - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "df.dtypes" + "df, _ = cugraph.ecg(G)" ] }, { @@ -215,7 +206,7 @@ "metadata": {}, "source": [ "___\n", - "Copyright (c) 2019-2023, NVIDIA CORPORATION.\n", + "Copyright (c) 2019-2024, NVIDIA CORPORATION.\n", "\n", "Licensed under the Apache License, Version 2.0 (the \"License\"); you may not use this file except in compliance with the License. 
You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0\n", "\n", @@ -240,7 +231,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.10.12" + "version": "3.11.9" }, "vscode": { "interpreter": { diff --git a/python/cugraph/cugraph/community/ecg.py b/python/cugraph/cugraph/community/ecg.py index fbf8df43867..36f9d51f781 100644 --- a/python/cugraph/cugraph/community/ecg.py +++ b/python/cugraph/cugraph/community/ecg.py @@ -1,4 +1,4 @@ -# Copyright (c) 2019-2023, NVIDIA CORPORATION. +# Copyright (c) 2019-2024, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -11,17 +11,24 @@ # See the License for the specific language governing permissions and # limitations under the License. -from cugraph.utilities import ( - ensure_cugraph_obj_for_nx, - df_score_to_dictionary, -) -import cudf - from pylibcugraph import ecg as pylibcugraph_ecg from pylibcugraph import ResourceHandle - -def ecg(input_graph, min_weight=0.05, ensemble_size=16, weight=None): +import cudf +import warnings +from cugraph.utilities import ensure_cugraph_obj_for_nx, df_score_to_dictionary + + +def ecg( + input_graph, + min_weight: float = 0.0001, + ensemble_size: int = 100, + max_level: int = 10, + threshold: float = 1e-7, + resolution: float = 1.0, + random_state: int = None, + weight=None, +): """ Compute the Ensemble Clustering for Graphs (ECG) partition of the input graph. ECG runs truncated Louvain on an ensemble of permutations of the @@ -48,7 +55,31 @@ def ecg(input_graph, min_weight=0.05, ensemble_size=16, weight=None): The default value is 16, larger values may produce higher quality partitions for some graphs. + max_level : integer, optional (default=10) + This controls the maximum number of levels/iterations of the ECG + algorithm.
When specified the algorithm will terminate after no more + than the specified number of iterations. No error occurs when the + algorithm terminates early in this manner. + + threshold: float + Modularity gain threshold for each level. If the gain of + modularity between 2 levels of the algorithm is less than the + given threshold then the algorithm stops and returns the + resulting communities. + Defaults to 1e-7. + + resolution: float, optional (default=1.0) + Called gamma in the modularity formula, this changes the size + of the communities. Higher resolutions lead to more smaller + communities, lower resolutions lead to fewer larger communities. + Defaults to 1. + + random_state: int, optional(default=None) + Random state to use when generating samples. Optional argument, + defaults to a hash of process id, time, and hostname. + weight : str, optional (default=None) + Deprecated. This parameter is here for NetworkX compatibility and represents which NetworkX data column represents Edge weights. @@ -58,37 +89,58 @@ def ecg(input_graph, min_weight=0.05, ensemble_size=16, weight=None): GPU data frame of size V containing two columns, the vertex id and the partition id it is assigned to. - df[vertex] : cudf.Series + parts[vertex] : cudf.Series Contains the vertex identifiers - df[partition] : cudf.Series + parts[partition] : cudf.Series Contains the partition assigned to the vertices + modularity_score : float + A floating point number containing the global modularity score + of the partitioning. + Examples -------- >>> from cugraph.datasets import karate >>> G = karate.get_graph(download=True) - >>> parts = cugraph.ecg(G) + >>> parts, mod = cugraph.ecg(G) """ input_graph, isNx = ensure_cugraph_obj_for_nx(input_graph) - vertex, partition = pylibcugraph_ecg( + if isNx: + warning_msg = ( + " We are deprecating support for handling " + "NetworkX types in the next release." 
+ ) + warnings.warn(warning_msg, UserWarning) + + if weight is not None: + warning_msg = ( + "This parameter is deprecated and will be removed in the next release." + ) + warnings.warn(warning_msg, UserWarning) + + vertex, partition, modularity_score = pylibcugraph_ecg( resource_handle=ResourceHandle(), + random_state=random_state, graph=input_graph._plc_graph, min_weight=min_weight, ensemble_size=ensemble_size, + max_level=max_level, + threshold=threshold, + resolution=resolution, do_expensive_check=False, ) - df = cudf.DataFrame() - df["vertex"] = vertex - df["partition"] = partition + parts = cudf.DataFrame() + parts["vertex"] = vertex + parts["partition"] = partition if input_graph.renumbered: - df = input_graph.unrenumber(df, "vertex") + parts = input_graph.unrenumber(parts, "vertex") if isNx is True: - df = df_score_to_dictionary(df, "partition") + parts = df_score_to_dictionary(parts, "partition") - return df + return parts, modularity_score diff --git a/python/cugraph/cugraph/tests/community/test_ecg.py b/python/cugraph/cugraph/tests/community/test_ecg.py index be59b5d5bb3..e0ad71744b4 100644 --- a/python/cugraph/cugraph/tests/community/test_ecg.py +++ b/python/cugraph/cugraph/tests/community/test_ecg.py @@ -1,4 +1,4 @@ -# Copyright (c) 2020-2023, NVIDIA CORPORATION. +# Copyright (c) 2020-2024, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
# You may obtain a copy of the License at @@ -21,14 +21,20 @@ from cugraph.datasets import karate, dolphins, netscience -def cugraph_call(G, min_weight, ensemble_size): - df = cugraph.ecg(G, min_weight, ensemble_size) - num_parts = df["partition"].max() + 1 - score = cugraph.analyzeClustering_modularity( - G, num_parts, df, "vertex", "partition" +def cugraph_call( + G, min_weight, ensemble_size, max_level, threshold, resolution, random_state +): + parts, mod = cugraph.ecg( + G, + min_weight=min_weight, + ensemble_size=ensemble_size, + max_level=max_level, + threshold=threshold, + resolution=resolution, + random_state=random_state, ) - - return score, num_parts + num_parts = parts["partition"].max() + 1 + return mod, num_parts def golden_call(filename): @@ -42,16 +48,30 @@ def golden_call(filename): DATASETS = [karate, dolphins, netscience] -MIN_WEIGHTS = [0.05, 0.10, 0.15] +MIN_WEIGHTS = [0.05, 0.15] ENSEMBLE_SIZES = [16, 32] +MAX_LEVELS = [10, 20] + +RESOLUTIONS = [0.95, 1.0] + +THRESHOLDS = [1e-6, 1e-07] + +RANDOM_STATES = [0, 42] + @pytest.mark.sg @pytest.mark.parametrize("dataset", DATASETS) @pytest.mark.parametrize("min_weight", MIN_WEIGHTS) @pytest.mark.parametrize("ensemble_size", ENSEMBLE_SIZES) -def test_ecg_clustering(dataset, min_weight, ensemble_size): +@pytest.mark.parametrize("max_level", MAX_LEVELS) +@pytest.mark.parametrize("threshold", THRESHOLDS) +@pytest.mark.parametrize("resolution", RESOLUTIONS) +@pytest.mark.parametrize("random_state", RANDOM_STATES) +def test_ecg_clustering( + dataset, min_weight, ensemble_size, max_level, threshold, resolution, random_state +): gc.collect() # Read in the graph and get a cugraph object @@ -62,20 +82,28 @@ def test_ecg_clustering(dataset, min_weight, ensemble_size): ) # Get the modularity score for partitioning versus random assignment - cu_score, num_parts = cugraph_call(G, min_weight, ensemble_size) + cu_score, num_parts = cugraph_call( + G, min_weight, ensemble_size, max_level, threshold, resolution, 
random_state + ) filename = dataset.metadata["name"] golden_score = golden_call(filename) # Assert that the partitioning has better modularity than the random # assignment - assert cu_score > (0.95 * golden_score) + assert cu_score > (0.80 * golden_score) @pytest.mark.sg @pytest.mark.parametrize("dataset", DATASETS) @pytest.mark.parametrize("min_weight", MIN_WEIGHTS) @pytest.mark.parametrize("ensemble_size", ENSEMBLE_SIZES) -def test_ecg_clustering_nx(dataset, min_weight, ensemble_size): +@pytest.mark.parametrize("max_level", MAX_LEVELS) +@pytest.mark.parametrize("threshold", THRESHOLDS) +@pytest.mark.parametrize("resolution", RESOLUTIONS) +@pytest.mark.parametrize("random_state", RANDOM_STATES) +def test_ecg_clustering_nx( + dataset, min_weight, ensemble_size, max_level, threshold, resolution, random_state +): gc.collect() dataset_path = dataset.get_path() @@ -86,6 +114,14 @@ def test_ecg_clustering_nx(dataset, min_weight, ensemble_size): ) # Get the modularity score for partitioning versus random assignment - df_dict = cugraph.ecg(G, min_weight, ensemble_size, "weight") + df_dict, _ = cugraph.ecg( + G, + min_weight=min_weight, + ensemble_size=ensemble_size, + max_level=max_level, + threshold=threshold, + resolution=resolution, + random_state=random_state, + ) assert isinstance(df_dict, dict) diff --git a/python/pylibcugraph/pylibcugraph/_cugraph_c/community_algorithms.pxd b/python/pylibcugraph/pylibcugraph/_cugraph_c/community_algorithms.pxd index 0e9529146cc..b8cd6148456 100644 --- a/python/pylibcugraph/pylibcugraph/_cugraph_c/community_algorithms.pxd +++ b/python/pylibcugraph/pylibcugraph/_cugraph_c/community_algorithms.pxd @@ -153,18 +153,6 @@ cdef extern from "cugraph_c/community_algorithms.h": cugraph_error_t** error ) ########################################################################### - # Legacy ECG - cdef cugraph_error_code_t \ - cugraph_legacy_ecg( - const cugraph_resource_handle_t* handle, - cugraph_graph_t* graph, - double min_weight, - size_t 
ensemble_size, - bool_t do_expensive_check, - cugraph_hierarchical_clustering_result_t** result, - cugraph_error_t** error - ) - # ECG cdef cugraph_error_code_t \ cugraph_ecg( diff --git a/python/pylibcugraph/pylibcugraph/ecg.pyx b/python/pylibcugraph/pylibcugraph/ecg.pyx index 4e5407348e3..4b8d8901d34 100644 --- a/python/pylibcugraph/pylibcugraph/ecg.pyx +++ b/python/pylibcugraph/pylibcugraph/ecg.pyx @@ -31,10 +31,10 @@ from pylibcugraph._cugraph_c.graph cimport ( ) from pylibcugraph._cugraph_c.community_algorithms cimport ( cugraph_hierarchical_clustering_result_t, - cugraph_legacy_ecg, cugraph_ecg, cugraph_hierarchical_clustering_result_get_vertices, cugraph_hierarchical_clustering_result_get_clusters, + cugraph_hierarchical_clustering_result_get_modularity, cugraph_hierarchical_clustering_result_free, ) @@ -48,12 +48,21 @@ from pylibcugraph.utils cimport ( assert_success, copy_to_cupy_array, ) - +from pylibcugraph._cugraph_c.random cimport ( + cugraph_rng_state_t +) +from pylibcugraph.random cimport ( + CuGraphRandomState +) def ecg(ResourceHandle resource_handle, + random_state, _GPUGraph graph, - min_weight, + double min_weight, size_t ensemble_size, + size_t max_level, + double threshold, + double resolution, bool_t do_expensive_check ): """ @@ -71,6 +80,11 @@ def ecg(ResourceHandle resource_handle, Handle to the underlying device resources needed for referencing data and running algorithms. + random_state : int, optional + Random state to use when generating samples. Optional argument, + defaults to a hash of process id, time, and hostname. + (See pylibcugraph.random.CuGraphRandomState) + graph : SGGraph The input graph. @@ -84,6 +98,24 @@ def ecg(ResourceHandle resource_handle, The default value is 16, larger values may produce higher quality partitions for some graphs. + max_level: size_t + This controls the maximum number of levels/iterations of the Louvain + algorithm.
When specified the algorithm will terminate after no more + than the specified number of iterations. No error occurs when the + algorithm terminates early in this manner. + + threshold: float + Modularity gain threshold for each level. If the gain of + modularity between 2 levels of the algorithm is less than the + given threshold then the algorithm stops and returns the + resulting communities. + + resolution: double + Called gamma in the modularity formula, this changes the size + of the communities. Higher resolutions lead to more smaller + communities, lower resolutions lead to fewer larger communities. + Defaults to 1. + do_expensive_check : bool_t If True, performs more extensive tests on the inputs to ensure validitity, at the expense of increased run time. @@ -125,13 +157,22 @@ def ecg(ResourceHandle resource_handle, cdef cugraph_error_code_t error_code cdef cugraph_error_t* error_ptr - error_code = cugraph_legacy_ecg(c_resource_handle_ptr, + cg_rng_state = CuGraphRandomState(resource_handle, random_state) + + cdef cugraph_rng_state_t* rng_state_ptr = cg_rng_state.rng_state_ptr + + error_code = cugraph_ecg(c_resource_handle_ptr, + rng_state_ptr, c_graph_ptr, min_weight, ensemble_size, + max_level, + threshold, + resolution, do_expensive_check, &result_ptr, &error_ptr) + assert_success(error_code, error_ptr, "cugraph_ecg") # Extract individual device array pointers from result and copy to cupy @@ -140,10 +181,12 @@ def ecg(ResourceHandle resource_handle, cugraph_hierarchical_clustering_result_get_vertices(result_ptr) cdef cugraph_type_erased_device_array_view_t* clusters_ptr = \ cugraph_hierarchical_clustering_result_get_clusters(result_ptr) + cdef double modularity = \ + cugraph_hierarchical_clustering_result_get_modularity(result_ptr) cupy_vertices = copy_to_cupy_array(c_resource_handle_ptr, vertices_ptr) cupy_clusters = copy_to_cupy_array(c_resource_handle_ptr, clusters_ptr) cugraph_hierarchical_clustering_result_free(result_ptr) - return (cupy_vertices, 
cupy_clusters) + return (cupy_vertices, cupy_clusters, modularity)