Skip to content

Commit

Permalink
Fix OOM Bug for Jaccard, Sorensen, and Overlap benchmarks (#4524)
Browse files Browse the repository at this point in the history
Closes #4510

This PR fixes a bug in `benchmarks/cugraph/pytest-based/bench_algos.py` for Jaccard and Overlap (and adds Sorensen to the list of algos to be run).

The reason for the error was that the benchmarks were trying to compute two_hop_neighbors on all nodes, which would cause the GPU to run out of memory.

Authors:
  - Ralph Liu (https://github.com/nv-rliu)

Approvers:
  - Joseph Nke (https://github.com/jnke2016)
  - Alex Barghi (https://github.com/alexbarghi-nv)

URL: #4524
  • Loading branch information
nv-rliu authored Jul 9, 2024
1 parent 2cabe02 commit 4464504
Show file tree
Hide file tree
Showing 3 changed files with 43 additions and 2 deletions.
33 changes: 31 additions & 2 deletions benchmarks/cugraph/pytest-based/bench_algos.py
Original file line number Diff line number Diff line change
Expand Up @@ -268,6 +268,20 @@ def is_graph_distributed(graph):
return isinstance(graph.edgelist.edgelist_df, dask_cudf.DataFrame)


def get_vertex_pairs(G, num_vertices=10):
    """
    Return a DataFrame of two-hop vertex pairs seeded from a random sample
    of vertices in a Graph.

    Parameters
    ----------
    G : Graph
        Graph object providing ``select_random_vertices`` and
        ``get_two_hop_neighbors``.
    num_vertices : int (default=10)
        Number of random vertices to request as seeds.

    Returns
    -------
    The result of ``G.get_two_hop_neighbors`` for the sampled seed vertices.
    """
    sampled = G.select_random_vertices(num_vertices=num_vertices)

    # The sample may come back as a dask_cudf Series (presumably when the
    # graph is distributed); materialize it before converting to a list.
    needs_compute = isinstance(sampled, dask_cudf.Series)
    seeds = sampled.compute() if needs_compute else sampled

    seed_list = seeds.to_arrow().to_pylist()
    return G.get_two_hop_neighbors(start_vertices=seed_list)


###############################################################################
# Benchmarks
def bench_create_graph(gpubenchmark, edgelist):
Expand Down Expand Up @@ -323,8 +337,20 @@ def bench_sssp(gpubenchmark, graph):

def bench_jaccard(gpubenchmark, unweighted_graph):
    """
    Benchmark jaccard on a sampled subset of two-hop vertex pairs.

    Parameters
    ----------
    gpubenchmark : fixture
        Benchmark fixture; called with the algorithm and its arguments.
    unweighted_graph : fixture
        Graph to run the algorithm on.
    """
    G = unweighted_graph
    # algo cannot compute neighbors on all nodes without running into OOM
    # this is why we will call jaccard on a subset of nodes
    vert_pairs = get_vertex_pairs(G)
    jaccard = dask_cugraph.jaccard if is_graph_distributed(G) else cugraph.jaccard
    # Benchmark only the subset call; an all-vertices jaccard(G) call here
    # would reintroduce the OOM and run the benchmark fixture twice.
    gpubenchmark(jaccard, G, vert_pairs)


def bench_sorensen(gpubenchmark, unweighted_graph):
    """
    Benchmark sorensen on a sampled subset of two-hop vertex pairs.

    Parameters
    ----------
    gpubenchmark : fixture
        Benchmark fixture; called with the algorithm and its arguments.
    unweighted_graph : fixture
        Graph to run the algorithm on.
    """
    G = unweighted_graph
    # Running on every node exhausts GPU memory while computing neighbors,
    # so only a sampled subset of vertex pairs is benchmarked.
    vert_pairs = get_vertex_pairs(G)
    if is_graph_distributed(G):
        sorensen = dask_cugraph.sorensen
    else:
        sorensen = cugraph.sorensen
    gpubenchmark(sorensen, G, vert_pairs)


@pytest.mark.skipif(
Expand All @@ -347,8 +373,11 @@ def bench_weakly_connected_components(gpubenchmark, graph):

def bench_overlap(gpubenchmark, unweighted_graph):
    """
    Benchmark overlap on a sampled subset of two-hop vertex pairs.

    Parameters
    ----------
    gpubenchmark : fixture
        Benchmark fixture; called with the algorithm and its arguments.
    unweighted_graph : fixture
        Graph to run the algorithm on.
    """
    G = unweighted_graph
    # algo cannot compute neighbors on all nodes without running into OOM
    # this is why we will call overlap on a subset of nodes
    vertex_pairs = get_vertex_pairs(G)
    overlap = dask_cugraph.overlap if is_graph_distributed(G) else cugraph.overlap
    # Benchmark only the subset call; an all-vertices overlap(G) call here
    # would reintroduce the OOM and run the benchmark fixture twice.
    gpubenchmark(overlap, G, vertex_pairs)


def bench_triangle_count(gpubenchmark, graph):
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -855,6 +855,12 @@ def get_two_hop_neighbors(self, start_vertices=None):
Compute vertex pairs that are two hops apart. The resulting pairs are
sorted before returning.
Parameters
----------
start_vertices : Int or List (default=None)
Subset of vertices to compute two hop neighbors on. If None, compute
for all nodes.
Returns
-------
df : cudf.DataFrame
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -728,6 +728,12 @@ def get_two_hop_neighbors(self, start_vertices=None):
Compute vertex pairs that are two hops apart. The resulting pairs are
sorted before returning.
Parameters
----------
start_vertices : Int or List (default=None)
Subset of vertices to compute two hop neighbors on. If None, compute
for all nodes.
Returns
-------
df : cudf.DataFrame
Expand Down

0 comments on commit 4464504

Please sign in to comment.