diff --git a/conda/environments/all_cuda-118_arch-x86_64.yaml b/conda/environments/all_cuda-118_arch-x86_64.yaml index 4eecff8b3f1..f0b86c791f8 100644 --- a/conda/environments/all_cuda-118_arch-x86_64.yaml +++ b/conda/environments/all_cuda-118_arch-x86_64.yaml @@ -46,6 +46,7 @@ dependencies: - packaging>=21 - pandas - pre-commit +- pydantic - pydata-sphinx-theme - pylibcugraphops==24.10.*,>=0.0.0a0 - pylibraft==24.10.*,>=0.0.0a0 @@ -72,6 +73,7 @@ dependencies: - sphinx<6 - sphinxcontrib-websupport - thriftpy2!=0.5.0,!=0.5.1 +- torchdata - ucx-proc=*=gpu - ucx-py==0.40.*,>=0.0.0a0 - wget diff --git a/conda/environments/all_cuda-125_arch-x86_64.yaml b/conda/environments/all_cuda-125_arch-x86_64.yaml index 0930c8d450d..ebded3eec92 100644 --- a/conda/environments/all_cuda-125_arch-x86_64.yaml +++ b/conda/environments/all_cuda-125_arch-x86_64.yaml @@ -51,6 +51,7 @@ dependencies: - packaging>=21 - pandas - pre-commit +- pydantic - pydata-sphinx-theme - pylibcugraphops==24.10.*,>=0.0.0a0 - pylibraft==24.10.*,>=0.0.0a0 @@ -77,6 +78,7 @@ dependencies: - sphinx<6 - sphinxcontrib-websupport - thriftpy2!=0.5.0,!=0.5.1 +- torchdata - ucx-proc=*=gpu - ucx-py==0.40.*,>=0.0.0a0 - wget diff --git a/conda/recipes/cugraph-dgl/meta.yaml b/conda/recipes/cugraph-dgl/meta.yaml index ca4fdb7f2fc..0affe456b73 100644 --- a/conda/recipes/cugraph-dgl/meta.yaml +++ b/conda/recipes/cugraph-dgl/meta.yaml @@ -28,8 +28,10 @@ requirements: - numba >=0.57 - numpy >=1.23,<2.0a0 - pylibcugraphops ={{ minor_version }} + - tensordict >=0.1.2 - python - - pytorch + - pytorch >=2.0 + - cupy >=12.0.0 tests: imports: diff --git a/dependencies.yaml b/dependencies.yaml index 9e9cfcc63a3..6bb728a2aae 100644 --- a/dependencies.yaml +++ b/dependencies.yaml @@ -659,6 +659,7 @@ dependencies: - *cugraph_unsuffixed - pytorch>=2.0 - pytorch-cuda==11.8 + - &tensordict tensordict>=0.1.2 - dgl>=1.1.0.cu* cugraph_pyg_dev: common: @@ -667,7 +668,7 @@ dependencies: - *cugraph_unsuffixed - pytorch>=2.0 - pytorch-cuda==11.8 - - &tensordict tensordict>=0.1.2 + - *tensordict - pyg>=2.5,<2.6 depends_on_pytorch: @@ -675,6 +676,8 @@ dependencies: - output_types: [conda] packages: - &pytorch_unsuffixed pytorch>=2.0,<2.2.0a0 + - torchdata + - pydantic specific: - output_types: [requirements] diff --git a/python/cugraph-dgl/conda/cugraph_dgl_dev_cuda-118.yaml b/python/cugraph-dgl/conda/cugraph_dgl_dev_cuda-118.yaml index b7846ad31a8..ea30b652286 100644 --- a/python/cugraph-dgl/conda/cugraph_dgl_dev_cuda-118.yaml +++ b/python/cugraph-dgl/conda/cugraph_dgl_dev_cuda-118.yaml @@ -21,4 +21,5 @@ dependencies: - pytorch-cuda==11.8 - pytorch>=2.0 - scipy +- tensordict>=0.1.2 name: cugraph_dgl_dev_cuda-118 diff --git a/python/cugraph-dgl/cugraph_dgl/__init__.py b/python/cugraph-dgl/cugraph_dgl/__init__.py index 03ff50896a4..58850d47fba 100644 --- a/python/cugraph-dgl/cugraph_dgl/__init__.py +++ b/python/cugraph-dgl/cugraph_dgl/__init__.py @@ -1,4 +1,4 @@ -# Copyright (c) 2019-2023, NVIDIA CORPORATION. +# Copyright (c) 2019-2024, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -15,8 +15,12 @@ # to prevent rapids context being created when importing cugraph_dgl os.environ["RAPIDS_NO_INITIALIZE"] = "1" +from cugraph_dgl.graph import Graph from cugraph_dgl.cugraph_storage import CuGraphStorage -from cugraph_dgl.convert import cugraph_storage_from_heterograph +from cugraph_dgl.convert import ( + cugraph_storage_from_heterograph, + cugraph_dgl_graph_from_heterograph, +) import cugraph_dgl.dataloading import cugraph_dgl.nn diff --git a/python/cugraph-dgl/cugraph_dgl/convert.py b/python/cugraph-dgl/cugraph_dgl/convert.py index 1235f07adf1..ae4b96dd391 100644 --- a/python/cugraph-dgl/cugraph_dgl/convert.py +++ b/python/cugraph-dgl/cugraph_dgl/convert.py @@ -1,4 +1,4 @@ -# Copyright (c) 2022-2023, NVIDIA CORPORATION. +# Copyright (c) 2022-2024, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -12,6 +12,8 @@ # limitations under the License. from __future__ import annotations from cugraph.utilities.utils import import_optional + +import cugraph_dgl from cugraph_dgl import CuGraphStorage from cugraph_dgl.utils.cugraph_conversion_utils import ( get_edges_dict_from_dgl_HeteroGraph, @@ -39,3 +41,53 @@ def cugraph_storage_from_heterograph( add_ndata_from_dgl_HeteroGraph(gs, g) add_edata_from_dgl_HeteroGraph(gs, g) return gs + + +def cugraph_dgl_graph_from_heterograph( + input_graph: dgl.DGLGraph, + single_gpu: bool = True, + ndata_storage: str = "torch", + edata_storage: str = "torch", + **kwargs, +) -> cugraph_dgl.Graph: + """ + Converts a DGL Graph to a cuGraph-DGL Graph. + """ + + output_graph = cugraph_dgl.Graph( + is_multi_gpu=(not single_gpu), + ndata_storage=ndata_storage, + edata_storage=edata_storage, + **kwargs, + ) + + # Calling is_homogeneous does not work here + if len(input_graph.ntypes) <= 1: + output_graph.add_nodes( + input_graph.num_nodes(), data=input_graph.ndata, ntype=input_graph.ntypes[0] + ) + else: + for ntype in input_graph.ntypes: + data = { + k: v_dict[ntype] + for k, v_dict in input_graph.ndata.items() + if ntype in v_dict + } + output_graph.add_nodes(input_graph.num_nodes(ntype), data=data, ntype=ntype) + + if len(input_graph.canonical_etypes) <= 1: + can_etype = input_graph.canonical_etypes[0] + src_t, dst_t = input_graph.edges(form="uv", etype=can_etype) + output_graph.add_edges(src_t, dst_t, input_graph.edata, etype=can_etype) + else: + for can_etype in input_graph.canonical_etypes: + data = { + k: v_dict[can_etype] + for k, v_dict in input_graph.edata.items() + if can_etype in v_dict + } + + src_t, dst_t = input_graph.edges(form="uv", etype=can_etype) + output_graph.add_edges(src_t, dst_t, data=data, etype=can_etype) + + return output_graph diff --git a/python/cugraph-dgl/cugraph_dgl/dataloading/__init__.py b/python/cugraph-dgl/cugraph_dgl/dataloading/__init__.py index 2fd7d29bd49..8a2e9cd954d 100644 --- a/python/cugraph-dgl/cugraph_dgl/dataloading/__init__.py +++ b/python/cugraph-dgl/cugraph_dgl/dataloading/__init__.py @@ -1,4 +1,4 @@ -# Copyright (c) 2019-2023, NVIDIA CORPORATION. +# Copyright (c) 2019-2024, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -11,9 +11,25 @@ # See the License for the specific language governing permissions and # limitations under the License. +import warnings + from cugraph_dgl.dataloading.dataset import ( HomogenousBulkSamplerDataset, HeterogenousBulkSamplerDataset, ) + +from cugraph_dgl.dataloading.sampler import Sampler from cugraph_dgl.dataloading.neighbor_sampler import NeighborSampler -from cugraph_dgl.dataloading.dataloader import DataLoader + +from cugraph_dgl.dataloading.dask_dataloader import DaskDataLoader +from cugraph_dgl.dataloading.dataloader import DataLoader as FutureDataLoader + + +def DataLoader(*args, **kwargs): + warnings.warn( + "DataLoader has been renamed to DaskDataLoader. " + "In Release 24.10, cugraph_dgl.dataloading.FutureDataLoader " + "will take over the DataLoader name.", + FutureWarning, + ) + return DaskDataLoader(*args, **kwargs) diff --git a/python/cugraph-dgl/cugraph_dgl/dataloading/dask_dataloader.py b/python/cugraph-dgl/cugraph_dgl/dataloading/dask_dataloader.py new file mode 100644 index 00000000000..e220b93f738 --- /dev/null +++ b/python/cugraph-dgl/cugraph_dgl/dataloading/dask_dataloader.py @@ -0,0 +1,321 @@ +# Copyright (c) 2023-2024, NVIDIA CORPORATION. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from __future__ import annotations +import os +import shutil +import cugraph_dgl +import cupy as cp +import cudf +from cugraph.utilities.utils import import_optional +from cugraph.gnn import BulkSampler +from dask.distributed import default_client, Event +from cugraph_dgl.dataloading import ( + HomogenousBulkSamplerDataset, + HeterogenousBulkSamplerDataset, +) +from cugraph_dgl.dataloading.utils.extract_graph_helpers import ( + create_cugraph_graph_from_edges_dict, +) + +dgl = import_optional("dgl") +torch = import_optional("torch") + + +class DaskDataLoader(torch.utils.data.DataLoader): + """ + Sampled graph data loader. Wrap a :class:`~cugraph_dgl.CuGraphStorage` and a + :class:`~cugraph_dgl.dataloading.NeighborSampler` into + an iterable over mini-batches of samples. cugraph_dgl's ``DataLoader`` extends + PyTorch's ``DataLoader`` by handling creation and + transmission of graph samples. + """ + + def __init__( + self, + graph: cugraph_dgl.CuGraphStorage, + indices: torch.Tensor, + graph_sampler: cugraph_dgl.dataloading.NeighborSampler, + sampling_output_dir: str, + batches_per_partition: int = 50, + seeds_per_call: int = 200_000, + device: torch.device = None, + use_ddp: bool = False, + ddp_seed: int = 0, + batch_size: int = 1024, + drop_last: bool = False, + shuffle: bool = False, + sparse_format: str = "coo", + **kwargs, + ): + """ + Constructor for DaskDataLoader: + ------------------------------- + graph : CuGraphStorage + The graph. + indices : Tensor or dict[ntype, Tensor] + The set of indices. It can either be a tensor of + integer indices or a dictionary of types and indices. + The actual meaning of the indices is defined by the :meth:`sample` method of + :attr:`graph_sampler`. + graph_sampler : cugraph_dgl.dataloading.NeighborSampler + The subgraph sampler. + sampling_output_dir: str + Output directory to share sampling results in + batches_per_partition: int + The number of batches of sampling results to write/read + seeds_per_call: int + The number of seeds to sample at once + device : device context, optional + The device of the generated MFGs in each iteration, which should be a + PyTorch device object (e.g., ``torch.device``). + By default this returns the tenors on device with the current + cuda context + use_ddp : boolean, optional + If True, tells the DataLoader to split the training set for each + participating process appropriately using + :class:`torch.utils.data.distributed.DistributedSampler`. + Overrides the :attr:`sampler` argument of + :class:`torch.utils.data.DataLoader`. + ddp_seed : int, optional + The seed for shuffling the dataset in + :class:`torch.utils.data.distributed.DistributedSampler`. + Only effective when :attr:`use_ddp` is True. + batch_size: int + Batch size. + sparse_format: str, default = "coo" + The sparse format of the emitted sampled graphs. Choose between "csc" + and "coo". When using "csc", the graphs are of type + cugraph_dgl.nn.SparseGraph. + kwargs : dict + Key-word arguments to be passed to the parent PyTorch + :py:class:`torch.utils.data.DataLoader` class. Common arguments are: + - ``batch_size`` (int): The number of indices in each batch. + - ``drop_last`` (bool): Whether to drop the last incomplete + batch. + - ``shuffle`` (bool): Whether to randomly shuffle the + indices at each epoch + Examples + -------- + To train a 3-layer GNN for node classification on a set of nodes + ``train_nid`` on a homogeneous graph where each node takes messages + from 15 neighbors on the first layer, 10 neighbors on the second, and + 5 neighbors on the third: + >>> sampler = cugraph_dgl.dataloading.NeighborSampler([15, 10, 5]) + >>> dataloader = cugraph_dgl.dataloading.DataLoader( + ... g, train_nid, sampler, + ... batch_size=1024, shuffle=True, drop_last=False, num_workers=0) + >>> for input_nodes, output_nodes, blocks in dataloader: + ... train_on(input_nodes, output_nodes, blocks) + **Using with Distributed Data Parallel** + If you are using PyTorch's distributed training (e.g. when using + :mod:`torch.nn.parallel.DistributedDataParallel`), + you can train the model by turning + on the `use_ddp` option: + >>> sampler = cugraph_dgl.dataloading.NeighborSampler([15, 10, 5]) + >>> dataloader = cugraph_dgl.dataloading.DataLoader( + ... g, train_nid, sampler, use_ddp=True, + ... batch_size=1024, shuffle=True, drop_last=False, num_workers=0) + >>> for epoch in range(start_epoch, n_epochs): + ... for input_nodes, output_nodes, blocks in dataloader: + ... + """ + if sparse_format not in ["coo", "csc"]: + raise ValueError( + f"sparse_format must be one of 'coo', 'csc', " + f"but got {sparse_format}." + ) + self.sparse_format = sparse_format + + self.ddp_seed = ddp_seed + self.use_ddp = use_ddp + self.shuffle = shuffle + self.drop_last = drop_last + self.graph_sampler = graph_sampler + worker_init_fn = dgl.dataloading.WorkerInitWrapper( + kwargs.get("worker_init_fn", None) + ) + self.other_storages = {} + self.epoch_number = 0 + self._batch_size = batch_size + self._sampling_output_dir = sampling_output_dir + self._batches_per_partition = batches_per_partition + self._seeds_per_call = seeds_per_call + self._rank = None + + indices = _dgl_idx_to_cugraph_idx(indices, graph) + + self.tensorized_indices_ds = dgl.dataloading.create_tensorized_dataset( + indices, + batch_size, + drop_last, + use_ddp, + ddp_seed, + shuffle, + kwargs.get("persistent_workers", False), + ) + + if len(graph.ntypes) <= 1: + self.cugraph_dgl_dataset = HomogenousBulkSamplerDataset( + total_number_of_nodes=graph.total_number_of_nodes, + edge_dir=self.graph_sampler.edge_dir, + sparse_format=sparse_format, + ) + else: + etype_id_to_etype_str_dict = {v: k for k, v in graph._etype_id_dict.items()} + + self.cugraph_dgl_dataset = HeterogenousBulkSamplerDataset( + num_nodes_dict=graph.num_nodes_dict, + etype_id_dict=etype_id_to_etype_str_dict, + etype_offset_dict=graph._etype_offset_d, + ntype_offset_dict=graph._ntype_offset_d, + edge_dir=self.graph_sampler.edge_dir, + ) + + if use_ddp: + rank = torch.distributed.get_rank() + client = default_client() + self._graph_creation_event = Event("cugraph_dgl_load_mg_graph_event") + if rank == 0: + G = create_cugraph_graph_from_edges_dict( + edges_dict=graph._edges_dict, + etype_id_dict=graph._etype_id_dict, + edge_dir=graph_sampler.edge_dir, + ) + client.publish_dataset(cugraph_dgl_mg_graph_ds=G) + self._graph_creation_event.set() + else: + if self._graph_creation_event.wait(timeout=1000): + G = client.get_dataset("cugraph_dgl_mg_graph_ds") + else: + raise RuntimeError( + f"Fetch cugraph_dgl_mg_graph_ds to worker_id {rank}", + "from worker_id 0 failed", + ) + else: + rank = 0 + G = create_cugraph_graph_from_edges_dict( + edges_dict=graph._edges_dict, + etype_id_dict=graph._etype_id_dict, + edge_dir=graph_sampler.edge_dir, + ) + + self._rank = rank + self._cugraph_graph = G + super().__init__( + self.cugraph_dgl_dataset, + batch_size=None, + worker_init_fn=worker_init_fn, + collate_fn=lambda x: x, # Hack to prevent collating + **kwargs, + ) + + def __iter__(self): + output_dir = os.path.join( + self._sampling_output_dir, "epoch_" + str(self.epoch_number) + ) + kwargs = {} + if isinstance(self.cugraph_dgl_dataset, HomogenousBulkSamplerDataset): + kwargs["deduplicate_sources"] = True + kwargs["prior_sources_behavior"] = "carryover" + kwargs["renumber"] = True + + if self.sparse_format == "csc": + kwargs["compression"] = "CSR" + kwargs["compress_per_hop"] = True + # The following kwargs will be deprecated in uniform sampler. + kwargs["use_legacy_names"] = False + kwargs["include_hop_column"] = False + + else: + kwargs["deduplicate_sources"] = False + kwargs["prior_sources_behavior"] = None + kwargs["renumber"] = False + + bs = BulkSampler( + output_path=output_dir, + batch_size=self._batch_size, + graph=self._cugraph_graph, + batches_per_partition=self._batches_per_partition, + seeds_per_call=self._seeds_per_call, + fanout_vals=self.graph_sampler._reversed_fanout_vals, + with_replacement=self.graph_sampler.replace, + **kwargs, + ) + + if self.shuffle: + self.tensorized_indices_ds.shuffle() + + batch_df = create_batch_df(self.tensorized_indices_ds) + bs.add_batches(batch_df, start_col_name="start", batch_col_name="batch_id") + bs.flush() + self.cugraph_dgl_dataset.set_input_files(input_directory=output_dir) + self.epoch_number = self.epoch_number + 1 + return super().__iter__() + + def __del__(self): + if self.use_ddp: + torch.distributed.barrier() + if self._rank == 0: + if self.use_ddp: + client = default_client() + client.unpublish_dataset("cugraph_dgl_mg_graph_ds") + self._graph_creation_event.clear() + _clean_directory(self._sampling_output_dir) + + +def get_batch_id_series(n_output_rows: int, batch_size: int) -> cudf.Series: + num_batches = (n_output_rows + batch_size - 1) // batch_size + print(f"Number of batches = {num_batches}".format(num_batches)) + batch_ar = cp.arange(0, num_batches).repeat(batch_size) + batch_ar = batch_ar[0:n_output_rows].astype(cp.int32) + return cudf.Series(batch_ar) + + +def create_batch_df(dataset: torch.Tensor) -> cudf.DataFrame: + batch_id_ls = [] + indices_ls = [] + for batch_id, b_indices in enumerate(dataset): + if isinstance(b_indices, dict): + b_indices = torch.cat(list(b_indices.values())) + batch_id_ar = cp.full(shape=len(b_indices), fill_value=batch_id, dtype=cp.int32) + batch_id_ls.append(batch_id_ar) + indices_ls.append(b_indices) + + batch_id_ar = cp.concatenate(batch_id_ls) + indices_ar = cp.asarray(torch.concat(indices_ls)) + batches_df = cudf.DataFrame( + { + "start": indices_ar, + "batch_id": batch_id_ar, + } + ) + return batches_df + + +def _dgl_idx_to_cugraph_idx(idx, cugraph_gs): + if not isinstance(idx, dict): + if len(cugraph_gs.ntypes) > 1: + raise dgl.DGLError( + "Must specify node type when the graph is not homogeneous." + ) + return idx + else: + return {k: cugraph_gs.dgl_n_id_to_cugraph_id(n, k) for k, n in idx.items()} + + +def _clean_directory(path): + """param could either be relative or absolute.""" + if os.path.isfile(path): + os.remove(path) # remove the file + elif os.path.isdir(path): + shutil.rmtree(path) # remove dir and all contains diff --git a/python/cugraph-dgl/cugraph_dgl/dataloading/dataloader.py b/python/cugraph-dgl/cugraph_dgl/dataloading/dataloader.py index 11139910931..21b70b05f3a 100644 --- a/python/cugraph-dgl/cugraph_dgl/dataloading/dataloader.py +++ b/python/cugraph-dgl/cugraph_dgl/dataloading/dataloader.py @@ -1,4 +1,4 @@ -# Copyright (c) 2023, NVIDIA CORPORATION. +# Copyright (c) 2024, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -10,151 +10,121 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -from __future__ import annotations -import os -import shutil -import cugraph_dgl -import cupy as cp -import cudf + +import warnings + +from typing import Union, Optional, Dict + from cugraph.utilities.utils import import_optional -from cugraph.gnn import BulkSampler -from dask.distributed import default_client, Event -from cugraph_dgl.dataloading import ( - HomogenousBulkSamplerDataset, - HeterogenousBulkSamplerDataset, -) -from cugraph_dgl.dataloading.utils.extract_graph_helpers import ( - create_cugraph_graph_from_edges_dict, -) + +import cugraph_dgl +from cugraph_dgl.typing import TensorType +from cugraph_dgl.utils.cugraph_conversion_utils import _cast_to_torch_tensor dgl = import_optional("dgl") torch = import_optional("torch") -class DataLoader(torch.utils.data.DataLoader): +class DataLoader: """ - Sampled graph data loader. Wrap a :class:`~cugraph_dgl.CuGraphStorage` and a - :class:`~cugraph_dgl.dataloading.NeighborSampler` into - an iterable over mini-batches of samples. cugraph_dgl's ``DataLoader`` extends - PyTorch's ``DataLoader`` by handling creation and - transmission of graph samples. + Duck-typed version of dgl.dataloading.DataLoader """ def __init__( self, - graph: cugraph_dgl.CuGraphStorage, - indices: torch.Tensor, - graph_sampler: cugraph_dgl.dataloading.NeighborSampler, - sampling_output_dir: str, - batches_per_partition: int = 50, - seeds_per_call: int = 200_000, - device: torch.device = None, + graph: "cugraph_dgl.Graph", + indices: TensorType, + graph_sampler: "cugraph_dgl.dataloading.Sampler", + device: Union[int, str, "torch.device"] = None, use_ddp: bool = False, ddp_seed: int = 0, - batch_size: int = 1024, + batch_size: int = 1, drop_last: bool = False, shuffle: bool = False, - sparse_format: str = "coo", + use_prefetch_thread: Optional[bool] = None, + use_alternate_streams: Optional[bool] = None, + pin_prefetcher: Optional[bool] = None, + use_uva=False, + gpu_cache: Dict[str, Dict[str, int]] = None, + output_format: str = "dgl.Block", **kwargs, ): """ - Constructor for CuGraphStorage: - ------------------------------- - graph : CuGraphStorage - The graph. - indices : Tensor or dict[ntype, Tensor] - The set of indices. It can either be a tensor of - integer indices or a dictionary of types and indices. - The actual meaning of the indices is defined by the :meth:`sample` method of - :attr:`graph_sampler`. - graph_sampler : cugraph_dgl.dataloading.NeighborSampler - The subgraph sampler. - sampling_output_dir: str - Output directory to share sampling results in - batches_per_partition: int - The number of batches of sampling results to write/read - seeds_per_call: int - The number of seeds to sample at once - device : device context, optional - The device of the generated MFGs in each iteration, which should be a - PyTorch device object (e.g., ``torch.device``). - By default this returns the tenors on device with the current - cuda context - use_ddp : boolean, optional - If True, tells the DataLoader to split the training set for each - participating process appropriately using - :class:`torch.utils.data.distributed.DistributedSampler`. - Overrides the :attr:`sampler` argument of - :class:`torch.utils.data.DataLoader`. - ddp_seed : int, optional - The seed for shuffling the dataset in - :class:`torch.utils.data.distributed.DistributedSampler`. - Only effective when :attr:`use_ddp` is True. - batch_size: int - Batch size. - sparse_format: str, default = "coo" - The sparse format of the emitted sampled graphs. Choose between "csc" - and "coo". When using "csc", the graphs are of type - cugraph_dgl.nn.SparseGraph. - kwargs : dict - Key-word arguments to be passed to the parent PyTorch - :py:class:`torch.utils.data.DataLoader` class. Common arguments are: - - ``batch_size`` (int): The number of indices in each batch. - - ``drop_last`` (bool): Whether to drop the last incomplete - batch. - - ``shuffle`` (bool): Whether to randomly shuffle the - indices at each epoch - Examples - -------- - To train a 3-layer GNN for node classification on a set of nodes - ``train_nid`` on a homogeneous graph where each node takes messages - from 15 neighbors on the first layer, 10 neighbors on the second, and - 5 neighbors on the third: - >>> sampler = cugraph_dgl.dataloading.NeighborSampler([15, 10, 5]) - >>> dataloader = cugraph_dgl.dataloading.DataLoader( - ... g, train_nid, sampler, - ... batch_size=1024, shuffle=True, drop_last=False, num_workers=0) - >>> for input_nodes, output_nodes, blocks in dataloader: - ... train_on(input_nodes, output_nodes, blocks) - **Using with Distributed Data Parallel** - If you are using PyTorch's distributed training (e.g. when using - :mod:`torch.nn.parallel.DistributedDataParallel`), - you can train the model by turning - on the `use_ddp` option: - >>> sampler = cugraph_dgl.dataloading.NeighborSampler([15, 10, 5]) - >>> dataloader = cugraph_dgl.dataloading.DataLoader( - ... g, train_nid, sampler, use_ddp=True, - ... batch_size=1024, shuffle=True, drop_last=False, num_workers=0) - >>> for epoch in range(start_epoch, n_epochs): - ... for input_nodes, output_nodes, blocks in dataloader: - ... + Parameters + ---------- + graph: cugraph_dgl.Graph + The graph being sampled. Can be a single-GPU or multi-GPU graph. + indices: TensorType + The seed nodes for sampling. If use_ddp=True, then all seed + nodes should be provided. If use_ddp=False, then only the seed + nodes assigned to this worker should be provided. + graph_sampler: cugraph_dgl.dataloading.Sampler + The sampler responsible for sampling the graph and producing + output minibatches. + device: Union[int, str, torch.device] + Optional. + The device assigned to this loader ('cpu', 'cuda' or device id). + Defaults to the current device. + use_ddp: bool + Optional (default=False). + If true, this argument will assume the entire list of input seed + nodes is being passed to each worker, and will appropriately + split and shuffle the list. + It false, then it is assumed that the list of input seed nodes + is comprised of the union of the lists provided to each worker. + ddp_seed: int + Optional (default=0). + The seed used for dividing and shuffling data if use_ddp=True. + Has no effect if use_ddp=False. + use_uva: bool + Optional (default=False). + Whether to use pinned memory and unified virtual addressing + to perform sampling. + This argument is ignored by cuGraph-DGL. + use_prefetch_thread: bool + Optional (default=False). + Whether to spawn a new thread for feature fetching. + This argument is ignored by cuGraph-DGL. + use_alternate_streams: bool + Optional (default=False). + Whether to perform feature fetching on a separate stream. + This argument is ignored by cuGraph-DGL. + pin_prefetcher: bool + Optional (default=False). + Whether to pin the feature tensors. + This argument is currently ignored by cuGraph-DGL. + gpu_cache: Dict[str, Dict[str, int]] + List of features to cache using HugeCTR. + This argument is not supported by cuGraph-DGL and + will result in an error. + output_format: str + Optional (default="dgl.Block"). + The output format for blocks. + Can be either "dgl.Block" or "cugraph_dgl.nn.SparseGraph". """ - if sparse_format not in ["coo", "csc"]: + + if use_uva: + warnings.warn("The 'use_uva' argument is ignored by cuGraph-DGL.") + if use_prefetch_thread: + warnings.warn( + "The 'use_prefetch_thread' argument is ignored by cuGraph-DGL." + ) + if use_alternate_streams: + warnings.warn( + "The 'use_alternate_streams' argument is ignored by cuGraph-DGL." + ) + if pin_prefetcher: + warnings.warn("The 'pin_prefetcher' argument is ignored by cuGraph-DGL.") + if gpu_cache: raise ValueError( - f"sparse_format must be one of 'coo', 'csc', " - f"but got {sparse_format}." + "HugeCTR is not supported by cuGraph-DGL. " + "Consider using WholeGraph for feature storage" + " in cugraph_dgl.Graph instead." ) - self.sparse_format = sparse_format - self.ddp_seed = ddp_seed - self.use_ddp = use_ddp - self.shuffle = shuffle - self.drop_last = drop_last - self.graph_sampler = graph_sampler - worker_init_fn = dgl.dataloading.WorkerInitWrapper( - kwargs.get("worker_init_fn", None) - ) - self.other_storages = {} - self.epoch_number = 0 - self._batch_size = batch_size - self._sampling_output_dir = sampling_output_dir - self._batches_per_partition = batches_per_partition - self._seeds_per_call = seeds_per_call - self._rank = None - - indices = _dgl_idx_to_cugraph_idx(indices, graph) + indices = _cast_to_torch_tensor(indices) - self.tensorized_indices_ds = dgl.dataloading.create_tensorized_dataset( + self.__dataset = dgl.dataloading.create_tensorized_dataset( indices, batch_size, drop_last, @@ -164,158 +134,25 @@ def __init__( kwargs.get("persistent_workers", False), ) - if len(graph.ntypes) <= 1: - self.cugraph_dgl_dataset = HomogenousBulkSamplerDataset( - total_number_of_nodes=graph.total_number_of_nodes, - edge_dir=self.graph_sampler.edge_dir, - sparse_format=sparse_format, - ) - else: - etype_id_to_etype_str_dict = {v: k for k, v in graph._etype_id_dict.items()} - - self.cugraph_dgl_dataset = HeterogenousBulkSamplerDataset( - num_nodes_dict=graph.num_nodes_dict, - etype_id_dict=etype_id_to_etype_str_dict, - etype_offset_dict=graph._etype_offset_d, - ntype_offset_dict=graph._ntype_offset_d, - edge_dir=self.graph_sampler.edge_dir, - ) + self.__output_format = output_format + self.__sampler = graph_sampler + self.__batch_size = batch_size + self.__graph = graph + self.__device = device - if use_ddp: - rank = torch.distributed.get_rank() - client = default_client() - self._graph_creation_event = Event("cugraph_dgl_load_mg_graph_event") - if rank == 0: - G = create_cugraph_graph_from_edges_dict( - edges_dict=graph._edges_dict, - etype_id_dict=graph._etype_id_dict, - edge_dir=graph_sampler.edge_dir, - ) - client.publish_dataset(cugraph_dgl_mg_graph_ds=G) - self._graph_creation_event.set() - else: - if self._graph_creation_event.wait(timeout=1000): - G = client.get_dataset("cugraph_dgl_mg_graph_ds") - else: - raise RuntimeError( - f"Fetch cugraph_dgl_mg_graph_ds to worker_id {rank}", - "from worker_id 0 failed", - ) - else: - rank = 0 - G = create_cugraph_graph_from_edges_dict( - edges_dict=graph._edges_dict, - etype_id_dict=graph._etype_id_dict, - edge_dir=graph_sampler.edge_dir, - ) - - self._rank = rank - self._cugraph_graph = G - super().__init__( - self.cugraph_dgl_dataset, - batch_size=None, - worker_init_fn=worker_init_fn, - collate_fn=lambda x: x, # Hack to prevent collating - **kwargs, - ) + @property + def dataset( + self, + ) -> Union[ + "dgl.dataloading.dataloader.TensorizedDataset", + "dgl.dataloading.dataloader.DDPTensorizedDataset", + ]: + return self.__dataset def __iter__(self): - output_dir = os.path.join( - self._sampling_output_dir, "epoch_" + str(self.epoch_number) + # TODO move to the correct device (rapidsai/cugraph-gnn#11) + return self.__sampler.sample( + self.__graph, + self.__dataset, + batch_size=self.__batch_size, ) - kwargs = {} - if isinstance(self.cugraph_dgl_dataset, HomogenousBulkSamplerDataset): - kwargs["deduplicate_sources"] = True - kwargs["prior_sources_behavior"] = "carryover" - kwargs["renumber"] = True - - if self.sparse_format == "csc": - kwargs["compression"] = "CSR" - kwargs["compress_per_hop"] = True - # The following kwargs will be deprecated in uniform sampler. - kwargs["use_legacy_names"] = False - kwargs["include_hop_column"] = False - - else: - kwargs["deduplicate_sources"] = False - kwargs["prior_sources_behavior"] = None - kwargs["renumber"] = False - - bs = BulkSampler( - output_path=output_dir, - batch_size=self._batch_size, - graph=self._cugraph_graph, - batches_per_partition=self._batches_per_partition, - seeds_per_call=self._seeds_per_call, - fanout_vals=self.graph_sampler._reversed_fanout_vals, - with_replacement=self.graph_sampler.replace, - **kwargs, - ) - - if self.shuffle: - self.tensorized_indices_ds.shuffle() - - batch_df = create_batch_df(self.tensorized_indices_ds) - bs.add_batches(batch_df, start_col_name="start", batch_col_name="batch_id") - bs.flush() - self.cugraph_dgl_dataset.set_input_files(input_directory=output_dir) - self.epoch_number = self.epoch_number + 1 - return super().__iter__() - - def __del__(self): - if self.use_ddp: - torch.distributed.barrier() - if self._rank == 0: - if self.use_ddp: - client = default_client() - client.unpublish_dataset("cugraph_dgl_mg_graph_ds") - self._graph_creation_event.clear() - _clean_directory(self._sampling_output_dir) - - -def get_batch_id_series(n_output_rows: int, batch_size: int): - num_batches = (n_output_rows + batch_size - 1) // batch_size - print(f"Number of batches = {num_batches}".format(num_batches)) - batch_ar = cp.arange(0, num_batches).repeat(batch_size) - batch_ar = batch_ar[0:n_output_rows].astype(cp.int32) - return cudf.Series(batch_ar) - - -def create_batch_df(dataset: torch.Tensor): - batch_id_ls = [] - indices_ls = [] - for batch_id, b_indices in enumerate(dataset): - if isinstance(b_indices, dict): - b_indices = torch.cat(list(b_indices.values())) - batch_id_ar = cp.full(shape=len(b_indices), fill_value=batch_id, dtype=cp.int32) - batch_id_ls.append(batch_id_ar) - indices_ls.append(b_indices) - - batch_id_ar = cp.concatenate(batch_id_ls) - indices_ar = cp.asarray(torch.concat(indices_ls)) - batches_df = cudf.DataFrame( - { - "start": indices_ar, - "batch_id": batch_id_ar, - } - ) - return batches_df - - -def _dgl_idx_to_cugraph_idx(idx, cugraph_gs): - if not isinstance(idx, dict): - if len(cugraph_gs.ntypes) > 1: - raise dgl.DGLError( - "Must specify node type when the graph is not homogeneous." - ) - return idx - else: - return {k: cugraph_gs.dgl_n_id_to_cugraph_id(n, k) for k, n in idx.items()} - - -def _clean_directory(path): - """param could either be relative or absolute.""" - if os.path.isfile(path): - os.remove(path) # remove the file - elif os.path.isdir(path): - shutil.rmtree(path) # remove dir and all contains diff --git a/python/cugraph-dgl/cugraph_dgl/dataloading/neighbor_sampler.py b/python/cugraph-dgl/cugraph_dgl/dataloading/neighbor_sampler.py index b61f05f6379..1a35c3ea027 100644 --- a/python/cugraph-dgl/cugraph_dgl/dataloading/neighbor_sampler.py +++ b/python/cugraph-dgl/cugraph_dgl/dataloading/neighbor_sampler.py @@ -1,4 +1,4 @@ -# Copyright (c) 2022-2023, NVIDIA CORPORATION. +# Copyright (c) 2022-2024, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -10,11 +10,25 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. + from __future__ import annotations -from typing import Sequence +import warnings +import tempfile + +from typing import Sequence, Optional, Union, List, Tuple, Iterator + +from cugraph.gnn import UniformNeighborSampler, DistSampleWriter +from cugraph.utilities.utils import import_optional + +import cugraph_dgl +from cugraph_dgl.typing import DGLSamplerOutput +from cugraph_dgl.dataloading.sampler import Sampler, HomogeneousSampleReader -class NeighborSampler: +torch = import_optional("torch") + + +class NeighborSampler(Sampler): """Sampler that builds computational dependency of node representations via neighbor sampling for multilayer GNN. This sampler will make every node gather messages from a fixed number of neighbors @@ -50,7 +64,88 @@ def __init__( fanouts_per_layer: Sequence[int], edge_dir: str = "in", replace: bool = False, + prob: Optional[str] = None, + mask: Optional[str] = None, + prefetch_node_feats: Optional[Union[List[str], dict[str, List[str]]]] = None, + prefetch_edge_feats: Optional[ + Union[List[str], dict[Tuple[str, str, str], List[str]]] + ] = None, + prefetch_labels: Optional[Union[List[str], dict[str, List[str]]]] = None, + output_device: Optional[Union["torch.device", int, str]] = None, + fused: Optional[bool] = None, + sparse_format="csc", + output_format="dgl.Block", + **kwargs, ): + """ + Parameters + ---------- + fanouts_per_layer: Sequence[int] + The number of neighbors to sample per layer. + edge_dir: str + Optional (default='in'). + The direction to traverse edges. + replace: bool + Optional (default=False). + Whether to sample with replacement. + prob: str + Optional. + If provided, the probability of each neighbor being + sampled is proportional to the edge feature + with the given name. Mutually exclusive with mask. + Currently unsupported. + mask: str + Optional. + If proivided, only neighbors where the edge mask + with the given name is True can be selected. + Mutually exclusive with prob. + Currently unsupported. + prefetch_node_feats: Union[List[str], dict[str, List[str]]] + Optional. + Currently ignored by cuGraph-DGL. + prefetch_edge_feats: Union[List[str], dict[Tuple[str, str, str], List[str]]] + Optional. + Currently ignored by cuGraph-DGL. + prefetch_labels: Union[List[str], dict[str, List[str]]] + Optional. + Currently ignored by cuGraph-DGL. + output_device: Union[torch.device, int, str] + Optional. + Output device for samples. Defaults to the current device. + fused: bool + Optional. + This argument is ignored by cuGraph-DGL. + sparse_format: str + Optional (default = "coo"). + The sparse format of the emitted sampled graphs. + Currently, only "csc" is supported. + output_format: str + Optional (default = "dgl.Block") + The output format of the emitted sampled graphs. + Can be either "dgl.Block" (default), or "cugraph_dgl.nn.SparseGraph". + **kwargs + Keyword arguments for the underlying cuGraph distributed sampler + and writer (directory, batches_per_partition, format, + local_seeds_per_call). + """ + + if mask: + raise NotImplementedError( + "Edge masking is currently unsupported by cuGraph-DGL" + ) + if prob: + raise NotImplementedError( + "Edge masking is currently unsupported by cuGraph-DGL" + ) + if prefetch_edge_feats: + warnings.warn("'prefetch_edge_feats' is ignored by cuGraph-DGL") + if prefetch_node_feats: + warnings.warn("'prefetch_node_feats' is ignored by cuGraph-DGL") + if prefetch_labels: + warnings.warn("'prefetch_labels' is ignored by cuGraph-DGL") + if fused: + warnings.warn("'fused' is ignored by cuGraph-DGL") + self.fanouts = fanouts_per_layer reverse_fanouts = fanouts_per_layer.copy() reverse_fanouts.reverse() @@ -58,3 +153,53 @@ def __init__( self.edge_dir = edge_dir self.replace = replace + self.__kwargs = kwargs + + super().__init__( + sparse_format=sparse_format, + output_format=output_format, + ) + + def sample( + self, + g: "cugraph_dgl.Graph", + indices: Iterator["torch.Tensor"], + batch_size: int = 1, + ) -> Iterator[DGLSamplerOutput]: + kwargs = dict(**self.__kwargs) + + directory = kwargs.pop("directory", None) + if directory is None: + warnings.warn("Setting a directory to store samples is recommended.") + self._tempdir = tempfile.TemporaryDirectory() + directory = self._tempdir.name + + writer = DistSampleWriter( + directory=directory, + batches_per_partition=kwargs.pop("batches_per_partition", 256), + format=kwargs.pop("format", "parquet"), + ) + + ds = UniformNeighborSampler( + g._graph(self.edge_dir), + writer, + compression="CSR", + fanout=self._reversed_fanout_vals, + prior_sources_behavior="carryover", + deduplicate_sources=True, + compress_per_hop=True, + with_replacement=self.replace, + **kwargs, + ) + + if g.is_homogeneous: + indices = torch.concat(list(indices)) + ds.sample_from_nodes(indices, batch_size=batch_size) + return HomogeneousSampleReader( + ds.get_reader(), self.output_format, self.edge_dir + ) + + raise ValueError( + "Sampling heterogeneous graphs is currently" + " unsupported in the non-dask API" + ) diff --git a/python/cugraph-dgl/cugraph_dgl/dataloading/sampler.py b/python/cugraph-dgl/cugraph_dgl/dataloading/sampler.py new file mode 100644 index 00000000000..731ec1b8d6f --- /dev/null +++ b/python/cugraph-dgl/cugraph_dgl/dataloading/sampler.py @@ -0,0 +1,193 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from typing import Iterator, Dict, Tuple, List, Union + +import cugraph_dgl +from cugraph_dgl.nn import SparseGraph +from cugraph_dgl.typing import DGLSamplerOutput +from cugraph_dgl.dataloading.utils.sampling_helpers import ( + create_homogeneous_sampled_graphs_from_tensors_csc, +) + +from cugraph.gnn import DistSampleReader + +from cugraph.utilities.utils import import_optional + +torch = import_optional("torch") +dgl = import_optional("dgl") + + +class SampleReader: + """ + Iterator that processes results from the cuGraph distributed sampler. + """ + + def __init__(self, base_reader: DistSampleReader, output_format: str = "dgl.Block"): + """ + Constructs a new SampleReader. + + Parameters + ---------- + base_reader: DistSampleReader + The reader responsible for loading saved samples produced by + the cuGraph distributed sampler. + """ + self.__output_format = output_format + self.__base_reader = base_reader + self.__num_samples_remaining = 0 + self.__index = 0 + + @property + def output_format(self) -> str: + return self.__output_format + + def __next__(self) -> DGLSamplerOutput: + if self.__num_samples_remaining == 0: + # raw_sample_data is already a dict of tensors + self.__raw_sample_data, start_inclusive, end_inclusive = next( + self.__base_reader + ) + + self.__decoded_samples = self._decode_all(self.__raw_sample_data) + self.__num_samples_remaining = end_inclusive - start_inclusive + 1 + self.__index = 0 + + out = self.__decoded_samples[self.__index] + self.__index += 1 + self.__num_samples_remaining -= 1 + return out + + def _decode_all(self) -> List[DGLSamplerOutput]: + raise NotImplementedError("Must be implemented by subclass") + + def __iter__(self) -> DGLSamplerOutput: + return self + + +class HomogeneousSampleReader(SampleReader): + """ + Subclass of SampleReader that reads DGL homogeneous output samples + produced by the cuGraph distributed sampler. + """ + + def __init__( + self, + base_reader: DistSampleReader, + output_format: str = "dgl.Block", + edge_dir="in", + ): + """ + Constructs a new HomogeneousSampleReader + + Parameters + ---------- + base_reader: DistSampleReader + The reader responsible for loading saved samples produced by + the cuGraph distributed sampler. + output_format: str + The output format for blocks (either "dgl.Block" or + "cugraph_dgl.nn.SparseGraph"). + edge_dir: str + The direction sampling was performed in ("in" or "out"). + """ + + self.__edge_dir = edge_dir + super().__init__(base_reader, output_format=output_format) + + def __decode_csc( + self, raw_sample_data: Dict[str, "torch.Tensor"] + ) -> List[DGLSamplerOutput]: + return create_homogeneous_sampled_graphs_from_tensors_csc( + raw_sample_data, output_format=self.output_format + ) + + def __decode_coo( + self, raw_sample_data: Dict[str, "torch.Tensor"] + ) -> List[DGLSamplerOutput]: + raise NotImplementedError( + "COO format is currently unsupported in the non-dask API" + ) + + def _decode_all( + self, raw_sample_data: Dict[str, "torch.Tensor"] + ) -> List[DGLSamplerOutput]: + if "major_offsets" in raw_sample_data: + return self.__decode_csc(raw_sample_data) + else: + return self.__decode_coo(raw_sample_data) + + +class Sampler: + """ + Base sampler class for all cugraph-DGL samplers. + """ + + def __init__(self, sparse_format: str = "csc", output_format="dgl.Block"): + """ + Parameters + ---------- + sparse_format: str + Optional (default = "coo"). + The sparse format of the emitted sampled graphs. + Currently, only "csc" is supported. + output_format: str + Optional (default = "dgl.Block") + The output format of the emitted sampled graphs. + Can be either "dgl.Block" (default), or "cugraph_dgl.nn.SparseGraph". + """ + + if sparse_format != "csc": + raise ValueError("Only CSC format is supported at this time") + + self.__output_format = output_format + + @property + def output_format(self): + return self.__output_format + + @property + def sparse_format(self): + return self.__sparse_format + + def sample( + self, + g: cugraph_dgl.Graph, + indices: Iterator["torch.Tensor"], + batch_size: int = 1, + ) -> Iterator[ + Tuple["torch.Tensor", "torch.Tensor", List[Union[SparseGraph, "dgl.Block"]]] + ]: + """ + Samples the graph. + + Parameters + ---------- + g: cugraph_dgl.Graph + The graph being sampled. + indices: TensorType + The node ids of seed nodes where sampling will initiate from. + batch_size: int + The number of seed nodes per batch. + + Returns + ------- + Iterator[DGLSamplerOutput] + Iterator over batches. The returned tuples are in standard + DGL format: (input nodes, output nodes, blocks) where input + nodes are the renumbered input nodes, output nodes are + the renumbered output nodes, and blocks are the output graphs + for each hop. + """ + + raise NotImplementedError("Must be implemented by subclass") diff --git a/python/cugraph-dgl/cugraph_dgl/dataloading/utils/sampling_helpers.py b/python/cugraph-dgl/cugraph_dgl/dataloading/utils/sampling_helpers.py index 10d851ebade..3b7e4502134 100644 --- a/python/cugraph-dgl/cugraph_dgl/dataloading/utils/sampling_helpers.py +++ b/python/cugraph-dgl/cugraph_dgl/dataloading/utils/sampling_helpers.py @@ -404,21 +404,21 @@ def create_heterogenous_dgl_block_from_tensors_dict( return block -def _process_sampled_df_csc( - df: cudf.DataFrame, +def _process_sampled_tensors_csc( + tensors: Dict["torch.Tensor"], reverse_hop_id: bool = True, ) -> Tuple[ - Dict[int, Dict[int, Dict[str, torch.Tensor]]], - List[torch.Tensor], + Dict[int, Dict[int, Dict[str, "torch.Tensor"]]], + List["torch.Tensor"], List[List[int, int]], ]: """ - Convert a dataframe generated by BulkSampler to a dictionary of tensors, to + Convert tensors generated by BulkSampler to a dictionary of tensors, to facilitate MFG creation. The sampled graphs in the dataframe use CSC-format. Parameters ---------- - df: cudf.DataFrame + tensors: Dict[torch.Tensor] The output from BulkSampler compressed in CSC format. The dataframe should be generated with `compression="CSR"` in BulkSampler, since the sampling routine treats seed nodes as sources. @@ -442,12 +442,12 @@ def _process_sampled_df_csc( k-th hop, mfg_sizes[k] and mfg_sizes[k+1] is the number of sources and destinations, respectively. """ - # dropna - major_offsets = cast_to_tensor(df.major_offsets.dropna()) - label_hop_offsets = cast_to_tensor(df.label_hop_offsets.dropna()) - renumber_map_offsets = cast_to_tensor(df.renumber_map_offsets.dropna()) - renumber_map = cast_to_tensor(df["map"].dropna()) - minors = cast_to_tensor(df.minors.dropna()) + + major_offsets = tensors["major_offsets"] + minors = tensors["minors"] + label_hop_offsets = tensors["label_hop_offsets"] + renumber_map = tensors["map"] + renumber_map_offsets = tensors["renumber_map_offsets"] n_batches = len(renumber_map_offsets) - 1 n_hops = int((len(label_hop_offsets) - 1) / n_batches) @@ -511,6 +511,115 @@ def _process_sampled_df_csc( return tensors_dict, renumber_map_list, mfg_sizes.tolist() +def _process_sampled_df_csc( + df: cudf.DataFrame, + reverse_hop_id: bool = True, +): + """ + Convert a dataframe generated by BulkSampler to a dictionary of tensors, to + facilitate MFG creation. The sampled graphs in the dataframe use CSC-format. + + Parameters + ---------- + df: cudf.DataFrame + The output from BulkSampler compressed in CSC format. The dataframe + should be generated with `compression="CSR"` in BulkSampler, + since the sampling routine treats seed nodes as sources. + + reverse_hop_id: bool (default=True) + Reverse hop id. + + Returns + ------- + tensors_dict: dict + A nested dictionary keyed by batch id and hop id. + `tensor_dict[batch_id][hop_id]` holds "minors" and "major_offsets" + values for CSC MFGs. + + renumber_map_list: list + List of renumbering maps for looking up global indices of nodes. One + map for each batch. + + mfg_sizes: list + List of the number of nodes in each message passing layer. For the + k-th hop, mfg_sizes[k] and mfg_sizes[k+1] is the number of sources and + destinations, respectively. + """ + + return _process_sampled_tensors_csc( + { + "major_offsets": cast_to_tensor(df.major_offsets.dropna()), + "label_hop_offsets": cast_to_tensor(df.label_hop_offsets.dropna()), + "renumber_map_offsets": cast_to_tensor(df.renumber_map_offsets.dropna()), + "map": cast_to_tensor(df["map"].dropna()), + "minors": cast_to_tensor(df.minors.dropna()), + }, + reverse_hop_id=reverse_hop_id, + ) + + +def _create_homogeneous_blocks_from_csc( + tensors_dict: Dict[int, Dict[int, Dict[str, torch.Tensor]]], + renumber_map_list: List[torch.Tensor], + mfg_sizes: List[int, int], +): + """Create mini-batches of MFGs in the dgl.Block format. + The input arguments are the outputs of + the function `_process_sampled_df_csc`. + + Returns + ------- + output: list + A list of mini-batches. Each mini-batch is a list that consists of + `input_nodes` tensor, `output_nodes` tensor and a list of MFGs. + """ + n_batches, n_hops = len(mfg_sizes), len(mfg_sizes[0]) - 1 + output = [] + for b_id in range(n_batches): + output_batch = [] + output_batch.append(renumber_map_list[b_id]) + output_batch.append(renumber_map_list[b_id][: mfg_sizes[b_id][-1]]) + + mfgs = [ + SparseGraph( + size=(mfg_sizes[b_id][h_id], mfg_sizes[b_id][h_id + 1]), + src_ids=tensors_dict[b_id][h_id]["minors"], + cdst_ids=tensors_dict[b_id][h_id]["major_offsets"], + formats=["csc", "coo"], + reduce_memory=True, + ) + for h_id in range(n_hops) + ] + + blocks = [] + seednodes_range = None + for mfg in reversed(mfgs): + block_mfg = _create_homogeneous_dgl_block_from_tensor_d( + { + "sources": mfg.src_ids(), + "destinations": mfg.dst_ids(), + "sources_range": mfg._num_src_nodes - 1, + "destinations_range": mfg._num_dst_nodes - 1, + }, + renumber_map=renumber_map_list[b_id], + seednodes_range=seednodes_range, + ) + + seednodes_range = max( + mfg._num_src_nodes - 1, + mfg._num_dst_nodes - 1, + ) + blocks.append(block_mfg) + del mfgs + + blocks.reverse() + + output_batch.append(blocks) + + output.append(output_batch) + return output + + def _create_homogeneous_sparse_graphs_from_csc( tensors_dict: Dict[int, Dict[int, Dict[str, torch.Tensor]]], renumber_map_list: List[torch.Tensor], @@ -549,9 +658,35 @@ def _create_homogeneous_sparse_graphs_from_csc( return output -def create_homogeneous_sampled_graphs_from_dataframe_csc(sampled_df: cudf.DataFrame): +def create_homogeneous_sampled_graphs_from_dataframe_csc( + sampled_df: cudf.DataFrame, output_format: str = "cugraph_dgl.nn.SparseGraph" +): + """Public API to create mini-batches of MFGs using a dataframe output by + BulkSampler, where the sampled graph is compressed in CSC format.""" + if output_format == "cugraph_dgl.nn.SparseGraph": + return _create_homogeneous_sparse_graphs_from_csc( + *(_process_sampled_df_csc(sampled_df)), + ) + elif output_format == "dgl.Block": + return _create_homogeneous_blocks_from_csc( + *(_process_sampled_df_csc(sampled_df)), + ) + else: + raise ValueError(f"Invalid output format {output_format}") + + +def create_homogeneous_sampled_graphs_from_tensors_csc( + tensors: Dict["torch.Tensor"], output_format: str = "cugraph_dgl.nn.SparseGraph" +): """Public API to create mini-batches of MFGs using a dataframe output by BulkSampler, where the sampled graph is compressed in CSC format.""" - return _create_homogeneous_sparse_graphs_from_csc( - *(_process_sampled_df_csc(sampled_df)) - ) + if output_format == "cugraph_dgl.nn.SparseGraph": + return _create_homogeneous_sparse_graphs_from_csc( + *(_process_sampled_tensors_csc(tensors)), + ) + elif output_format == "dgl.Block": + return _create_homogeneous_blocks_from_csc( + *(_process_sampled_tensors_csc(tensors)), + ) + else: + raise ValueError(f"Invalid output format {output_format}") diff --git a/python/cugraph-dgl/cugraph_dgl/features.py b/python/cugraph-dgl/cugraph_dgl/features.py new file mode 100644 index 00000000000..9dc009f4127 --- /dev/null +++ b/python/cugraph-dgl/cugraph_dgl/features.py @@ -0,0 +1,121 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import warnings + +from cugraph.utilities.utils import import_optional, MissingModule + +torch = import_optional("torch") +dgl = import_optional("dgl") +wgth = import_optional("pylibwholegraph.torch") + + +class WholeFeatureStore( + object if isinstance(dgl, MissingModule) else dgl.storages.base.FeatureStorage +): + """ + Interface for feature storage. + """ + + def __init__( + self, + tensor: "torch.Tensor", + memory_type: str = "distributed", + location: str = "cpu", + ): + """ + Constructs a new WholeFeatureStore object that wraps a WholeGraph wholememory + distributed tensor. + + Parameters + ---------- + t: torch.Tensor + The local slice of the tensor being distributed. These should be in order + by rank (i.e. rank 0 contains elements 0-9, rank 1 contains elements 10-19, + rank 3 contains elements 20-29, etc.) The sizes do not need to be equal. + memory_type: str (optional, default='distributed') + The memory type of this store. Options are + 'distributed', 'chunked', and 'continuous'. + For more information consult the WholeGraph + documentation. + location: str(optional, default='cpu') + The location ('cpu' or 'cuda') where data is stored. + """ + self.__wg_comm = wgth.get_global_communicator() + + if len(tensor.shape) > 2: + raise ValueError("Only 1-D or 2-D tensors are supported by WholeGraph.") + + rank = torch.distributed.get_rank() + world_size = torch.distributed.get_world_size() + + ld = torch.tensor(tensor.shape[0], device="cuda", dtype=torch.int64) + sizes = torch.empty((world_size,), device="cuda", dtype=torch.int64) + torch.distributed.all_gather_into_tensor(sizes, ld) + + sizes = sizes.cpu() + ld = sizes.sum() + + self.__td = -1 if len(tensor.shape) == 1 else tensor.shape[1] + global_shape = [ + int(ld), + self.__td if self.__td > 0 else 1, + ] + + if self.__td < 0: + tensor = tensor.reshape((tensor.shape[0], 1)) + + wg_tensor = wgth.create_wholememory_tensor( + self.__wg_comm, + memory_type, + location, + global_shape, + tensor.dtype, + [global_shape[1], 1], + ) + + offset = sizes[:rank].sum() if rank > 0 else 0 + + wg_tensor.scatter( + tensor.clone(memory_format=torch.contiguous_format).cuda(), + torch.arange( + offset, offset + tensor.shape[0], dtype=torch.int64, device="cuda" + ).contiguous(), + ) + + self.__wg_comm.barrier() + + self.__wg_tensor = wg_tensor + + def requires_ddp(self) -> bool: + return True + + def fetch( + self, + indices: torch.Tensor, + device: torch.cuda.Device, + pin_memory=False, + **kwargs, + ): + if pin_memory: + warnings.warn("pin_memory has no effect for WholeFeatureStorage.") + + t = self.__wg_tensor.gather( + indices.cuda(), + force_dtype=self.__wg_tensor.dtype, + ) + + if self.__td < 0: + t = t.reshape((t.shape[0],)) + + return t.to(torch.device(device)) diff --git a/python/cugraph-dgl/cugraph_dgl/graph.py b/python/cugraph-dgl/cugraph_dgl/graph.py new file mode 100644 index 00000000000..2eba13c6958 --- /dev/null +++ b/python/cugraph-dgl/cugraph_dgl/graph.py @@ -0,0 +1,910 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import warnings + +from typing import Union, Optional, Dict, Tuple, List + +from cugraph.utilities.utils import import_optional +from cugraph.gnn import cugraph_comms_get_raft_handle + +import cupy +import pylibcugraph + +from cugraph_dgl.typing import TensorType +from cugraph_dgl.utils.cugraph_conversion_utils import _cast_to_torch_tensor +from cugraph_dgl.features import WholeFeatureStore +from cugraph_dgl.view import ( + HeteroNodeView, + HeteroNodeDataView, + HeteroEdgeView, + HeteroEdgeDataView, +) + + +# Have to use import_optional even though these are required +# dependencies in order to build properly. +dgl = import_optional("dgl") +torch = import_optional("torch") +tensordict = import_optional("tensordict") + +HOMOGENEOUS_NODE_TYPE = "n" +HOMOGENEOUS_EDGE_TYPE = (HOMOGENEOUS_NODE_TYPE, "e", HOMOGENEOUS_NODE_TYPE) + + +class Graph: + """ + cuGraph-backed duck-typed version of dgl.DGLGraph that distributes + the graph across workers. This object uses lazy graph creation. + Users can repeatedly call add_edges, and the tensors won't + be converted into a cuGraph graph until one is needed + (i.e. when creating a loader). Supports + single-node/single-GPU, single-node/multi-GPU, and + multi-node/multi-GPU graph storage. + + Each worker should have a slice of the graph locally, and + call put_edge_index with its slice. + """ + + def __init__( + self, + is_multi_gpu: bool = False, + ndata_storage="torch", + edata_storage="torch", + **kwargs, + ): + """ + Parameters + ---------- + is_multi_gpu: bool (optional, default=False) + Specifies whether this graph is distributed across GPUs. + ndata_storage: str (optional, default='torch') + Specifies where node data should be stored + (options are 'torch' and 'wholegraph'). + If using PyTorch tensors for storage ('torch') + then data will be replicated across workers and data + for all nodes should be provided when calling add_nodes. + If using WholeGraph wholememory tensors for storage, + then data will be distributed across workers and only + the local slice of the data should be provided when + calling add_nodes. + edata_storage: str (optional, default='torch') + If using PyTorch tensors for storage ('torch') + then data will be replicated across workers and data + for all nodes should be provided when calling add_edge. + If using WholeGraph wholememory tensors for storage, + then data will be distributed across workers and only + the local slice of the data should be provided when + calling add_edges. + kwargs: + Optional kwargs for WholeGraph feature storage. + """ + + if ndata_storage not in ("torch", "wholegraph"): + raise ValueError( + "Invalid node storage type (valid types are 'torch' and 'wholegraph')" + ) + if edata_storage not in ("torch", "wholegraph"): + raise ValueError( + "Invalid edge storage type (valid types are 'torch' and 'wholegraph')" + ) + + self.__num_nodes_dict = {} + self.__num_edges_dict = {} + self.__edge_indices = tensordict.TensorDict({}, batch_size=(2,)) + + self.__graph = None + self.__vertex_offsets = None + self.__handle = None + self.__is_multi_gpu = is_multi_gpu + + self.__ndata_storage_type = ( + WholeFeatureStore + if ndata_storage == "wholegraph" + else dgl.storages.pytorch_tensor.PyTorchTensorStorage + ) + self.__edata_storage_type = ( + WholeFeatureStore + if edata_storage == "wholegraph" + else dgl.storages.pytorch_tensor.PyTorchTensorStorage + ) + self.__ndata_storage = {} + self.__edata_storage = {} + self.__wg_kwargs = kwargs + + @property + def is_multi_gpu(self): + return self.__is_multi_gpu + + def to_canonical_etype( + self, etype: Union[str, Tuple[str, str, str]] + ) -> Tuple[str, str, str]: + if etype is None: + if len(self.canonical_etypes) > 1: + raise ValueError("Edge type is required for heterogeneous graphs.") + return HOMOGENEOUS_EDGE_TYPE + + if isinstance(etype, tuple) and len(etype) == 3: + return etype + + for src_type, rel_type, dst_type in self.__edge_indices.keys( + leaves_only=True, include_nested=True + ): + if etype == rel_type: + return (src_type, rel_type, dst_type) + + raise ValueError("Unknown relation type " + etype) + + def add_nodes( + self, + global_num_nodes: int, + data: Optional[Dict[str, TensorType]] = None, + ntype: Optional[str] = None, + ): + """ + Adds the given number of nodes to this graph. Can only be called once + per node type. The number of nodes specified here refers to the total + number of nodes across all workers (the entire graph). If the backing + feature store is distributed (i.e. wholegraph), then only local features + should be passed to the data argument. If the backing feature store is + replicated, then features for all nodes in the graph should be passed to + the data argument, including those for nodes not on the local worker. + + Parameters + ---------- + global_num_nodes: int + The total number of nodes of the given type in this graph. + The same number should be passed to every worker. + data: Dict[str, TensorType] (optional, default=None) + Node feature tensors. + ntype: str (optional, default=None) + The node type being modified. Required for heterogeneous graphs. + """ + if ntype is None: + if len(self.__num_nodes_dict.keys()) > 1: + raise ValueError("Node type is required for heterogeneous graphs.") + ntype = HOMOGENEOUS_NODE_TYPE + + if ntype in self.__num_nodes_dict: + raise ValueError( + "Calling add_nodes multiple types for the same " + "node type is not allowed in cuGraph-DGL" + ) + + if self.is_multi_gpu: + # Ensure all nodes got the same number of nodes passed + world_size = torch.distributed.get_world_size() + local_size = torch.tensor( + [global_num_nodes], device="cuda", dtype=torch.int64 + ) + ns = torch.empty((world_size,), device="cuda", dtype=torch.int64) + torch.distributed.all_gather_into_tensor(ns, local_size) + if not (ns == global_num_nodes).all(): + raise ValueError("The global number of nodes must match on all workers") + + # Ensure the sum of the feature shapes equals the global number of nodes. + if data is not None: + for feature_name, feature_tensor in data.items(): + features_size = torch.tensor( + [int(feature_tensor.shape[0])], device="cuda", dtype=torch.int64 + ) + torch.distributed.all_reduce( + features_size, op=torch.distributed.ReduceOp.SUM + ) + if features_size != global_num_nodes: + raise ValueError( + "The total length of the feature vector across workers must" + " match the global number of nodes but it does not " + f"match for {feature_name}." + ) + + self.__num_nodes_dict[ntype] = global_num_nodes + + if data is not None: + for feature_name, feature_tensor in data.items(): + self.__ndata_storage[ntype, feature_name] = self.__ndata_storage_type( + _cast_to_torch_tensor(feature_tensor), **self.__wg_kwargs + ) + + self.__graph = None + self.__vertex_offsets = None + + def __check_node_ids(self, ntype: str, ids: TensorType): + """ + Ensures all node ids in the provided id tensor are valid. + Raises a ValueError if any are invalid. + + Parameters + ---------- + ntype: str + The node type being validated against. + ids: + The tensor of ids being validated. + """ + if ntype in self.__num_nodes_dict: + if ids.max() + 1 > self.num_nodes(ntype): + raise ValueError( + f"input tensor contains invalid node ids for type {ntype}" + ) + else: + raise ValueError( + f"add_nodes() must be called for type {ntype} before calling num_edges." + ) + + def add_edges( + self, + u: TensorType, + v: TensorType, + data: Optional[Dict[str, TensorType]] = None, + etype: Optional[Union[str, Tuple[str, str, str]]] = None, + ) -> None: + """ + Adds edges to this graph. Must be called after add_nodes + is called for the src/dst node type. If the backing feature + store is distributed (i.e. wholegraph), then only local + features should be passed to the data argument. If the + backing feature store is replicated, then features for + all edges should be passed to the data argument, + including those for edges not on the local worker. + + Parameters + ---------- + u: TensorType + 1d tensor of source node ids (local slice of the distributed edgelist). + v: TensorType + 1d tensor of destination node ids (local slice of the distributed edgelist). + data: Dict[str, TensorType] (optional, default=None) + Dictionary containing edge features for the new edges. + etype: Union[str, Tuple[str, str, str]] + The edge type of the edges being inserted. Not required + for homogeneous graphs, which have only one edge type. + """ + + # Validate all inputs before proceeding + # The number of nodes for the src/dst type needs to be known and there cannot + # be any edges of this type in the graph. + dgl_can_edge_type = self.to_canonical_etype(etype) + src_type, _, dst_type = dgl_can_edge_type + if dgl_can_edge_type in self.__edge_indices.keys( + leaves_only=True, include_nested=True + ): + raise ValueError( + "This cuGraph-DGL graph already contains edges of type" + f" {dgl_can_edge_type}. Calling add_edges multiple times" + " for the same edge type is not supported." + ) + self.__check_node_ids(src_type, u) + self.__check_node_ids(dst_type, v) + + self.__edge_indices[dgl_can_edge_type] = torch.stack( + [ + _cast_to_torch_tensor(u), + _cast_to_torch_tensor(v), + ] + ).to(self.idtype) + + if data is not None: + for attr_name, attr_tensor in data.items(): + self.__edata_storage[ + dgl_can_edge_type, attr_name + ] = self.__edata_storage_type( + _cast_to_torch_tensor(attr_tensor), **self.__wg_kwargs + ) + + num_edges = self.__edge_indices[dgl_can_edge_type].shape[1] + if self.is_multi_gpu: + num_edges = torch.tensor([num_edges], device="cuda", dtype=torch.int64) + torch.distributed.all_reduce(num_edges, op=torch.distributed.ReduceOp.SUM) + + self.__num_edges_dict[dgl_can_edge_type] = int(num_edges) + + self.__graph = None + self.__vertex_offsets = None + + def num_nodes(self, ntype: str = None) -> int: + """ + Returns the number of nodes of ntype, or if ntype is not provided, + the total number of nodes in the graph. + """ + if ntype is None: + return sum(self.__num_nodes_dict.values()) + + return self.__num_nodes_dict[ntype] + + def number_of_nodes(self, ntype: str = None) -> int: + """ + Alias for num_nodes. + """ + return self.num_nodes(ntype=ntype) + + def num_edges(self, etype: Union[str, Tuple[str, str, str]] = None) -> int: + """ + Returns the number of edges of etype, or if etype is not provided, + the total number of edges in the graph. + """ + if etype is None: + return sum(self.__num_edges_dict.values()) + + etype = self.to_canonical_etype(etype) + return self.__num_edges_dict[etype] + + def number_of_edges(self, etype: Union[str, Tuple[str, str, str]] = None) -> int: + """ + Alias for num_edges. + """ + return self.num_edges(etype=etype) + + @property + def ntypes(self) -> List[str]: + """ + Returns the node type names in this graph. + """ + return list(self.__num_nodes_dict.keys()) + + @property + def etypes(self) -> List[str]: + """ + Returns the edge type names in this graph + (the second element of the canonical edge + type tuple). + """ + return [et[1] for et in self.__num_edges_dict.keys()] + + @property + def canonical_etypes(self) -> List[str]: + """ + Returns the canonical edge type names in this + graph. + """ + return list(self.__num_edges_dict.keys()) + + @property + def _vertex_offsets(self) -> Dict[str, int]: + if self.__vertex_offsets is None: + ordered_keys = sorted(list(self.ntypes)) + self.__vertex_offsets = {} + offset = 0 + for vtype in ordered_keys: + self.__vertex_offsets[vtype] = offset + offset += self.num_nodes(vtype) + + return dict(self.__vertex_offsets) + + def __get_edgelist(self) -> Dict[str, "torch.Tensor"]: + """ + This function always returns src/dst labels with respect + to the out direction. + + Returns + ------- + Dict[str, torch.Tensor] with the following keys: + src: source vertices (int64) + Note that src is the 1st element of the DGL edge index. + dst: destination vertices (int64) + Note that dst is the 2nd element of the DGL edge index. + eid: edge ids for each edge (int64) + Note that these start from 0 for each edge type. + etp: edge types for each edge (int32) + Note that these are in lexicographic order. + """ + sorted_keys = sorted( + list(self.__edge_indices.keys(leaves_only=True, include_nested=True)) + ) + + # note that this still follows the DGL convention of (src, rel, dst) + # i.e. (author, writes, paper): [[0,1,2],[2,0,1]] is referring to a + # cuGraph graph where (paper 2) -> (author 0), (paper 0) -> (author 1), + # and (paper 1) -> (author 0) + edge_index = torch.concat( + [ + torch.stack( + [ + self.__edge_indices[src_type, rel_type, dst_type][0] + + self._vertex_offsets[src_type], + self.__edge_indices[src_type, rel_type, dst_type][1] + + self._vertex_offsets[dst_type], + ] + ) + for (src_type, rel_type, dst_type) in sorted_keys + ], + axis=1, + ).cuda() + + edge_type_array = torch.arange( + len(sorted_keys), dtype=torch.int32, device="cuda" + ).repeat_interleave( + torch.tensor( + [self.__edge_indices[et].shape[1] for et in sorted_keys], + device="cuda", + dtype=torch.int32, + ) + ) + + if self.is_multi_gpu: + rank = torch.distributed.get_rank() + world_size = torch.distributed.get_world_size() + + num_edges_t = torch.tensor( + [self.__edge_indices[et].shape[1] for et in sorted_keys], device="cuda" + ) + num_edges_all_t = torch.empty( + world_size, num_edges_t.numel(), dtype=torch.int64, device="cuda" + ) + torch.distributed.all_gather_into_tensor(num_edges_all_t, num_edges_t) + + if rank > 0: + start_offsets = num_edges_all_t[:rank].T.sum(axis=1) + edge_id_array = torch.concat( + [ + torch.arange( + start_offsets[i], + start_offsets[i] + num_edges_all_t[rank][i], + dtype=torch.int64, + device="cuda", + ) + for i in range(len(sorted_keys)) + ] + ) + else: + edge_id_array = torch.concat( + [ + torch.arange( + self.__edge_indices[et].shape[1], + dtype=torch.int64, + device="cuda", + ) + for et in sorted_keys + ] + ) + + else: + # single GPU + edge_id_array = torch.concat( + [ + torch.arange( + self.__edge_indices[et].shape[1], + dtype=torch.int64, + device="cuda", + ) + for et in sorted_keys + ] + ) + + return { + "src": edge_index[0], + "dst": edge_index[1], + "etp": edge_type_array, + "eid": edge_id_array, + } + + @property + def is_homogeneous(self): + return len(self.__num_edges_dict) <= 1 and len(self.__num_nodes_dict) <= 1 + + @property + def idtype(self): + return torch.int64 + + @property + def _resource_handle(self): + if self.__handle is None: + if self.is_multi_gpu: + self.__handle = pylibcugraph.ResourceHandle( + cugraph_comms_get_raft_handle().getHandle() + ) + else: + self.__handle = pylibcugraph.ResourceHandle() + return self.__handle + + def _graph( + self, direction: str + ) -> Union[pylibcugraph.SGGraph, pylibcugraph.MGGraph]: + """ + Gets the pylibcugraph Graph object with edges pointing in the given direction + (i.e. 'out' is standard, 'in' is reverse). + """ + + if direction not in ["out", "in"]: + raise ValueError(f"Invalid direction {direction} (expected 'in' or 'out').") + + graph_properties = pylibcugraph.GraphProperties( + is_multigraph=True, is_symmetric=False + ) + + if self.__graph is not None and self.__graph[1] != direction: + self.__graph = None + + if self.__graph is None: + src_col, dst_col = ("src", "dst") if direction == "out" else ("dst", "src") + edgelist_dict = self.__get_edgelist() + + if self.is_multi_gpu: + rank = torch.distributed.get_rank() + world_size = torch.distributed.get_world_size() + + vertices_array = cupy.arange(self.num_nodes(), dtype="int64") + vertices_array = cupy.array_split(vertices_array, world_size)[rank] + + self.__graph = ( + pylibcugraph.MGGraph( + self._resource_handle, + graph_properties, + [cupy.asarray(edgelist_dict[src_col]).astype("int64")], + [cupy.asarray(edgelist_dict[dst_col]).astype("int64")], + vertices_array=[vertices_array], + edge_id_array=[cupy.asarray(edgelist_dict["eid"])], + edge_type_array=[cupy.asarray(edgelist_dict["etp"])], + ), + direction, + ) + else: + self.__graph = ( + pylibcugraph.SGGraph( + self._resource_handle, + graph_properties, + cupy.asarray(edgelist_dict[src_col]).astype("int64"), + cupy.asarray(edgelist_dict[dst_col]).astype("int64"), + vertices_array=cupy.arange(self.num_nodes(), dtype="int64"), + edge_id_array=cupy.asarray(edgelist_dict["eid"]), + edge_type_array=cupy.asarray(edgelist_dict["etp"]), + ), + direction, + ) + + return self.__graph[0] + + def _has_n_emb(self, ntype: str, emb_name: str) -> bool: + return (ntype, emb_name) in self.__ndata_storage + + def _get_n_emb( + self, ntype: str, emb_name: str, u: Union[str, TensorType] + ) -> "torch.Tensor": + """ + Gets the embedding of a single node type. + Unlike DGL, this function takes the string node + type name instead of an integer id. + + Parameters + ---------- + ntype: str + The node type to get the embedding of. + emb_name: str + The embedding name of the embedding to get. + u: Union[str, TensorType] + Nodes to get the representation of, or ALL + to get the representation of all nodes of + the given type. + + Returns + ------- + torch.Tensor + The embedding of the given edge type with the given embedding name. + """ + + if ntype is None: + if len(self.ntypes) == 1: + ntype = HOMOGENEOUS_NODE_TYPE + else: + raise ValueError("Must provide the node type for a heterogeneous graph") + + if dgl.base.is_all(u): + u = torch.arange(self.num_nodes(ntype), dtype=self.idtype, device="cpu") + + try: + return self.__ndata_storage[ntype, emb_name].fetch( + _cast_to_torch_tensor(u), "cuda" + ) + except RuntimeError as ex: + warnings.warn( + "Got error accessing data, trying again with index on device: " + + str(ex) + ) + return self.__ndata_storage[ntype, emb_name].fetch( + _cast_to_torch_tensor(u).cuda(), "cuda" + ) + + def _has_e_emb(self, etype: Tuple[str, str, str], emb_name: str) -> bool: + return (etype, emb_name) in self.__edata_storage + + def _get_e_emb( + self, etype: Tuple[str, str, str], emb_name: str, u: Union[str, TensorType] + ) -> "torch.Tensor": + """ + Gets the embedding of a single edge type. + Unlike DGL, this function takes the canonical edge type + instead of an integer id. + + Parameters + ---------- + etype: str + The edge type to get the embedding of. + emb_name: str + The embedding name of the embedding to get. + u: Union[str, TensorType] + Edges to get the representation of, or ALL to + get the representation of all nodes of the + given type. + + Returns + ------- + torch.Tensor + The embedding of the given edge type with the given embedding name. + """ + + etype = self.to_canonical_etype(etype) + + if dgl.base.is_all(u): + u = torch.arange(self.num_edges(etype), dtype=self.idtype, device="cpu") + + try: + return self.__edata_storage[etype, emb_name].fetch( + _cast_to_torch_tensor(u), "cuda" + ) + except RuntimeError as ex: + warnings.warn( + "Got error accessing data, trying again with index on device: " + + str(ex) + ) + return self.__edata_storage[etype, emb_name].fetch( + _cast_to_torch_tensor(u).cuda(), "cuda" + ) + + def _set_n_emb( + self, ntype: str, u: Union[str, TensorType], kv: Dict[str, TensorType] + ) -> None: + """ + Stores or updates the embedding(s) of a single node type. + Unlike DGL, this function takes the string node type name + instead of an integer id. + + The semantics of this function match those of add_nodes + with respect to whether or not the backing feature store + is distributed. + + Parameters + ---------- + ntype: str + The node type to store an embedding of. + u: Union[str, TensorType] + The indices to update, if updating the embedding. + Currently, updating a slice of an embedding is + unsupported, so this should be ALL. + kv: Dict[str, TensorType] + A mapping of embedding names to embedding tensors. + """ + + if not dgl.base.is_all(u): + raise NotImplementedError( + "Updating a slice of an embedding is " + "currently unimplemented in cuGraph-DGL." + ) + + for k, v in kv: + self.__ndata_storage[ntype, k] = self.__ndata_storage_type( + v, + **self.__wg_kwargs, + ) + + def _set_e_emb( + self, etype: str, u: Union[str, TensorType], kv: Dict[str, TensorType] + ) -> None: + """ + Stores or updates the embedding(s) of a single edge type. + Unlike DGL, this function takes the canonical edge type name + instead of an integer id. + + The semantics of this function match those of add_edges + with respect to whether or not the backing feature store + is distributed. + + Parameters + ---------- + etype: str + The edge type to store an embedding of. + u: Union[str, TensorType] + The indices to update, if updating the embedding. + Currently, updating a slice of an embedding is + unsupported, so this should be ALL. + kv: Dict[str, TensorType] + A mapping of embedding names to embedding tensors. + """ + + if not dgl.base.is_all(u): + raise NotImplementedError( + "Updating a slice of an embedding is " + "currently unimplemented in cuGraph-DGL." + ) + + for k, v in kv: + self.__edata_storage[etype, k] = self.__edata_storage_type( + v, + **self.__wg_kwargs, + ) + + def _pop_n_emb(self, ntype: str, key: str) -> "torch.Tensor": + """ + Removes and returns the embedding of the given node + type with the given name. + + Parameters + ---------- + ntype:str + The node type. + key:str + The embedding name. + + Returns + ------- + The removed embedding. + """ + return self.__ndata_storage[ntype, key].pop(key) + + def _pop_e_emb(self, etype: str, key: str) -> "torch.Tensor": + """ + Removes and returns the embedding of the given edge + type with the given name. + + Parameters + ---------- + etype:str + The node type. + key:str + The embedding name. + + Returns + ------- + torch.Tensor + The removed embedding. + """ + return self.__edata_storage[etype, key].pop(key) + + def _get_n_emb_keys(self, ntype: str) -> List[str]: + """ + Gets a list of the embedding names for a given node + type. + + Parameters + ---------- + ntype: str + The node type to get embedding names for. + + Returns + ------- + List[str] + The list of embedding names for the given node type. + """ + return [k for (t, k) in self.__ndata_storage if ntype == t] + + def _get_e_emb_keys(self, etype: str) -> List[str]: + """ + Gets a list of the embedding names for a given edge + type. + + Parameters + ---------- + etype: str + The edge type to get embedding names for. + + Returns + ------- + List[str] + The list of embedding names for the given edge type. + """ + return [k for (t, k) in self.__edata_storage if etype == t] + + def all_edges( + self, + form="uv", + order="eid", + etype: Union[str, Tuple[str, str, str]] = None, + device: Union[str, int, "torch.device"] = "cpu", + ): + """ + Returns all edges with the specified edge type. + cuGraph-DGL currently only supports 'eid' format and + 'eid' order. + + Parameters + ---------- + form: str (optional, default='uv') + The format to return ('uv', 'eid', 'all'). + + order: str (optional, default='eid') + The order to return edges in ('eid', 'srcdst') + cuGraph-DGL currently only supports 'eid'. + etype: Union[str, Tuple[str, str, str]] (optional, default=None) + The edge type to get. Not required if this is + a homogeneous graph. Can be the relation type if the + relation type is unique, or the canonical edge type. + device: Union[str, int, torch.device] (optional, default='cpu') + The device where returned edges should be stored + ('cpu', 'cuda', or device id). + """ + + if order != "eid": + raise NotImplementedError("cugraph-DGL only supports eid order.") + + if etype is None and len(self.canonical_etypes) > 1: + raise ValueError("Edge type is required for heterogeneous graphs.") + + etype = self.to_canonical_etype(etype) + + if form == "eid": + return torch.arange( + 0, + self.__num_edges_dict[etype], + dtype=self.idtype, + device=device, + ) + else: + if self.is_multi_gpu: + # This can't be done because it requires collective communication. + raise ValueError( + "Calling all_edges in a distributed graph with" + " form 'uv' or 'all' is unsupported." + ) + + else: + eix = self.__edge_indices[etype].to(device) + if form == "uv": + return eix[0], eix[1] + elif form == "all": + return ( + eix[0], + eix[1], + torch.arange( + self.__num_edges_dict[etype], + dtype=self.idtype, + device=device, + ), + ) + else: + raise ValueError(f"Invalid form {form}") + + @property + def ndata(self) -> HeteroNodeDataView: + """ + Returns a view of the node data in this graph which can be used to + access or modify node features. + """ + + if len(self.ntypes) == 1: + ntype = self.ntypes[0] + return HeteroNodeDataView(self, ntype, dgl.base.ALL) + + return HeteroNodeDataView(self, self.ntypes, dgl.base.ALL) + + @property + def edata(self) -> HeteroEdgeDataView: + """ + Returns a view of the edge data in this graph which can be used to + access or modify edge features. + """ + if len(self.canonical_etypes) == 1: + return HeteroEdgeDataView(self, None, dgl.base.ALL) + + return HeteroEdgeDataView(self, self.canonical_etypes, dgl.base.ALL) + + @property + def nodes(self) -> HeteroNodeView: + """ + Returns a view of the nodes in this graph. + """ + return HeteroNodeView(self) + + @property + def edges(self) -> HeteroEdgeView: + """ + Returns a view of the edges in this graph. + """ + return HeteroEdgeView(self) diff --git a/python/cugraph-dgl/cugraph_dgl/nn/conv/base.py b/python/cugraph-dgl/cugraph_dgl/nn/conv/base.py index ddd95a76366..d2460f814c9 100644 --- a/python/cugraph-dgl/cugraph_dgl/nn/conv/base.py +++ b/python/cugraph-dgl/cugraph_dgl/nn/conv/base.py @@ -1,4 +1,4 @@ -# Copyright (c) 2023, NVIDIA CORPORATION. +# Copyright (c) 2023-2024, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -15,6 +15,8 @@ from cugraph.utilities.utils import import_optional +import cugraph_dgl + torch = import_optional("torch") ops_torch = import_optional("pylibcugraphops.pytorch") dgl = import_optional("dgl") @@ -255,6 +257,27 @@ def __repr__(self) -> str: f"num_edges={self._src_ids.size(0)}, formats={self._formats})" ) + def to(self, device: Union[torch.device, str, int]) -> "cugraph_dgl.nn.SparseGraph": + sg = SparseGraph( + src_ids=None if self._src_ids is None else self._src_ids.to(device), + dst_ids=None if self._dst_ids is None else self._dst_ids.to(device), + csrc_ids=None if self._csrc_ids is None else self._csrc_ids.to(device), + cdst_ids=None if self._cdst_ids is None else self._cdst_ids.to(device), + values=None if self._values is None else self._values.to(device), + is_sorted=self._is_sorted, + formats=self._formats, + reduce_memory=self._reduce_memory, + ) + + sg._perm_coo2csc = ( + None if self._perm_coo2csc is None else self._perm_coo2csc.to(device) + ) + sg._perm_csc2csr = ( + None if self._perm_csc2csr is None else self._perm_csc2csr.to(device) + ) + + return sg + class BaseConv(torch.nn.Module): r"""An abstract base class for cugraph-ops nn module.""" diff --git a/python/cugraph-dgl/tests/__init__.py b/python/cugraph-dgl/cugraph_dgl/tests/__init__.py similarity index 100% rename from python/cugraph-dgl/tests/__init__.py rename to python/cugraph-dgl/cugraph_dgl/tests/__init__.py diff --git a/python/cugraph-dgl/tests/conftest.py b/python/cugraph-dgl/cugraph_dgl/tests/conftest.py similarity index 100% rename from python/cugraph-dgl/tests/conftest.py rename to python/cugraph-dgl/cugraph_dgl/tests/conftest.py diff --git a/python/cugraph-dgl/tests/test_dataloader.py b/python/cugraph-dgl/cugraph_dgl/tests/dataloading/test_dask_dataloader.py similarity index 98% rename from python/cugraph-dgl/tests/test_dataloader.py rename to python/cugraph-dgl/cugraph_dgl/tests/dataloading/test_dask_dataloader.py index cc473cd0ad6..e2542657de4 100644 --- a/python/cugraph-dgl/tests/test_dataloader.py +++ b/python/cugraph-dgl/cugraph_dgl/tests/dataloading/test_dask_dataloader.py @@ -1,4 +1,4 @@ -# Copyright (c) 2022-2023, NVIDIA CORPORATION. +# Copyright (c) 2022-2024, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -52,7 +52,7 @@ def sample_cugraph_dgl_graphs(cugraph_gs, train_nid, fanouts): sampler = cugraph_dgl.dataloading.NeighborSampler(fanouts) tempdir_object = tempfile.TemporaryDirectory() sampling_output_dir = tempdir_object - dataloader = cugraph_dgl.dataloading.DataLoader( + dataloader = cugraph_dgl.dataloading.DaskDataLoader( cugraph_gs, train_nid, sampler, diff --git a/python/cugraph-dgl/tests/mg/test_dataloader.py b/python/cugraph-dgl/cugraph_dgl/tests/dataloading/test_dask_dataloader_mg.py similarity index 97% rename from python/cugraph-dgl/tests/mg/test_dataloader.py rename to python/cugraph-dgl/cugraph_dgl/tests/dataloading/test_dask_dataloader_mg.py index 29b7e1c3412..d49e1293e77 100644 --- a/python/cugraph-dgl/tests/mg/test_dataloader.py +++ b/python/cugraph-dgl/cugraph_dgl/tests/dataloading/test_dask_dataloader_mg.py @@ -1,4 +1,4 @@ -# Copyright (c) 2022-2023, NVIDIA CORPORATION. +# Copyright (c) 2022-2024, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -51,7 +51,7 @@ def sample_cugraph_dgl_graphs(cugraph_gs, train_nid, fanouts): sampler = cugraph_dgl.dataloading.NeighborSampler(fanouts) tempdir_object = tempfile.TemporaryDirectory() sampling_output_dir = tempdir_object - dataloader = cugraph_dgl.dataloading.DataLoader( + dataloader = cugraph_dgl.dataloading.DaskDataLoader( cugraph_gs, train_nid, sampler, diff --git a/python/cugraph-dgl/cugraph_dgl/tests/dataloading/test_dataloader.py b/python/cugraph-dgl/cugraph_dgl/tests/dataloading/test_dataloader.py new file mode 100644 index 00000000000..ef47875463d --- /dev/null +++ b/python/cugraph-dgl/cugraph_dgl/tests/dataloading/test_dataloader.py @@ -0,0 +1,128 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import cugraph_dgl.dataloading +import pytest + +import cugraph_dgl + +from cugraph.datasets import karate +from cugraph.utilities.utils import import_optional, MissingModule + +import numpy as np + +torch = import_optional("torch") +dgl = import_optional("dgl") + + +@pytest.mark.skipif(isinstance(torch, MissingModule), reason="torch not available") +@pytest.mark.skipif(isinstance(dgl, MissingModule), reason="dgl not available") +def test_dataloader_basic_homogeneous(): + graph = cugraph_dgl.Graph(is_multi_gpu=False) + + num_nodes = karate.number_of_nodes() + graph.add_nodes(num_nodes, data={"z": torch.arange(num_nodes)}) + + edf = karate.get_edgelist() + graph.add_edges( + u=edf["src"], v=edf["dst"], data={"q": torch.arange(karate.number_of_edges())} + ) + + sampler = cugraph_dgl.dataloading.NeighborSampler([5, 5, 5]) + loader = cugraph_dgl.dataloading.FutureDataLoader( + graph, torch.arange(num_nodes), sampler, batch_size=2 + ) + + for in_t, out_t, blocks in loader: + assert len(blocks) == 3 + assert len(out_t) <= 2 + + +def sample_dgl_graphs(g, train_nid, fanouts, batch_size=1): + # Single fanout to match cugraph + sampler = dgl.dataloading.NeighborSampler(fanouts) + dataloader = dgl.dataloading.DataLoader( + g, + train_nid, + sampler, + batch_size=batch_size, + shuffle=False, + drop_last=False, + num_workers=0, + ) + + dgl_output = {} + for batch_id, (input_nodes, output_nodes, blocks) in enumerate(dataloader): + dgl_output[batch_id] = { + "input_nodes": input_nodes, + "output_nodes": output_nodes, + "blocks": blocks, + } + return dgl_output + + +def sample_cugraph_dgl_graphs(cugraph_g, train_nid, fanouts, batch_size=1): + sampler = cugraph_dgl.dataloading.NeighborSampler(fanouts) + + dataloader = cugraph_dgl.dataloading.FutureDataLoader( + cugraph_g, + train_nid, + sampler, + batch_size=batch_size, + drop_last=False, + shuffle=False, + ) + + cugraph_dgl_output = {} + for batch_id, (input_nodes, output_nodes, blocks) in enumerate(dataloader): + cugraph_dgl_output[batch_id] = { + "input_nodes": input_nodes, + "output_nodes": output_nodes, + "blocks": blocks, + } + return cugraph_dgl_output + + +@pytest.mark.skipif(isinstance(torch, MissingModule), reason="torch not available") +@pytest.mark.skipif(isinstance(dgl, MissingModule), reason="dgl not available") +@pytest.mark.parametrize("ix", [[1], [1, 0]]) +@pytest.mark.parametrize("batch_size", [1, 2]) +def test_same_homogeneousgraph_results(ix, batch_size): + src = torch.tensor([1, 2, 3, 4, 5, 6, 7, 8]) + dst = torch.tensor([0, 0, 0, 0, 1, 1, 1, 1]) + + train_nid = torch.tensor(ix) + # Create a heterograph with 3 node types and 3 edges types. + dgl_g = dgl.graph((src, dst)) + + cugraph_g = cugraph_dgl.Graph(is_multi_gpu=False) + cugraph_g.add_nodes(9) + cugraph_g.add_edges(u=src, v=dst) + + dgl_output = sample_dgl_graphs(dgl_g, train_nid, [2], batch_size=batch_size) + cugraph_output = sample_cugraph_dgl_graphs(cugraph_g, train_nid, [2], batch_size) + + cugraph_output_nodes = cugraph_output[0]["output_nodes"].cpu().numpy() + dgl_output_nodes = dgl_output[0]["output_nodes"].cpu().numpy() + + np.testing.assert_array_equal( + np.sort(cugraph_output_nodes), np.sort(dgl_output_nodes) + ) + assert ( + dgl_output[0]["blocks"][0].num_dst_nodes() + == cugraph_output[0]["blocks"][0].num_dst_nodes() + ) + assert ( + dgl_output[0]["blocks"][0].num_edges() + == cugraph_output[0]["blocks"][0].num_edges() + ) diff --git a/python/cugraph-dgl/cugraph_dgl/tests/dataloading/test_dataloader_mg.py b/python/cugraph-dgl/cugraph_dgl/tests/dataloading/test_dataloader_mg.py new file mode 100644 index 00000000000..b32233f16a6 --- /dev/null +++ b/python/cugraph-dgl/cugraph_dgl/tests/dataloading/test_dataloader_mg.py @@ -0,0 +1,181 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import pytest + +import numpy as np + +import cugraph_dgl + +from cugraph.datasets import karate +from cugraph.utilities.utils import import_optional, MissingModule + +from cugraph.gnn import ( + cugraph_comms_create_unique_id, + cugraph_comms_shutdown, +) + +from cugraph_dgl.tests.utils import init_pytorch_worker + +torch = import_optional("torch") +dgl = import_optional("dgl") + + +def run_test_dataloader_basic_homogeneous(rank, world_size, uid): + init_pytorch_worker(rank, world_size, uid) + + graph = cugraph_dgl.Graph(is_multi_gpu=True) + + num_nodes = karate.number_of_nodes() + graph.add_nodes( + num_nodes, + ) + + edf = karate.get_edgelist() + graph.add_edges( + u=torch.tensor_split(torch.as_tensor(edf["src"], device="cuda"), world_size)[ + rank + ], + v=torch.tensor_split(torch.as_tensor(edf["dst"], device="cuda"), world_size)[ + rank + ], + ) + + sampler = cugraph_dgl.dataloading.NeighborSampler([5, 5, 5]) + loader = cugraph_dgl.dataloading.FutureDataLoader( + graph, + torch.arange(num_nodes), + sampler, + batch_size=2, + use_ddp=True, + ) + + for in_t, out_t, blocks in loader: + assert len(blocks) == 3 + assert len(out_t) <= 2 + + +@pytest.mark.skipif(isinstance(torch, MissingModule), reason="torch not available") +@pytest.mark.skipif(isinstance(dgl, MissingModule), reason="dgl not available") +def test_dataloader_basic_homogeneous(): + uid = cugraph_comms_create_unique_id() + # Limit the number of GPUs this rest is run with + world_size = min(torch.cuda.device_count(), 4) + + torch.multiprocessing.spawn( + run_test_dataloader_basic_homogeneous, + args=( + world_size, + uid, + ), + nprocs=world_size, + ) + + +def sample_dgl_graphs(g, train_nid, fanouts, batch_size=1): + # Single fanout to match cugraph + sampler = dgl.dataloading.NeighborSampler(fanouts) + dataloader = dgl.dataloading.DataLoader( + g, + train_nid, + sampler, + batch_size=batch_size, + shuffle=False, + drop_last=False, + num_workers=0, + ) + + dgl_output = {} + for batch_id, (input_nodes, output_nodes, blocks) in enumerate(dataloader): + dgl_output[batch_id] = { + "input_nodes": input_nodes, + "output_nodes": output_nodes, + "blocks": blocks, + } + return dgl_output + + +def sample_cugraph_dgl_graphs(cugraph_g, train_nid, fanouts, batch_size=1): + sampler = cugraph_dgl.dataloading.NeighborSampler(fanouts) + + dataloader = cugraph_dgl.dataloading.FutureDataLoader( + cugraph_g, + train_nid, + sampler, + batch_size=batch_size, + drop_last=False, + shuffle=False, + ) + + cugraph_dgl_output = {} + for batch_id, (input_nodes, output_nodes, blocks) in enumerate(dataloader): + cugraph_dgl_output[batch_id] = { + "input_nodes": input_nodes, + "output_nodes": output_nodes, + "blocks": blocks, + } + return cugraph_dgl_output + + +def run_test_same_homogeneousgraph_results(rank, world_size, uid, ix, batch_size): + init_pytorch_worker(rank, world_size, uid) + + src = torch.tensor([1, 2, 3, 4, 5, 6, 7, 8]) + dst = torch.tensor([0, 0, 0, 0, 1, 1, 1, 1]) + + local_src = torch.tensor_split(src, world_size)[rank] + local_dst = torch.tensor_split(dst, world_size)[rank] + + train_nid = torch.tensor(ix) + # Create a heterograph with 3 node types and 3 edges types. + dgl_g = dgl.graph((src, dst)) + + cugraph_g = cugraph_dgl.Graph(is_multi_gpu=True) + cugraph_g.add_nodes(9) + cugraph_g.add_edges(u=local_src, v=local_dst) + + dgl_output = sample_dgl_graphs(dgl_g, train_nid, [2], batch_size=batch_size) + cugraph_output = sample_cugraph_dgl_graphs(cugraph_g, train_nid, [2], batch_size) + + cugraph_output_nodes = cugraph_output[0]["output_nodes"].cpu().numpy() + dgl_output_nodes = dgl_output[0]["output_nodes"].cpu().numpy() + + np.testing.assert_array_equal( + np.sort(cugraph_output_nodes), np.sort(dgl_output_nodes) + ) + assert ( + dgl_output[0]["blocks"][0].num_dst_nodes() + == cugraph_output[0]["blocks"][0].num_dst_nodes() + ) + assert ( + dgl_output[0]["blocks"][0].num_edges() + == cugraph_output[0]["blocks"][0].num_edges() + ) + + cugraph_comms_shutdown() + + +@pytest.mark.skipif(isinstance(torch, MissingModule), reason="torch not available") +@pytest.mark.skipif(isinstance(dgl, MissingModule), reason="dgl not available") +@pytest.mark.parametrize("ix", [[1], [1, 0]]) +@pytest.mark.parametrize("batch_size", [1, 2]) +def test_same_homogeneousgraph_results_mg(ix, batch_size): + uid = cugraph_comms_create_unique_id() + # Limit the number of GPUs this rest is run with + world_size = min(torch.cuda.device_count(), 4) + + torch.multiprocessing.spawn( + run_test_same_homogeneousgraph_results, + args=(world_size, uid, ix, batch_size), + nprocs=world_size, + ) diff --git a/python/cugraph-dgl/tests/test_dataset.py b/python/cugraph-dgl/cugraph_dgl/tests/dataloading/test_dataset.py similarity index 100% rename from python/cugraph-dgl/tests/test_dataset.py rename to python/cugraph-dgl/cugraph_dgl/tests/dataloading/test_dataset.py diff --git a/python/cugraph-dgl/tests/nn/test_gatconv.py b/python/cugraph-dgl/cugraph_dgl/tests/nn/test_gatconv.py similarity index 100% rename from python/cugraph-dgl/tests/nn/test_gatconv.py rename to python/cugraph-dgl/cugraph_dgl/tests/nn/test_gatconv.py diff --git a/python/cugraph-dgl/tests/nn/test_gatv2conv.py b/python/cugraph-dgl/cugraph_dgl/tests/nn/test_gatv2conv.py similarity index 100% rename from python/cugraph-dgl/tests/nn/test_gatv2conv.py rename to python/cugraph-dgl/cugraph_dgl/tests/nn/test_gatv2conv.py diff --git a/python/cugraph-dgl/tests/nn/test_relgraphconv.py b/python/cugraph-dgl/cugraph_dgl/tests/nn/test_relgraphconv.py similarity index 100% rename from python/cugraph-dgl/tests/nn/test_relgraphconv.py rename to python/cugraph-dgl/cugraph_dgl/tests/nn/test_relgraphconv.py diff --git a/python/cugraph-dgl/tests/nn/test_sageconv.py b/python/cugraph-dgl/cugraph_dgl/tests/nn/test_sageconv.py similarity index 100% rename from python/cugraph-dgl/tests/nn/test_sageconv.py rename to python/cugraph-dgl/cugraph_dgl/tests/nn/test_sageconv.py diff --git a/python/cugraph-dgl/tests/nn/test_sparsegraph.py b/python/cugraph-dgl/cugraph_dgl/tests/nn/test_sparsegraph.py similarity index 100% rename from python/cugraph-dgl/tests/nn/test_sparsegraph.py rename to python/cugraph-dgl/cugraph_dgl/tests/nn/test_sparsegraph.py diff --git a/python/cugraph-dgl/tests/nn/test_transformerconv.py b/python/cugraph-dgl/cugraph_dgl/tests/nn/test_transformerconv.py similarity index 100% rename from python/cugraph-dgl/tests/nn/test_transformerconv.py rename to python/cugraph-dgl/cugraph_dgl/tests/nn/test_transformerconv.py diff --git a/python/cugraph-dgl/tests/test_cugraph_storage.py b/python/cugraph-dgl/cugraph_dgl/tests/test_cugraph_storage.py similarity index 100% rename from python/cugraph-dgl/tests/test_cugraph_storage.py rename to python/cugraph-dgl/cugraph_dgl/tests/test_cugraph_storage.py diff --git a/python/cugraph-dgl/tests/test_from_dgl_heterograph.py b/python/cugraph-dgl/cugraph_dgl/tests/test_from_dgl_heterograph.py similarity index 83% rename from python/cugraph-dgl/tests/test_from_dgl_heterograph.py rename to python/cugraph-dgl/cugraph_dgl/tests/test_from_dgl_heterograph.py index 128d9bfaca5..667a4a2e66d 100644 --- a/python/cugraph-dgl/tests/test_from_dgl_heterograph.py +++ b/python/cugraph-dgl/cugraph_dgl/tests/test_from_dgl_heterograph.py @@ -1,4 +1,4 @@ -# Copyright (c) 2022-2023, NVIDIA CORPORATION. +# Copyright (c) 2022-2024, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -20,7 +20,9 @@ from cugraph.utilities.utils import import_optional from .utils import ( assert_same_edge_feats, + assert_same_edge_feats_daskapi, assert_same_node_feats, + assert_same_node_feats_daskapi, assert_same_num_edges_can_etypes, assert_same_num_edges_etypes, assert_same_num_nodes, @@ -134,7 +136,7 @@ def create_heterograph4(idtype): @pytest.mark.parametrize("idxtype", [th.int32, th.int64]) -def test_heterograph_conversion_nodes(idxtype): +def test_heterograph_conversion_nodes_daskapi(idxtype): graph_fs = [ create_heterograph1, create_heterograph2, @@ -145,6 +147,39 @@ def test_heterograph_conversion_nodes(idxtype): g = graph_f(idxtype) gs = cugraph_dgl.cugraph_storage_from_heterograph(g) + assert_same_num_nodes(gs, g) + assert_same_node_feats_daskapi(gs, g) + + +@pytest.mark.parametrize("idxtype", [th.int32, th.int64]) +def test_heterograph_conversion_edges_daskapi(idxtype): + graph_fs = [ + create_heterograph1, + create_heterograph2, + create_heterograph3, + create_heterograph4, + ] + for graph_f in graph_fs: + g = graph_f(idxtype) + gs = cugraph_dgl.cugraph_storage_from_heterograph(g) + + assert_same_num_edges_can_etypes(gs, g) + assert_same_num_edges_etypes(gs, g) + assert_same_edge_feats_daskapi(gs, g) + + +@pytest.mark.parametrize("idxtype", [th.int32, th.int64]) +def test_heterograph_conversion_nodes(idxtype): + graph_fs = [ + create_heterograph1, + create_heterograph2, + create_heterograph3, + create_heterograph4, + ] + for graph_f in graph_fs: + g = graph_f(idxtype) + gs = cugraph_dgl.cugraph_dgl_graph_from_heterograph(g) + assert_same_num_nodes(gs, g) assert_same_node_feats(gs, g) @@ -159,7 +194,7 @@ def test_heterograph_conversion_edges(idxtype): ] for graph_f in graph_fs: g = graph_f(idxtype) - gs = cugraph_dgl.cugraph_storage_from_heterograph(g) + gs = cugraph_dgl.cugraph_dgl_graph_from_heterograph(g) assert_same_num_edges_can_etypes(gs, g) assert_same_num_edges_etypes(gs, g) diff --git a/python/cugraph-dgl/cugraph_dgl/tests/test_graph.py b/python/cugraph-dgl/cugraph_dgl/tests/test_graph.py new file mode 100644 index 00000000000..a60db97b8d6 --- /dev/null +++ b/python/cugraph-dgl/cugraph_dgl/tests/test_graph.py @@ -0,0 +1,217 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import pytest + +import cugraph_dgl +import pylibcugraph +import cupy +import numpy as np + +from cugraph.datasets import karate +from cugraph.utilities.utils import import_optional, MissingModule + +torch = import_optional("torch") +dgl = import_optional("dgl") + + +@pytest.mark.skipif(isinstance(torch, MissingModule), reason="torch not available") +@pytest.mark.skipif(isinstance(dgl, MissingModule), reason="dgl not available") +@pytest.mark.parametrize("direction", ["out", "in"]) +def test_graph_make_homogeneous_graph(direction): + df = karate.get_edgelist() + df.src = df.src.astype("int64") + df.dst = df.dst.astype("int64") + wgt = np.random.random((len(df),)) + + graph = cugraph_dgl.Graph() + num_nodes = max(df.src.max(), df.dst.max()) + 1 + node_x = np.random.random((num_nodes,)) + + graph.add_nodes( + num_nodes, data={"num": torch.arange(num_nodes, dtype=torch.int64), "x": node_x} + ) + graph.add_edges(df.src, df.dst, {"weight": wgt}) + plc_dgl_graph = graph._graph(direction=direction) + + assert graph.num_nodes() == num_nodes + assert graph.num_edges() == len(df) + assert graph.is_homogeneous + assert not graph.is_multi_gpu + + assert ( + graph.nodes() == torch.arange(num_nodes, dtype=torch.int64, device="cuda") + ).all() + + assert graph.nodes[None]["x"] is not None + assert (graph.nodes[None]["x"] == torch.as_tensor(node_x, device="cuda")).all() + assert ( + graph.nodes[None]["num"] + == torch.arange(num_nodes, dtype=torch.int64, device="cuda") + ).all() + + assert ( + graph.edges("eid", device="cuda") + == torch.arange(len(df), dtype=torch.int64, device="cuda") + ).all() + assert (graph.edges[None]["weight"] == torch.as_tensor(wgt, device="cuda")).all() + + plc_expected_graph = pylibcugraph.SGGraph( + pylibcugraph.ResourceHandle(), + pylibcugraph.GraphProperties(is_multigraph=True, is_symmetric=False), + df.src if direction == "out" else df.dst, + df.dst if direction == "out" else df.src, + vertices_array=cupy.arange(num_nodes, dtype="int64"), + ) + + # Do the expensive check to make sure this test fails if an invalid + # graph is constructed. + v_actual, d_in_actual, d_out_actual = pylibcugraph.degrees( + pylibcugraph.ResourceHandle(), + plc_dgl_graph, + source_vertices=cupy.arange(num_nodes, dtype="int64"), + do_expensive_check=True, + ) + + v_exp, d_in_exp, d_out_exp = pylibcugraph.degrees( + pylibcugraph.ResourceHandle(), + plc_expected_graph, + source_vertices=cupy.arange(num_nodes, dtype="int64"), + do_expensive_check=True, + ) + + assert (v_actual == v_exp).all() + assert (d_in_actual == d_in_exp).all() + assert (d_out_actual == d_out_exp).all() + + +@pytest.mark.skipif(isinstance(torch, MissingModule), reason="torch not available") +@pytest.mark.skipif(isinstance(dgl, MissingModule), reason="dgl not available") +@pytest.mark.parametrize("direction", ["out", "in"]) +def test_graph_make_heterogeneous_graph(direction): + df = karate.get_edgelist() + df.src = df.src.astype("int64") + df.dst = df.dst.astype("int64") + + graph = cugraph_dgl.Graph() + total_num_nodes = max(df.src.max(), df.dst.max()) + 1 + + num_nodes_group_1 = total_num_nodes // 2 + num_nodes_group_2 = total_num_nodes - num_nodes_group_1 + + node_x_1 = np.random.random((num_nodes_group_1,)) + node_x_2 = np.random.random((num_nodes_group_2,)) + + graph.add_nodes(num_nodes_group_1, {"x": node_x_1}, "type1") + graph.add_nodes(num_nodes_group_2, {"x": node_x_2}, "type2") + + edges_11 = df[(df.src < num_nodes_group_1) & (df.dst < num_nodes_group_1)] + edges_12 = df[(df.src < num_nodes_group_1) & (df.dst >= num_nodes_group_1)] + edges_21 = df[(df.src >= num_nodes_group_1) & (df.dst < num_nodes_group_1)] + edges_22 = df[(df.src >= num_nodes_group_1) & (df.dst >= num_nodes_group_1)] + + edges_12.dst -= num_nodes_group_1 + edges_21.src -= num_nodes_group_1 + edges_22.dst -= num_nodes_group_1 + edges_22.src -= num_nodes_group_1 + + graph.add_edges(edges_11.src, edges_11.dst, etype=("type1", "e1", "type1")) + graph.add_edges(edges_12.src, edges_12.dst, etype=("type1", "e2", "type2")) + graph.add_edges(edges_21.src, edges_21.dst, etype=("type2", "e3", "type1")) + graph.add_edges(edges_22.src, edges_22.dst, etype=("type2", "e4", "type2")) + + assert not graph.is_homogeneous + assert not graph.is_multi_gpu + + # Verify graph.nodes() + assert ( + graph.nodes() == torch.arange(total_num_nodes, dtype=torch.int64, device="cuda") + ).all() + assert ( + graph.nodes("type1") + == torch.arange(num_nodes_group_1, dtype=torch.int64, device="cuda") + ).all() + assert ( + graph.nodes("type2") + == torch.arange(num_nodes_group_2, dtype=torch.int64, device="cuda") + ).all() + + # Verify graph.edges() + assert ( + graph.edges("eid", etype=("type1", "e1", "type1")) + == torch.arange(len(edges_11), dtype=torch.int64, device="cuda") + ).all() + assert ( + graph.edges("eid", etype=("type1", "e2", "type2")) + == torch.arange(len(edges_12), dtype=torch.int64, device="cuda") + ).all() + assert ( + graph.edges("eid", etype=("type2", "e3", "type1")) + == torch.arange(len(edges_21), dtype=torch.int64, device="cuda") + ).all() + assert ( + graph.edges("eid", etype=("type2", "e4", "type2")) + == torch.arange(len(edges_22), dtype=torch.int64, device="cuda") + ).all() + + # Use sampling call to check graph creation + # This isn't a test of cuGraph sampling with DGL; the options are + # set to verify the graph only. + plc_graph = graph._graph(direction) + sampling_output = pylibcugraph.uniform_neighbor_sample( + pylibcugraph.ResourceHandle(), + plc_graph, + start_list=cupy.arange(total_num_nodes, dtype="int64"), + h_fan_out=np.array([1, 1], dtype="int32"), + with_replacement=False, + do_expensive_check=True, + with_edge_properties=True, + prior_sources_behavior="exclude", + return_dict=True, + ) + + expected_etypes = { + 0: "e1", + 1: "e2", + 2: "e3", + 3: "e4", + } + expected_offsets = { + 0: (0, 0), + 1: (0, num_nodes_group_1), + 2: (num_nodes_group_1, 0), + 3: (num_nodes_group_1, num_nodes_group_1), + } + if direction == "in": + src_col = "minors" + dst_col = "majors" + else: + src_col = "majors" + dst_col = "minors" + + # Looping over the output verifies that all edges are valid + # (and therefore, the graph is valid) + for i, etype in enumerate(sampling_output["edge_type"].tolist()): + eid = int(sampling_output["edge_id"][i]) + + srcs, dsts, eids = graph.edges( + "all", etype=expected_etypes[etype], device="cpu" + ) + + assert eids[eid] == eid + assert ( + srcs[eid] == int(sampling_output[src_col][i]) - expected_offsets[etype][0] + ) + assert ( + dsts[eid] == int(sampling_output[dst_col][i]) - expected_offsets[etype][1] + ) diff --git a/python/cugraph-dgl/cugraph_dgl/tests/test_graph_mg.py b/python/cugraph-dgl/cugraph_dgl/tests/test_graph_mg.py new file mode 100644 index 00000000000..eedda664c52 --- /dev/null +++ b/python/cugraph-dgl/cugraph_dgl/tests/test_graph_mg.py @@ -0,0 +1,310 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +import pytest + +import cugraph_dgl +import pylibcugraph +import cupy +import numpy as np + +import cudf + +from cugraph.datasets import karate +from cugraph.utilities.utils import import_optional, MissingModule + +from cugraph.gnn import ( + cugraph_comms_shutdown, + cugraph_comms_create_unique_id, + cugraph_comms_get_raft_handle, +) + +from .utils import init_pytorch_worker + +pylibwholegraph = import_optional("pylibwholegraph") +torch = import_optional("torch") +dgl = import_optional("dgl") + + +def run_test_graph_make_homogeneous_graph_mg(rank, uid, world_size, direction): + init_pytorch_worker(rank, world_size, uid, init_wholegraph=True) + + df = karate.get_edgelist() + df.src = df.src.astype("int64") + df.dst = df.dst.astype("int64") + wgt = np.random.random((len(df),)) + + graph = cugraph_dgl.Graph( + is_multi_gpu=True, ndata_storage="wholegraph", edata_storage="wholegraph" + ) + + # The number of nodes is set globally but features can have + # any distribution across workers as long as they are in order. + global_num_nodes = max(df.src.max(), df.dst.max()) + 1 + node_x = np.array_split(np.arange(global_num_nodes, dtype="int64"), world_size)[ + rank + ] + + # Each worker gets a shuffled, permuted version of the edgelist + df = df.sample(frac=1.0) + df.src = (df.src + rank) % global_num_nodes + df.dst = (df.dst + rank + 1) % global_num_nodes + + graph.add_nodes(global_num_nodes, data={"x": node_x}) + graph.add_edges(df.src, df.dst, {"weight": wgt}) + plc_dgl_graph = graph._graph(direction=direction) + + assert graph.num_nodes() == global_num_nodes + assert graph.num_edges() == len(df) * world_size + assert graph.is_homogeneous + assert graph.is_multi_gpu + + assert ( + graph.nodes() + == torch.arange(global_num_nodes, dtype=torch.int64, device="cuda") + ).all() + ix = torch.arange(len(node_x) * rank, len(node_x) * (rank + 1), dtype=torch.int64) + assert graph.nodes[ix]["x"] is not None + assert (graph.nodes[ix]["x"] == torch.as_tensor(node_x, device="cuda")).all() + + assert ( + graph.edges("eid", device="cuda") + == torch.arange(world_size * len(df), dtype=torch.int64, device="cuda") + ).all() + ix = torch.arange(len(df) * rank, len(df) * (rank + 1), dtype=torch.int64) + assert (graph.edges[ix]["weight"] == torch.as_tensor(wgt, device="cuda")).all() + + plc_handle = pylibcugraph.ResourceHandle( + cugraph_comms_get_raft_handle().getHandle() + ) + + plc_expected_graph = pylibcugraph.MGGraph( + plc_handle, + pylibcugraph.GraphProperties(is_multigraph=True, is_symmetric=False), + [df.src] if direction == "out" else [df.dst], + [df.dst] if direction == "out" else [df.src], + vertices_array=[ + cupy.array_split(cupy.arange(global_num_nodes, dtype="int64"), world_size)[ + rank + ] + ], + ) + + # Do the expensive check to make sure this test fails if an invalid + # graph is constructed. + v_actual, d_in_actual, d_out_actual = pylibcugraph.degrees( + plc_handle, + plc_dgl_graph, + source_vertices=cupy.arange(global_num_nodes, dtype="int64"), + do_expensive_check=True, + ) + + v_exp, d_in_exp, d_out_exp = pylibcugraph.degrees( + plc_handle, + plc_expected_graph, + source_vertices=cupy.arange(global_num_nodes, dtype="int64"), + do_expensive_check=True, + ) + + assert (v_actual == v_exp).all() + assert (d_in_actual == d_in_exp).all() + assert (d_out_actual == d_out_exp).all() + + cugraph_comms_shutdown() + + +@pytest.mark.skipif(isinstance(torch, MissingModule), reason="torch not available") +@pytest.mark.skipif( + isinstance(pylibwholegraph, MissingModule), reason="wholegraph not available" +) +@pytest.mark.skipif(isinstance(dgl, MissingModule), reason="dgl not available") +@pytest.mark.parametrize("direction", ["out", "in"]) +def test_graph_make_homogeneous_graph_mg(direction): + uid = cugraph_comms_create_unique_id() + world_size = torch.cuda.device_count() + + torch.multiprocessing.spawn( + run_test_graph_make_homogeneous_graph_mg, + args=( + uid, + world_size, + direction, + ), + nprocs=world_size, + ) + + +def run_test_graph_make_heterogeneous_graph_mg(rank, uid, world_size, direction): + init_pytorch_worker(rank, world_size, uid) + + df = karate.get_edgelist() + df.src = df.src.astype("int64") + df.dst = df.dst.astype("int64") + + graph = cugraph_dgl.Graph(is_multi_gpu=True) + total_num_nodes = max(df.src.max(), df.dst.max()) + 1 + + # Each worker gets a shuffled, permuted version of the edgelist + df = df.sample(frac=1.0) + df.src = (df.src + rank) % total_num_nodes + df.dst = (df.dst + rank + 1) % total_num_nodes + + num_nodes_group_1 = total_num_nodes // 2 + num_nodes_group_2 = total_num_nodes - num_nodes_group_1 + + node_x_1 = np.array_split(np.random.random((num_nodes_group_1,)), world_size)[rank] + node_x_2 = np.array_split(np.random.random((num_nodes_group_2,)), world_size)[rank] + + graph.add_nodes(num_nodes_group_1, {"x": node_x_1}, "type1") + graph.add_nodes(num_nodes_group_2, {"x": node_x_2}, "type2") + + edges_11 = df[(df.src < num_nodes_group_1) & (df.dst < num_nodes_group_1)] + edges_12 = df[(df.src < num_nodes_group_1) & (df.dst >= num_nodes_group_1)] + edges_21 = df[(df.src >= num_nodes_group_1) & (df.dst < num_nodes_group_1)] + edges_22 = df[(df.src >= num_nodes_group_1) & (df.dst >= num_nodes_group_1)] + + edges_12.dst -= num_nodes_group_1 + edges_21.src -= num_nodes_group_1 + edges_22.dst -= num_nodes_group_1 + edges_22.src -= num_nodes_group_1 + + total_edges_11 = torch.tensor(len(edges_11), device="cuda", dtype=torch.int64) + torch.distributed.all_reduce(total_edges_11, torch.distributed.ReduceOp.SUM) + total_edges_12 = torch.tensor(len(edges_12), device="cuda", dtype=torch.int64) + torch.distributed.all_reduce(total_edges_12, torch.distributed.ReduceOp.SUM) + total_edges_21 = torch.tensor(len(edges_21), device="cuda", dtype=torch.int64) + torch.distributed.all_reduce(total_edges_21, torch.distributed.ReduceOp.SUM) + total_edges_22 = torch.tensor(len(edges_22), device="cuda", dtype=torch.int64) + torch.distributed.all_reduce(total_edges_22, torch.distributed.ReduceOp.SUM) + + graph.add_edges(edges_11.src, edges_11.dst, etype=("type1", "e1", "type1")) + graph.add_edges(edges_12.src, edges_12.dst, etype=("type1", "e2", "type2")) + graph.add_edges(edges_21.src, edges_21.dst, etype=("type2", "e3", "type1")) + graph.add_edges(edges_22.src, edges_22.dst, etype=("type2", "e4", "type2")) + + assert not graph.is_homogeneous + assert graph.is_multi_gpu + + # Verify graph.nodes() + assert ( + graph.nodes() == torch.arange(total_num_nodes, dtype=torch.int64, device="cuda") + ).all() + assert ( + graph.nodes("type1") + == torch.arange(num_nodes_group_1, dtype=torch.int64, device="cuda") + ).all() + assert ( + graph.nodes("type2") + == torch.arange(num_nodes_group_2, dtype=torch.int64, device="cuda") + ).all() + + # Verify graph.edges() + assert ( + graph.edges("eid", etype=("type1", "e1", "type1")) + == torch.arange(total_edges_11, dtype=torch.int64, device="cuda") + ).all() + assert ( + graph.edges("eid", etype=("type1", "e2", "type2")) + == torch.arange(total_edges_12, dtype=torch.int64, device="cuda") + ).all() + assert ( + graph.edges("eid", etype=("type2", "e3", "type1")) + == torch.arange(total_edges_21, dtype=torch.int64, device="cuda") + ).all() + assert ( + graph.edges("eid", etype=("type2", "e4", "type2")) + == torch.arange(total_edges_22, dtype=torch.int64, device="cuda") + ).all() + + # Use sampling call to check graph creation + # This isn't a test of cuGraph sampling with DGL; the options are + # set to verify the graph only. + plc_graph = graph._graph(direction) + assert isinstance(plc_graph, pylibcugraph.MGGraph) + sampling_output = pylibcugraph.uniform_neighbor_sample( + graph._resource_handle, + plc_graph, + start_list=cupy.arange(total_num_nodes, dtype="int64"), + batch_id_list=cupy.full(total_num_nodes, rank, dtype="int32"), + label_list=cupy.arange(world_size, dtype="int32"), + label_to_output_comm_rank=cupy.arange(world_size, dtype="int32"), + h_fan_out=np.array([-1], dtype="int32"), + with_replacement=False, + do_expensive_check=True, + with_edge_properties=True, + prior_sources_behavior="exclude", + return_dict=True, + ) + + sdf = cudf.DataFrame( + { + "majors": sampling_output["majors"], + "minors": sampling_output["minors"], + "edge_id": sampling_output["edge_id"], + "edge_type": sampling_output["edge_type"], + } + ) + + expected_offsets = { + 0: (0, 0), + 1: (0, num_nodes_group_1), + 2: (num_nodes_group_1, 0), + 3: (num_nodes_group_1, num_nodes_group_1), + } + if direction == "in": + src_col = "minors" + dst_col = "majors" + else: + src_col = "majors" + dst_col = "minors" + + edges_11["etype"] = 0 + edges_12["etype"] = 1 + edges_21["etype"] = 2 + edges_22["etype"] = 3 + + cdf = cudf.concat([edges_11, edges_12, edges_21, edges_22]) + for i in range(len(cdf)): + row = cdf.iloc[i] + etype = row["etype"] + src = row["src"] + expected_offsets[etype][0] + dst = row["dst"] + expected_offsets[etype][1] + + f = sdf[ + (sdf[src_col] == src) & (sdf[dst_col] == dst) & (sdf["edge_type"] == etype) + ] + assert len(f) > 0 # may be multiple, some could be on other GPU + + cugraph_comms_shutdown() + + +@pytest.mark.skipif(isinstance(torch, MissingModule), reason="torch not available") +@pytest.mark.skipif( + isinstance(pylibwholegraph, MissingModule), reason="wholegraph not available" +) +@pytest.mark.skipif(isinstance(dgl, MissingModule), reason="dgl not available") +@pytest.mark.parametrize("direction", ["out", "in"]) +def test_graph_make_heterogeneous_graph_mg(direction): + uid = cugraph_comms_create_unique_id() + world_size = torch.cuda.device_count() + + torch.multiprocessing.spawn( + run_test_graph_make_heterogeneous_graph_mg, + args=( + uid, + world_size, + direction, + ), + nprocs=world_size, + ) diff --git a/python/cugraph-dgl/tests/test_utils.py b/python/cugraph-dgl/cugraph_dgl/tests/test_utils.py similarity index 100% rename from python/cugraph-dgl/tests/test_utils.py rename to python/cugraph-dgl/cugraph_dgl/tests/test_utils.py diff --git a/python/cugraph-dgl/cugraph_dgl/tests/utils.py b/python/cugraph-dgl/cugraph_dgl/tests/utils.py new file mode 100644 index 00000000000..fa4eb05f297 --- /dev/null +++ b/python/cugraph-dgl/cugraph_dgl/tests/utils.py @@ -0,0 +1,154 @@ +# Copyright (c) 2022-2024, NVIDIA CORPORATION. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os + +from cugraph.utilities.utils import import_optional +from cugraph.gnn import cugraph_comms_init + +th = import_optional("torch") + + +def assert_same_node_feats_daskapi(gs, g): + assert set(gs.ndata.keys()) == set(g.ndata.keys()) + + for key in g.ndata.keys(): + for ntype in g.ntypes: + indices = th.arange(0, g.num_nodes(ntype), dtype=g.idtype).cuda() + if len(g.ntypes) <= 1 or ntype in g.ndata[key]: + g_output = g.get_node_storage(key=key, ntype=ntype).fetch( + indices, device="cuda" + ) + gs_output = gs.get_node_storage(key=key, ntype=ntype).fetch(indices) + equal_t = (gs_output != g_output).sum().cpu() + assert equal_t == 0 + + +def assert_same_node_feats(gs, g): + assert set(gs.ndata.keys()) == set(g.ndata.keys()) + assert set(gs.ntypes) == set(g.ntypes) + + for key in g.ndata.keys(): + for ntype in g.ntypes: + if len(g.ntypes) <= 1 or ntype in g.ndata[key]: + indices = th.arange(0, g.num_nodes(ntype), dtype=g.idtype) + + g_output = g.ndata[key] + gs_output = gs.ndata[key] + + if len(g.ntypes) > 1: + g_output = g_output[ntype] + gs_output = gs_output[ntype] + + g_output = g_output[indices] + gs_output = gs_output[indices] + + equal_t = (gs_output != g_output).sum() + assert equal_t == 0 + + +def assert_same_num_nodes(gs, g): + for ntype in g.ntypes: + assert g.num_nodes(ntype) == gs.num_nodes(ntype) + + +def assert_same_num_edges_can_etypes(gs, g): + for can_etype in g.canonical_etypes: + assert g.num_edges(can_etype) == gs.num_edges(can_etype) + + +def assert_same_num_edges_etypes(gs, g): + for etype in g.etypes: + assert g.num_edges(etype) == gs.num_edges(etype) + + +def assert_same_edge_feats_daskapi(gs, g): + assert set(gs.edata.keys()) == set(g.edata.keys()) + for key in g.edata.keys(): + for etype in g.canonical_etypes: + indices = th.arange(0, g.num_edges(etype), dtype=g.idtype).cuda() + if len(g.etypes) <= 1 or etype in g.edata[key]: + g_output = g.get_edge_storage(key=key, etype=etype).fetch( + indices, device="cuda" + ) + gs_output = gs.get_edge_storage(key=key, etype=etype).fetch(indices) + equal_t = (gs_output != g_output).sum().cpu() + assert equal_t == 0 + + +def assert_same_edge_feats(gs, g): + assert set(gs.edata.keys()) == set(g.edata.keys()) + assert set(gs.canonical_etypes) == set(g.canonical_etypes) + assert set(gs.etypes) == set(g.etypes) + + for key in g.edata.keys(): + for etype in g.canonical_etypes: + if len(g.etypes) <= 1 or etype in g.edata[key]: + indices = th.arange(0, g.num_edges(etype), dtype=g.idtype).cuda() + g_output = g.edata[key] + gs_output = gs.edata[key] + + if len(g.etypes) > 1: + g_output = g_output[etype] + gs_output = gs_output[etype] + + g_output = g_output[indices] + gs_output = gs_output[indices] + + equal_t = (gs_output != g_output).sum().cpu() + assert equal_t == 0 + + +def assert_same_sampling_len(dgl_g, cugraph_gs, nodes, fanout, edge_dir): + dgl_o = dgl_g.sample_neighbors(nodes, fanout=fanout, edge_dir=edge_dir) + cugraph_o = cugraph_gs.sample_neighbors(nodes, fanout=fanout, edge_dir=edge_dir) + assert cugraph_o.num_edges() == dgl_o.num_edges() + for etype in dgl_o.canonical_etypes: + assert dgl_o.num_edges(etype) == cugraph_o.num_edges(etype) + + +def init_pytorch_worker(rank, world_size, cugraph_id, init_wholegraph=False): + import rmm + + rmm.reinitialize( + devices=rank, + ) + + import cupy + + cupy.cuda.Device(rank).use() + from rmm.allocators.cupy import rmm_cupy_allocator + + cupy.cuda.set_allocator(rmm_cupy_allocator) + + from cugraph.testing.mg_utils import enable_spilling + + enable_spilling() + + th.cuda.set_device(rank) + + os.environ["MASTER_ADDR"] = "localhost" + os.environ["MASTER_PORT"] = "12355" + th.distributed.init_process_group("nccl", rank=rank, world_size=world_size) + + if init_wholegraph: + import pylibwholegraph + + pylibwholegraph.torch.initialize.init( + rank, + world_size, + rank, + world_size, + ) + + cugraph_comms_init(rank=rank, world_size=world_size, uid=cugraph_id, device=rank) diff --git a/python/cugraph-dgl/cugraph_dgl/typing.py b/python/cugraph-dgl/cugraph_dgl/typing.py new file mode 100644 index 00000000000..a68463c3fd9 --- /dev/null +++ b/python/cugraph-dgl/cugraph_dgl/typing.py @@ -0,0 +1,40 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from typing import List, Union, Tuple +from cugraph.utilities.utils import import_optional + +from cugraph_dgl.nn import SparseGraph + +import pandas +import numpy +import cupy +import cudf + +torch = import_optional("torch") +dgl = import_optional("dgl") + +TensorType = Union[ + "torch.Tensor", + "cupy.ndarray", + "numpy.ndarray", + "cudf.Series", + "pandas.Series", + List[int], +] + +DGLSamplerOutput = Tuple[ + "torch.Tensor", + "torch.Tensor", + List[Union["dgl.Block", SparseGraph]], +] diff --git a/python/cugraph-dgl/cugraph_dgl/utils/cugraph_conversion_utils.py b/python/cugraph-dgl/cugraph_dgl/utils/cugraph_conversion_utils.py index 647dbd38a64..2ba04bd916f 100644 --- a/python/cugraph-dgl/cugraph_dgl/utils/cugraph_conversion_utils.py +++ b/python/cugraph-dgl/cugraph_dgl/utils/cugraph_conversion_utils.py @@ -1,4 +1,4 @@ -# Copyright (c) 2022-2023, NVIDIA CORPORATION. +# Copyright (c) 2022-2024, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -15,12 +15,15 @@ from __future__ import annotations from typing import Dict, Tuple, Union +from cugraph_dgl.typing import TensorType + import cudf import pandas as pd import dask.dataframe as dd import dask_cudf from dask.distributed import get_client import cupy as cp +import numpy as np from cugraph.utilities.utils import import_optional from cugraph.gnn.dgl_extensions.dgl_uniform_sampler import src_n, dst_n @@ -115,3 +118,13 @@ def add_edata_from_dgl_HeteroGraph(gs, g): gs.edata_storage.add_data( feat_name=feat_name, type_name=etype, feat_obj=feat_t ) + + +def _cast_to_torch_tensor(t: TensorType) -> "torch.Tensor": + if isinstance(t, torch.Tensor): + return t + elif isinstance(t, (cp.ndarray, cudf.Series)): + return torch.as_tensor(t, device="cuda") + elif isinstance(t, (pd.Series, np.ndarray)): + return torch.as_tensor(t, device="cpu") + return torch.as_tensor(t) diff --git a/python/cugraph-dgl/cugraph_dgl/view.py b/python/cugraph-dgl/cugraph_dgl/view.py new file mode 100644 index 00000000000..dbc53e73b6a --- /dev/null +++ b/python/cugraph-dgl/cugraph_dgl/view.py @@ -0,0 +1,310 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +from collections import defaultdict +from collections.abc import MutableMapping +from typing import Union, Dict, List, Tuple + +from cugraph.utilities.utils import import_optional + +import cugraph_dgl +from cugraph_dgl.typing import TensorType + +torch = import_optional("torch") +dgl = import_optional("dgl") + + +class HeteroEdgeDataView(MutableMapping): + """ + Duck-typed version of DGL's HeteroEdgeDataView. + Used for accessing and modifying edge features. + """ + + def __init__( + self, + graph: "cugraph_dgl.Graph", + etype: Union[Tuple[str, str, str], List[Tuple[str, str, str]]], + edges: TensorType, + ): + self.__graph = graph + self.__etype = etype + self.__edges = edges + + @property + def _etype(self) -> Tuple[str, str, str]: + return self.__etype + + @property + def _graph(self) -> "cugraph_dgl.Graph": + return self.__graph + + @property + def _edges(self) -> TensorType: + return self.__edges + + def __getitem__(self, key: str): + if isinstance(self._etype, list): + return { + t: self._graph._get_e_emb(t, key, self._edges) + for t in self._etype + if self._graph._has_e_emb(t, key) + } + + return self._graph._get_e_emb(self._etype, key, self._edges) + + def __setitem__(self, key: str, val: Union[TensorType, Dict[str, TensorType]]): + if isinstance(self._etype, list): + if not isinstance(val, dict): + raise ValueError( + "There are multiple edge types in this view. " + "Expected a dictionary of values." + ) + for t, v in val.items(): + if t not in self._etype: + raise ValueError("Attempted to modify a type out of view.") + self._graph.set_e_emb(t, self._edges, {key: v}) + else: + if isinstance(val, dict): + raise ValueError( + "There is only one edge type in this view. " + "Expected a single tensor." + ) + self._graph.set_e_emb(self._etype, self._edges, {key: v}) + + def __delitem__(self, key: str): + if isinstance(self._etype, list): + for t in self._etype: + self._graph.pop_e_emb(t, key) + else: + self._graph.pop_e_emb(self._etype, key) + + def _transpose(self, fetch_vals=True): + if isinstance(self._etype, list): + tr = defaultdict(dict) + for etype in self._etype: + for key in self._graph._get_e_emb_keys(etype): + tr[key][etype] = ( + self._graph._get_e_emb(etype, key, self._edges) + if fetch_vals + else [] + ) + else: + tr = {} + for key in self._graph._get_e_emb_keys(self._etype): + tr[key] = ( + self._graph._get_e_emb(self._etype, key, self._edges) + if fetch_vals + else [] + ) + + return tr + + def __len__(self): + return len(self._transpose(fetch_vals=False)) + + def __iter__(self): + return iter(self._transpose()) + + def keys(self): + return self._transpose(fetch_vals=False).keys() + + def values(self): + return self._transpose().values() + + def __repr__(self): + return repr(self._transpose(fetch_vals=False)) + + +class HeteroNodeDataView(MutableMapping): + """ + Duck-typed version of DGL's HeteroNodeDataView. + Used for accessing and modifying node features. + """ + + def __init__( + self, + graph: "cugraph_dgl.Graph", + ntype: Union[str, List[str]], + nodes: TensorType, + ): + self.__graph = graph + self.__ntype = ntype + self.__nodes = nodes + + @property + def _ntype(self) -> str: + return self.__ntype + + @property + def _graph(self) -> "cugraph_dgl.Graph": + return self.__graph + + @property + def _nodes(self) -> TensorType: + return self.__nodes + + def __getitem__(self, key: str): + if isinstance(self._ntype, list): + return { + t: self._graph._get_n_emb(t, key, self._nodes) + for t in self._ntype + if self._graph._has_n_emb(t, key) + } + else: + return self._graph._get_n_emb(self._ntype, key, self._nodes) + + def __setitem__(self, key: str, val: Union[TensorType, Dict[str, TensorType]]): + if isinstance(self._ntype, list): + if not isinstance(val, dict): + raise ValueError( + "There are multiple node types in this view. " + "Expected a dictionary of values." + ) + for t, v in val.items(): + if t not in self._ntype: + raise ValueError("Attempted to modify a type out of view.") + self._graph._set_n_emb(t, self._nodes, {key: v}) + else: + if isinstance(val, dict): + raise ValueError( + "There is only one node type in this view. " + "Expected a single value tensor." + ) + self._graph._set_n_emb(self._ntype, self._nodes, {key: val}) + + def __delitem__(self, key: str): + if isinstance(self._ntype, list): + for t in self._ntype: + self._graph._pop_n_emb(t, key) + else: + self._graph.pop_n_emb(self._ntype, key) + + def _transpose(self, fetch_vals=True): + if isinstance(self._ntype, list): + tr = defaultdict(dict) + for ntype in self._ntype: + for key in self._graph._get_n_emb_keys(ntype): + tr[key][ntype] = ( + self._graph._get_n_emb(ntype, key, self._nodes) + if fetch_vals + else [] + ) + else: + tr = {} + for key in self._graph._get_n_emb_keys(self._ntype): + tr[key] = ( + self._graph._get_n_emb(self._ntype, key, self._nodes) + if fetch_vals + else [] + ) + + return tr + + def __len__(self): + return len(self._transpose(fetch_vals=False)) + + def __iter__(self): + return iter(self._transpose()) + + def keys(self): + return self._transpose(fetch_vals=False).keys() + + def values(self): + return self._transpose().values() + + def __repr__(self): + return repr(self._transpose(fetch_vals=False)) + + +class HeteroEdgeView: + """ + Duck-typed version of DGL's HeteroEdgeView. + """ + + def __init__(self, graph): + self.__graph = graph + + @property + def _graph(self) -> "cugraph_dgl.Graph": + return self.__graph + + def __getitem__(self, key): + if isinstance(key, slice): + if not (key.start is None and key.stop is None and key.stop is None): + raise ValueError("Only full slices are supported in DGL.") + edges = dgl.base.ALL + etype = None + elif key is None: + edges = dgl.base.ALL + etype = None + elif isinstance(key, tuple): + if len(key) == 3: + edges = dgl.base.ALL + etype = key + else: + edges = key + etype = None + elif isinstance(key, str): + edges = dgl.base.ALL + etype = key + else: + edges = key + etype = None + + return HeteroEdgeDataView( + graph=self.__graph, + etype=etype, + edges=edges, + ) + + def __call__(self, *args, **kwargs): + if "device" in kwargs: + return self.__graph.all_edges(*args, **kwargs) + + return self.__graph.all_edges(*args, **kwargs, device="cuda") + + +class HeteroNodeView: + """ + Duck-typed version of DGL's HeteroNodeView. + """ + + def __init__(self, graph: "cugraph_dgl.Graph"): + self.__graph = graph + + @property + def _graph(self) -> "cugraph_dgl.Graph": + return self.__graph + + def __getitem__(self, key): + if isinstance(key, slice): + if not (key.start is None and key.stop is None and key.stop is None): + raise ValueError("Only full slices are supported in DGL.") + nodes = dgl.base.ALL + ntype = None + elif isinstance(key, tuple): + nodes, ntype = key + elif key is None or isinstance(key, str): + nodes = dgl.base.ALL + ntype = key + else: + nodes = key + ntype = None + + return HeteroNodeDataView(graph=self.__graph, ntype=ntype, nodes=nodes) + + def __call__(self, ntype=None): + return torch.arange( + 0, self.__graph.num_nodes(ntype), dtype=self.__graph.idtype, device="cuda" + ) diff --git a/python/cugraph-dgl/tests/utils.py b/python/cugraph-dgl/tests/utils.py deleted file mode 100644 index d6a90840b72..00000000000 --- a/python/cugraph-dgl/tests/utils.py +++ /dev/null @@ -1,67 +0,0 @@ -# Copyright (c) 2022-2023, NVIDIA CORPORATION. -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -from cugraph.utilities.utils import import_optional - -th = import_optional("torch") - - -def assert_same_node_feats(gs, g): - set(gs.ndata.keys()) == set(g.ndata.keys()) - - for key in g.ndata.keys(): - for ntype in g.ntypes: - indices = th.arange(0, g.num_nodes(ntype), dtype=g.idtype).cuda() - if len(g.ntypes) <= 1 or ntype in g.ndata[key]: - g_output = g.get_node_storage(key=key, ntype=ntype).fetch( - indices, device="cuda" - ) - gs_output = gs.get_node_storage(key=key, ntype=ntype).fetch(indices) - equal_t = (gs_output != g_output).sum().cpu() - assert equal_t == 0 - - -def assert_same_num_nodes(gs, g): - for ntype in g.ntypes: - assert g.num_nodes(ntype) == gs.num_nodes(ntype) - - -def assert_same_num_edges_can_etypes(gs, g): - for can_etype in g.canonical_etypes: - assert g.num_edges(can_etype) == gs.num_edges(can_etype) - - -def assert_same_num_edges_etypes(gs, g): - for etype in g.etypes: - assert g.num_edges(etype) == gs.num_edges(etype) - - -def assert_same_edge_feats(gs, g): - set(gs.edata.keys()) == set(g.edata.keys()) - for key in g.edata.keys(): - for etype in g.canonical_etypes: - indices = th.arange(0, g.num_edges(etype), dtype=g.idtype).cuda() - if len(g.etypes) <= 1 or etype in g.edata[key]: - g_output = g.get_edge_storage(key=key, etype=etype).fetch( - indices, device="cuda" - ) - gs_output = gs.get_edge_storage(key=key, etype=etype).fetch(indices) - equal_t = (gs_output != g_output).sum().cpu() - assert equal_t == 0 - - -def assert_same_sampling_len(dgl_g, cugraph_gs, nodes, fanout, edge_dir): - dgl_o = dgl_g.sample_neighbors(nodes, fanout=fanout, edge_dir=edge_dir) - cugraph_o = cugraph_gs.sample_neighbors(nodes, fanout=fanout, edge_dir=edge_dir) - assert cugraph_o.num_edges() == dgl_o.num_edges() - for etype in dgl_o.canonical_etypes: - assert dgl_o.num_edges(etype) == cugraph_o.num_edges(etype)