Re-Added cluster_method arg in scc

Starlitnightly · Starlitnightly · commit b7d315cc0217 · 2024-10-10T01:57:36.000+08:00
diff --git a/requirements.txt b/requirements.txt
@@ -19,10 +19,10 @@ nbconvert
 networkx>=2.6.3
 # ngs_tools>=1.6.0
 numba>=0.46.0
-numpy>=1.18.1,<=1.23.5
+numpy>=1.18.1
 opencv-python>=4.5.4.60
 # pandana
-pandas>=0.25.1,<=1.5.3
+pandas>=0.25.1
 # paste-bio>=1.4.0
 plotly>=5.1.0
 POT>=0.8.1
diff --git a/spateo/tools/cluster/__init__.py b/spateo/tools/cluster/__init__.py
@@ -1,5 +1,5 @@
 from .cluster_spagcn import spagcn_vanilla
-from .find_clusters import optimize_cluster, scc, spagcn_pyg
+from .find_clusters import scc, smooth, spagcn_pyg
 from .utils import (
     compute_pca_components,
     ecp_silhouette,
diff --git a/spateo/tools/cluster/find_clusters.py b/spateo/tools/cluster/find_clusters.py
@@ -12,7 +12,7 @@
 from scipy.spatial import distance
 
 from ...configuration import SKM
-from .leiden import calculate_leiden_partition
+from .leiden import calculate_leiden_partition, calculate_louvain_partition
 from .spagcn_utils import *
 from .utils import spatial_adj
 
@@ -195,6 +195,7 @@ def scc(
     e_neigh: int = 30,
     s_neigh: int = 6,
     resolution: Optional[float] = None,
+    cluster_method="louvain",
 ) -> Optional[anndata.AnnData]:
     """Spatially constrained clustering (scc) to identify continuous tissue domains.
 
@@ -213,7 +214,7 @@ def scc(
         pca_key: label for the .obsm key containing PCA information (without the potential prefix "X_")
         e_neigh: the number of nearest neighbor in gene expression space.
         s_neigh: the number of nearest neighbor in physical space.
-        resolution: the resolution parameter of the louvain clustering algorithm.
+        resolution: the resolution parameter of the clustering algorithm chosen via `cluster_method` (louvain or leiden).
 
     Returns:
         adata: An `~anndata.AnnData` object with cluster info in .obs.
@@ -229,10 +230,16 @@ def scc(
     )
 
     # Perform Leiden clustering:
-    clusters = calculate_leiden_partition(
-        adj=adj,
-        resolution=resolution,
-    )
+    if cluster_method == "louvain":
+        clusters = calculate_louvain_partition(
+            adj=adj,
+            resolution=resolution,
+        )
+    else:
+        clusters = calculate_leiden_partition(
+            adj=adj,
+            resolution=resolution,
+        )
 
     adata.obs[key_added] = clusters
     adata.obs[key_added] = adata.obs[key_added].astype(str)
@@ -241,7 +248,7 @@ def scc(
 
 
 @SKM.check_adata_is_type(SKM.ADATA_UMI_TYPE)
-def optimize_cluster(adata: anndata.AnnData, radius: int = 50, key: str = "label") -> list:
+def smooth(adata: anndata.AnnData, radius: int = 50, key: str = "label") -> list:
     """
     Optimize the label by majority voting in the neighborhood.
 
diff --git a/spateo/tools/cluster/leiden.py b/spateo/tools/cluster/leiden.py
@@ -121,3 +121,70 @@ def calculate_leiden_partition(
     clusters = np.array(partition.membership, dtype=int)
     logger.finish_progress(progress_name="Community clustering with %s" % ("leiden"))
     return clusters
+
+
+def calculate_louvain_partition(
+    adj: Optional[Union[scipy.sparse.spmatrix, np.ndarray]] = None,
+    input_mat: Optional[np.ndarray] = None,
+    num_neighbors: int = 10,
+    graph_type: Literal["distance", "embedding"] = "distance",
+    resolution: float = 1.0,
+    n_iterations: int = -1,
+) -> np.ndarray:
+    """Performs Louvain clustering on a given dataset.
+
+    Args:
+        adj: Optional precomputed adjacency matrix
+        input_mat: Only used if 'adj' is not given- the input data, interpreted as either a distance matrix (if
+            `graph_type` is "distance") or an embedding matrix (if `graph_type` is "embedding")
+        num_neighbors: Only used if 'adj' is not given- the number of nearest neighbors for constructing the graph
+        graph_type: Only used if 'adj' is not given- specifies the input type, either 'distance' or 'embedding'
+        resolution: The resolution parameter for the Louvain algorithm. If None, the `louvain` package default is used
+        n_iterations: Currently unused by this function; the value is accepted but ignored
+
+    Returns:
+        clusters: Array containing cluster assignments
+
+    Raises:
+        ValueError: If neither `adj` nor `input_mat` is given, or if `graph_type` is not 'distance' or 'embedding'
+    """
+    import louvain
+
+    from ...logging import logger_manager as lm
+
+    logger = lm.get_main_logger()
+    if adj is None and input_mat is None:
+        raise ValueError("Either `adj` or `input_mat` must be specified")
+    if adj is not None:
+        logger.info("using adj_matrix from arg for clustering...")
+        if not scipy.sparse.issparse(adj):
+            adj = scipy.sparse.csr_matrix(adj)
+        sources, targets = adj.nonzero()
+        weights = adj[sources, targets]
+        if isinstance(weights, np.matrix):
+            weights = weights.A1
+        G = igraph.Graph(directed=None)
+        G.add_vertices(adj.shape[0])  # this adds adjacency.shape[0] vertices
+        G.add_edges(list(zip(sources, targets)))
+        try:
+            G.es["weight"] = weights
+        except KeyError:
+            pass
+        if G.vcount() != adj.shape[0]:
+            print(
+                f"The constructed graph has only {G.vcount()} nodes. Your adjacency matrix contained redundant nodes."
+            )
+    elif graph_type == "distance":
+        G = distance_knn_graph(input_mat, num_neighbors)
+    elif graph_type == "embedding":
+        G = embedding_knn_graph(input_mat, num_neighbors)
+    else:
+        raise ValueError(f"`graph_type` must be 'distance' or 'embedding', got {graph_type!r}")
+    logger.info("Converting graph_sparse_matrix to igraph object", indent_level=2)
+    partition_kwargs = {"seed": 42, "weights": G.es["weight"]}
+    if resolution is not None:  # callers such as `scc` may forward resolution=None; then use louvain's default
+        partition_kwargs["resolution_parameter"] = resolution
+    partition = louvain.find_partition(G, louvain.RBConfigurationVertexPartition, **partition_kwargs)
+    clusters = np.array(partition.membership, dtype=int)
+    logger.finish_progress(progress_name="Community clustering with %s" % ("louvain"))
+    return clusters