|
| 1 | +from typing import Optional, Sequence, Union |
| 2 | + |
| 3 | +import numpy as np |
| 4 | +import pandas as pd |
| 5 | +from anndata import AnnData |
| 6 | +from esda.moran import Moran_BV, Moran_Local_BV |
| 7 | +from libpysal.weights import WSP |
| 8 | + |
| 9 | +from .find_neighbors import neighbors |
| 10 | + |
| 11 | + |
| 12 | +def spatial_bv_moran_obs_genes( |
| 13 | + adata: AnnData, |
| 14 | + obs_key: str, |
| 15 | + connectivity_key: str = "spatial_connectivities", |
| 16 | + genes: Union[str, int, Sequence[str], Sequence[int], None] = None, |
| 17 | + n_neighbors: int = 10, |
| 18 | + mode: str = "moran", |
| 19 | + transformation: str = "r", |
| 20 | + permutations: Optional[int] = 999, |
| 21 | + copy: bool = False, |
| 22 | +) -> Optional[pd.DataFrame]: |
| 23 | + """ |
| 24 | + Calculate global bivariate Moran's I between a spatial variable and gene expression |
| 25 | +
|
| 26 | + Parameters |
| 27 | + ---------- |
| 28 | + adata |
| 29 | + AnnData object containing spatial data |
| 30 | + obs_key |
| 31 | + Key in `adata.obs` for the variable |
| 32 | + connectivity_key |
| 33 | + Key in `adata.obsp` for spatial connectivity matrix (default: 'spatial_connectivities') |
| 34 | + genes |
| 35 | + Genes to calculate (names or indices). If None, use all genes. |
| 36 | + mode |
| 37 | + Spatial correlation mode (only 'moran' supported) |
| 38 | + transformation |
| 39 | + Weight transformation method ('r' for row-standardization) |
| 40 | + permutations |
| 41 | + Number of permutations for significance testing |
| 42 | + copy |
| 43 | + Return a DataFrame instead of storing in AnnData |
| 44 | +
|
| 45 | + Returns |
| 46 | + ------- |
| 47 | + If ``copy = True``, returns a :class:`pandas.DataFrame` with the following keys: |
| 48 | + I : float |
| 49 | + value of bivariate Moran's I |
| 50 | + sim : array |
| 51 | + (if permutations>0) |
| 52 | + vector of I values for permuted samples |
| 53 | + p_sim : float |
| 54 | + (if permutations>0) |
| 55 | + p-value based on permutations (one-sided) |
| 56 | + null: spatial randomness |
| 57 | + alternative: the observed I is extreme |
| 58 | + it is either extremely high or extremely low |
| 59 | + z_sim : array |
| 60 | + (if permutations>0) |
| 61 | + standardized I based on permutations |
| 62 | + p_z_sim : float |
| 63 | + (if permutations>0) |
| 64 | + p-value based on standard normal approximation from |
| 65 | + permutations |
| 66 | +
|
| 67 | + Otherwise, modifies the ``adata`` with the following key: |
| 68 | + - :attr:`anndata.AnnData.uns` ``['{obs_key}_gene_bv_moranI']`` - the above mentioned dataframe``. |
| 69 | + """ |
| 70 | + # Validate inputs |
| 71 | + if mode != "moran": |
| 72 | + raise ValueError(f"Unsupported mode: {mode}. Only 'moran' is currently supported") |
| 73 | + |
| 74 | + if obs_key not in adata.obs: |
| 75 | + raise KeyError(f"'{obs_key}' not found in adata.obs") |
| 76 | + |
| 77 | + if connectivity_key not in adata.obsp: |
| 78 | + neighbors( |
| 79 | + adata, |
| 80 | + basis="spatial", |
| 81 | + spatial_key="spatial", |
| 82 | + n_neighbors_method="ball_tree", |
| 83 | + n_neighbors=n_neighbors, |
| 84 | + ) |
| 85 | + connectivity_key = "spatial_connectivities" |
| 86 | + |
| 87 | + # Extract target variable and spatial weights |
| 88 | + y = np.array(adata.obs[obs_key].values, dtype=np.float64) |
| 89 | + spatial_weights = adata.obsp[connectivity_key] |
| 90 | + wsp = WSP(spatial_weights) |
| 91 | + w = wsp.to_W() |
| 92 | + |
| 93 | + # Identify genes to process |
| 94 | + if genes is None: |
| 95 | + gene_names = adata.var_names.tolist() |
| 96 | + gene_indices = range(adata.n_vars) |
| 97 | + elif isinstance(genes, (str, int)): |
| 98 | + gene_names = [genes] if isinstance(genes, str) else [adata.var_names[genes]] |
| 99 | + gene_indices = [adata.var_names.get_loc(genes)] if isinstance(genes, str) else [genes] |
| 100 | + else: |
| 101 | + gene_names = [] |
| 102 | + gene_indices = [] |
| 103 | + for gene in genes: |
| 104 | + if isinstance(gene, str): |
| 105 | + gene_names.append(gene) |
| 106 | + gene_indices.append(adata.var_names.get_loc(gene)) |
| 107 | + else: |
| 108 | + gene_names.append(adata.var_names[gene]) |
| 109 | + gene_indices.append(gene) |
| 110 | + |
| 111 | + # Prepare results storage |
| 112 | + results = { |
| 113 | + "I": [], |
| 114 | + } |
| 115 | + if permutations is not None: |
| 116 | + results.update( |
| 117 | + { |
| 118 | + "EI_sim": [], |
| 119 | + "pval_sim": [], |
| 120 | + "pval_z_sim": [], |
| 121 | + "z_sim": [], |
| 122 | + } |
| 123 | + ) |
| 124 | + |
| 125 | + # Process each gene |
| 126 | + for idx in gene_indices: |
| 127 | + # Extract gene expression |
| 128 | + x = adata.X[:, idx] |
| 129 | + if hasattr(x, "toarray"): |
| 130 | + x = x.toarray().flatten() |
| 131 | + x = x.astype(np.float64) |
| 132 | + |
| 133 | + # Calculate bivariate Moran's I |
| 134 | + moran_bv = Moran_BV( |
| 135 | + x, |
| 136 | + y, |
| 137 | + w, |
| 138 | + transformation=transformation, |
| 139 | + permutations=permutations, |
| 140 | + ) |
| 141 | + |
| 142 | + # Store results |
| 143 | + results["I"].append(moran_bv.I) |
| 144 | + |
| 145 | + if permutations is not None: |
| 146 | + results["EI_sim"].append(moran_bv.EI_sim) |
| 147 | + results["pval_sim"].append(moran_bv.p_sim) |
| 148 | + results["pval_z_sim"].append(moran_bv.p_z_sim) |
| 149 | + results["z_sim"].append(moran_bv.z_sim) |
| 150 | + |
| 151 | + # Create results DataFrame |
| 152 | + df = pd.DataFrame(results, index=gene_names) |
| 153 | + |
| 154 | + # Store or return results |
| 155 | + if copy: |
| 156 | + return df |
| 157 | + else: |
| 158 | + adata.uns[f"{obs_key}_gene_bv_moranI"] = df |
| 159 | + return None |
| 160 | + |
| 161 | + |
| 162 | +def spatial_bv_local_moran( |
| 163 | + adata: AnnData, |
| 164 | + feature1_key: str, |
| 165 | + feature2_key: str, |
| 166 | + connectivity_key: str = "spatial_connectivities", |
| 167 | + n_neighbors: int = 10, |
| 168 | + mode: str = "moran", |
| 169 | + transformation: str = "r", |
| 170 | + permutations: Optional[int] = 999, |
| 171 | + copy: bool = False, |
| 172 | +) -> Optional[pd.DataFrame]: |
| 173 | + """ |
| 174 | + Calculate global bivariate Moran's I between a spatial variable and gene expression |
| 175 | +
|
| 176 | + Parameters |
| 177 | + ---------- |
| 178 | + adata |
| 179 | + AnnData object containing spatial data |
| 180 | + feature1_key |
| 181 | + Key in `adata.obs` for the first variable or gene_name |
| 182 | + feature2_key |
| 183 | + Key in `adata.obs` for the seconda variable or gene_name |
| 184 | + connectivity_key |
| 185 | + Key in `adata.obsp` for spatial connectivity matrix (default: 'spatial_connectivities') |
| 186 | + mode |
| 187 | + Spatial correlation mode (only 'moran' supported) |
| 188 | + transformation |
| 189 | + Weight transformation method ('r' for row-standardization) |
| 190 | + permutations |
| 191 | + Number of permutations for significance testing |
| 192 | + copy |
| 193 | + Return a DataFrame instead of storing in AnnData |
| 194 | +
|
| 195 | + Returns |
| 196 | + ------- |
| 197 | + If ``copy = True``, returns a :class:`pandas.DataFrame` with the following keys: |
| 198 | + I : float |
| 199 | + value of bivariate Moran's I |
| 200 | + q : array |
| 201 | + (if permutations>0) values indicate quandrant location 1 HH, 2 LH, 3 LL, 4 HL |
| 202 | + sim : array |
| 203 | + (if permutations>0) |
| 204 | + vector of I values for permuted samples |
| 205 | + p_sim : float |
| 206 | + (if permutations>0) |
| 207 | + p-value based on permutations (one-sided) |
| 208 | + null: spatial randomness |
| 209 | + alternative: the observed I is extreme |
| 210 | + it is either extremely high or extremely low |
| 211 | + z_sim : array |
| 212 | + (if permutations>0) |
| 213 | + standardized I based on permutations |
| 214 | + p_z_sim : float |
| 215 | + (if permutations>0) |
| 216 | + p-value based on standard normal approximation from |
| 217 | + permutations |
| 218 | +
|
| 219 | + Otherwise, modifies the ``adata`` with the following key: |
| 220 | + - :attr:`anndata.AnnData.uns` ``['{feature1_key}_{feature2_key}_bv_local_moranI']`` - the above mentioned dataframe``. |
| 221 | + """ |
| 222 | + # Validate inputs |
| 223 | + if mode != "moran": |
| 224 | + raise ValueError(f"Unsupported mode: {mode}. Only 'moran' is currently supported") |
| 225 | + |
| 226 | + if feature1_key not in adata.obs and feature1_key not in adata.var_names: |
| 227 | + raise KeyError(f"'{feature1_key}' not found in adata.obs and a gene name") |
| 228 | + |
| 229 | + if feature2_key not in adata.obs and feature2_key not in adata.var_names: |
| 230 | + raise KeyError(f"'{feature2_key}' not found in adata.obs and a gene name") |
| 231 | + |
| 232 | + if connectivity_key not in adata.obsp: |
| 233 | + neighbors( |
| 234 | + adata, |
| 235 | + basis="spatial", |
| 236 | + spatial_key="spatial", |
| 237 | + n_neighbors_method="ball_tree", |
| 238 | + n_neighbors=n_neighbors, |
| 239 | + ) |
| 240 | + connectivity_key = "spatial_connectivities" |
| 241 | + |
| 242 | + # Extract target variable and spatial weights |
| 243 | + if feature1_key in adata.obs: |
| 244 | + x = np.array(adata.obs[feature1_key].values, dtype=np.float64) |
| 245 | + else: |
| 246 | + idx = adata.var_names.get_loc(feature1_key) |
| 247 | + x = adata.X[:, idx] |
| 248 | + if hasattr(x, "toarray"): |
| 249 | + x = x.toarray() |
| 250 | + x = x.squeeze().astype(np.float64) |
| 251 | + |
| 252 | + if feature2_key in adata.obs: |
| 253 | + y = np.array(adata.obs[feature2_key].values, dtype=np.float64) |
| 254 | + else: |
| 255 | + idx = adata.var_names.get_loc(feature2_key) |
| 256 | + print(hasattr(adata.X[:, idx], "toarray")) |
| 257 | + y = adata.X[:, idx] |
| 258 | + if hasattr(y, "toarray"): |
| 259 | + y = y.toarray() |
| 260 | + y = y.squeeze().astype(np.float64) |
| 261 | + |
| 262 | + spatial_weights = adata.obsp[connectivity_key] |
| 263 | + wsp = WSP(spatial_weights) |
| 264 | + w = wsp.to_W() |
| 265 | + |
| 266 | + local_moran_bv = Moran_Local_BV(x, y, w, transformation=transformation, permutations=permutations) |
| 267 | + |
| 268 | + df = pd.DataFrame(index=adata.obs_names) |
| 269 | + df["I"] = local_moran_bv.Is |
| 270 | + if permutations is not None: |
| 271 | + df["q"] = local_moran_bv.q |
| 272 | + df["EI_sim"] = local_moran_bv.EI_sim |
| 273 | + df["pval_sim"] = local_moran_bv.p_sim |
| 274 | + df["pval_z_sim"] = local_moran_bv.p_z_sim |
| 275 | + df["z_sim"] = local_moran_bv.z_sim |
| 276 | + |
| 277 | + # Store or return results |
| 278 | + if copy: |
| 279 | + return df |
| 280 | + else: |
| 281 | + adata.uns[f"{feature1_key}_{feature2_key}_bv_local_moranI"] = df |
| 282 | + return None |
0 commit comments