ci: Ori 1.3.x resolvi fixes backport (scverse#3313)

ori-kron-wis · pre-commit-ci[bot] · ethanweinberger · web-flow · commit b4f85905a7c0 · 2025-05-13T14:40:45.000+03:00
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Co-authored-by: Ethan Weinberger <ethanweinberger@gmail.com>
Co-authored-by: Ethan Weinberger <ethanweinberger@Ethans-MacBook-Pro.local>
Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: ori-kron-wis <175299014+ori-kron-wis@users.noreply.github.com>
Co-authored-by: Justin Hong <justin.hong@columbia.edu>
Co-authored-by: Can Ergen <canergen.ac@gmail.com>
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -23,10 +23,13 @@ to [Semantic Versioning]. Full commit history is available in the
 - Add consideration for missing monitor set during early stopping. {pr}`3226`.
 - Fix bug in SysVI get_normalized_expression function. {pr}`3255`.
 - Add support for IntegratedGradients for multimodal models. {pr}`3264`.
+- Fix bug in resolVI get_normalized_expression function. {pr}`3308`.
+- Fix bug in resolVI gene-assay dispersion. {pr}`3308`.
 
 #### Changed
 
 - Updated Scvi-Tools AWS hub to Weizmann instead of Berkeley. {pr}`3246`.
+- Updated resolVI to use rapids-singlecell. {pr}`3308`.
 
 #### Removed
 
@@ -53,7 +56,7 @@ to [Semantic Versioning]. Full commit history is available in the
 - Add scib-metrics support for {class}`scvi.autotune.AutotuneExperiment` and
     {class}`scvi.train._callbacks.ScibCallback` for autotune for scib metrics {pr}`3168`.
 - Add Support of dask arrays in AnnTorchDataset. {pr}`3193`.
-- Add a common use cases section in the docs user guide. {pr}`3200`.
+- Add a {doc}`/user_guide/use_case` section in the docs. {pr}`3200`.
 - Add {class}`scvi.external.SysVI` for cycle consistency loss and VampPrior {pr}`3195`.
 
 #### Fixed
@@ -111,7 +114,7 @@ to [Semantic Versioning]. Full commit history is available in the
 - Added adaptive handling for last training minibatch of 1-2 cells in case of
     `datasplitter_kwargs={"drop_last": False}` and `train_size = None` by moving them into
     validation set, if available. {pr}`3036`.
-- Add `batch_key` and `labels_key` to {meth}`scvi.external.SCAR.setup_anndata`. {pr}`3045`.
+- Add `batch_key` and `labels_key` to `scvi.external.SCAR.setup_anndata`. {pr}`3045`.
 - Implemented variance of ZINB distribution. {pr}`3044`.
 - Support for minified mode while retaining counts to skip the encoder.
 - New Trainingplan argument `update_only_decoder` to use stored latent codes and skip training of
@@ -125,7 +128,7 @@ to [Semantic Versioning]. Full commit history is available in the
 - Breaking Change: Fix `get_outlier_cell_sample_pairs` function in {class}`scvi.external.MRVI`
     to correctly compute the maxmimum log-density across in-sample cells rather than the
     aggregated posterior log-density {pr}`3007`.
-- Fix references to `scvi.external` in {meth}`scvi.external.SCAR.setup_anndata`.
+- Fix references to `scvi.external` in `scvi.external.SCAR.setup_anndata`.
 - Fix gimVI to append mini batches first into CPU during get_imputed and get_latent operations {pr}`3058`.
 
 #### Changed
@@ -137,9 +140,9 @@ to [Semantic Versioning]. Full commit history is available in the
 #### Added
 
 - Add support for Python 3.12 {pr}`2966`.
-- Add support for categorial covariates in scArches in {class}`scvi.model.base.ArchesMixin` {pr}`2936`.
+- Add support for categorical covariates in scArches in `scvi.model.archesmixin` {pr}`2936`.
 - Add assertion error in cellAssign for checking duplicates in celltype markers {pr}`2951`.
-- Add {meth}`scvi.external.POISSONVI.get_region_factors` {pr}`2940`.
+- Add `scvi.external.poissonvi.get_region_factors` {pr}`2940`.
 - {attr}`scvi.settings.dl_persistent_workers` allows using persistent workers in
     {class}`scvi.dataloaders.AnnDataLoader` {pr}`2924`.
 - Add option for using external indexes in data splitting classes that are under `scvi.dataloaders`
diff --git a/src/scvi/external/resolvi/_model.py b/src/scvi/external/resolvi/_model.py
@@ -1,6 +1,5 @@
 from __future__ import annotations
 
-import importlib.util
 import logging
 from functools import partial
 from typing import TYPE_CHECKING
@@ -343,7 +342,11 @@ def setup_anndata(
         cls.register_manager(adata_manager)
 
     @staticmethod
-    def _prepare_data(adata, n_neighbors=10, spatial_rep="X_spatial", batch_key=None, **kwargs):
+    def _prepare_data(
+        adata, n_neighbors=10, spatial_rep="X_spatial", batch_key=None, slice_key=None, **kwargs
+    ):
+        if slice_key is not None:
+            batch_key = slice_key
         try:
             import scanpy
             from sklearn.neighbors._base import _kneighbors_from_graph
@@ -365,13 +368,15 @@ def _prepare_data(adata, n_neighbors=10, spatial_rep="X_spatial", batch_key=None
 
         for index in indices:
             sub_data = adata[index].copy()
-            if importlib.util.find_spec("cuml") is not None:
-                method = "rapids"
-            else:
-                method = "umap"
-            scanpy.pp.neighbors(
-                sub_data, n_neighbors=n_neighbors + 5, use_rep=spatial_rep, method=method
-            )
+            try:
+                import rapids_singlecell
+
+                print("RAPIDS SingleCell is installed and can be imported")
+                rapids_singlecell.pp.neighbors(
+                    sub_data, n_neighbors=n_neighbors + 5, use_rep=spatial_rep
+                )
+            except ImportError:
+                scanpy.pp.neighbors(sub_data, n_neighbors=n_neighbors + 5, use_rep=spatial_rep)
             distances = sub_data.obsp["distances"] ** 2
 
             distance_neighbor[index, :], index_neighbor_batch = _kneighbors_from_graph(
diff --git a/src/scvi/external/resolvi/_module.py b/src/scvi/external/resolvi/_module.py
@@ -163,8 +163,7 @@ def __init__(
             init_px_r = torch.full([n_input, n_batch], 0.01)
         else:
             raise ValueError(
-                "dispersion must be one of ['gene', 'gene-batch', 'gene-label'], but input was "
-                "{}.format(self.dispersion)"
+                f"dispersion must be one of ['gene', 'gene-batch'], but input was {dispersion}."
             )
         self.register_buffer("px_r", init_px_r)
 
@@ -751,8 +750,7 @@ def __init__(
             init_px_r = torch.full([n_input, n_batch], 0.01)
         else:
             raise ValueError(
-                "dispersion must be one of ['gene', 'gene-batch', 'gene-label'], but input was "
-                "{}.format(dispersion)"
+                f"dispersion must be one of ['gene', 'gene-batch'], but input was {dispersion}."
             )
         self.register_buffer("px_r", init_px_r)
         self.register_buffer("per_neighbor_diffusion_init", torch.zeros([n_obs, n_neighbors]))
@@ -868,7 +866,10 @@ def forward(  # not used arguments to have same set of arguments in model and gu
 
             if self.dispersion == "gene-batch":
                 px_r_inv = F.linear(
-                    torch.nn.functional.one_hot(batch_index.flatten(), self.n_batch), px_r_mle
+                    torch.nn.functional.one_hot(batch_index.flatten(), self.n_batch).to(
+                        px_r_mle.dtype
+                    ),
+                    px_r_mle,
                 )
             elif self.dispersion == "gene":
                 px_r_inv = px_r_mle
diff --git a/src/scvi/external/resolvi/_utils.py b/src/scvi/external/resolvi/_utils.py
@@ -229,7 +229,7 @@ def get_normalized_expression(
         library_size
             Scale the expression frequencies to a common library size.
             This allows gene expression levels to be interpreted on a common scale of relevant
-            magnitude. If set to `"latent"`, use the latent library size.
+            magnitude.
         n_samples
             Number of posterior samples to use for estimation.
         n_samples_overall
@@ -301,32 +301,28 @@ def get_normalized_expression(
                     kwargs["batch_index"],
                     *categorical_input,
                 )
-                z = torch.distributions.Normal(qz_m, qz_v.sqrt()).sample(
-                    [
-                        n_samples,
-                    ]
-                )
+                z = torch.distributions.Normal(qz_m, qz_v.sqrt()).sample([n_samples])
 
                 if kwargs["cat_covs"] is not None:
                     categorical_input = list(torch.split(kwargs["cat_covs"], 1, dim=1))
                 else:
                     categorical_input = ()
                 if batch is not None:
-                    batch = torch.full_like(kwargs["batch"], batch)
+                    batch = torch.full_like(kwargs["batch_index"], batch)
                 else:
                     batch = kwargs["batch_index"]
 
                 px_scale, _, px_rate, _ = self.module.model.decoder(
                     self.module.model.dispersion, z, kwargs["library"], batch, *categorical_input
                 )
                 if library_size is not None:
-                    exp_ = library_size * px_scale.reshape(-1, px_scale.shape[-1])
+                    exp_ = library_size * px_scale
                 else:
-                    exp_ = px_rate.reshape(-1, px_scale.shape[-1])
+                    exp_ = px_rate
 
                 exp_ = exp_[..., gene_mask]
                 per_batch_exprs.append(exp_[None].cpu())
-            per_batch_exprs = torch.cat(per_batch_exprs, dim=0).numpy()
+            per_batch_exprs = torch.cat(per_batch_exprs, dim=0).mean(0).numpy()
             exprs.append(per_batch_exprs)
 
         exprs = np.concatenate(exprs, axis=1)
diff --git a/src/scvi/train/_trainer.py b/src/scvi/train/_trainer.py
@@ -211,4 +211,13 @@ def fit(self, *args, **kwargs):
                     category=UserWarning,
                     message="`LightningModule.configure_optimizers` returned `None`",
                 )
-            super().fit(*args, **kwargs)
+            try:
+                super().fit(*args, **kwargs)
+            except NameError:
+                import gc
+
+                gc.collect()
+                import torch
+
+                if torch.cuda.is_available():
+                    torch.cuda.empty_cache()
diff --git a/src/scvi/train/_trainrunner.py b/src/scvi/train/_trainrunner.py
@@ -109,7 +109,16 @@ def __call__(self):
         if hasattr(self.data_splitter, "n_val"):
             self.training_plan.n_obs_validation = self.data_splitter.n_val
 
-        self.trainer.fit(self.training_plan, self.data_splitter)
+        try:
+            self.trainer.fit(self.training_plan, self.data_splitter)
+        except NameError:
+            import gc
+
+            gc.collect()
+            import torch
+
+            if torch.cuda.is_available():
+                torch.cuda.empty_cache()
         self._update_history()
 
         # data splitter only gets these attrs after fit
diff --git a/tests/external/resolvi/test_resolvi.py b/tests/external/resolvi/test_resolvi.py
@@ -23,6 +23,10 @@ def test_resolvi_train(adata):
     model.train(
         max_epochs=2,
     )
+    model = RESOLVI(adata, dispersion="gene-batch")
+    model.train(
+        max_epochs=2,
+    )
 
 
 def test_resolvi_save_load(adata):
@@ -52,8 +56,21 @@ def test_resolvi_downstream(adata):
     )
     latent = model.get_latent_representation()
     assert latent.shape == (adata.n_obs, model.module.n_latent)
+    counts = model.get_normalized_expression(n_samples=31, library_size=10000)
+    counts = model.get_normalized_expression_importance(n_samples=30, library_size=10000)
+    print("FFFFFF", counts.shape)
     model.differential_expression(groupby="labels")
     model.differential_expression(groupby="labels", weights="importance")
+    model.sample_posterior(
+        model=model.module.model_residuals,
+        num_samples=30,
+        return_samples=False,
+        return_sites=None,
+        batch_size=1000,
+    )
+    model.sample_posterior(
+        model=model.module.model_residuals, num_samples=30, return_samples=False, batch_size=1000
+    )
     model_query = model.load_query_data(reference_model=model, adata=adata)
     model_query = model.load_query_data(reference_model="test_resolvi", adata=adata)
     model_query.train(