From ecd4841ccff92527ecbc5961ed0cd82963512421 Mon Sep 17 00:00:00 2001
From: Scott Gigante <scottgigante@gmail.com>
Date: Thu, 30 Aug 2018 16:34:23 -0400
Subject: [PATCH 01/32] don't return a sparse dataframe

---
 python/magic/magic.py | 3 ++-
 python/magic/utils.py | 7 +++++--
 2 files changed, 7 insertions(+), 3 deletions(-)

diff --git a/python/magic/magic.py b/python/magic/magic.py
index 963a6028..4b617a61 100644
--- a/python/magic/magic.py
+++ b/python/magic/magic.py
@@ -504,7 +504,8 @@ def transform(self, X=None, genes=None, t_max=20,
         else:
             X_magic = graph.inverse_transform(X_magic, columns=genes)
             # convert back to pandas dataframe, if necessary
-        X_magic = utils.convert_to_same_format(X_magic, X, columns=genes)
+        X_magic = utils.convert_to_same_format(X_magic, X, columns=genes,
+                                               prevent_sparse=True)
         return X_magic
 
     def fit_transform(self, X, **kwargs):
diff --git a/python/magic/utils.py b/python/magic/utils.py
index 3140e680..654bb83d 100644
--- a/python/magic/utils.py
+++ b/python/magic/utils.py
@@ -111,10 +111,13 @@ def matrix_is_equivalent(X, Y):
                       np.sum((X != Y).sum()) == 0)
 
 
-def convert_to_same_format(data, target_data, columns=None):
+def convert_to_same_format(data, target_data, columns=None, prevent_sparse=False):
     # create new data object
     if isinstance(target_data, pd.SparseDataFrame):
-        data = pd.SparseDataFrame(data)
+        if prevent_sparse:
+            data = pd.DataFrame(data)
+        else:
+            data = pd.SparseDataFrame(data)
         pandas = True
     elif isinstance(target_data, pd.DataFrame):
         data = pd.DataFrame(data)

From ca8f6dbef33a0c44dca8b098ffd190a0d960ddfa Mon Sep 17 00:00:00 2001
From: Scott Gigante <scottgigante@gmail.com>
Date: Fri, 31 Aug 2018 18:06:00 -0400
Subject: [PATCH 02/32] more informative error message on load failure; address
 https://github.com/KrishnaswamyLab/phateR/issues/21

---
 Rmagic/R/utils.R | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/Rmagic/R/utils.R b/Rmagic/R/utils.R
index c7346900..edfc87c5 100644
--- a/Rmagic/R/utils.R
+++ b/Rmagic/R/utils.R
@@ -10,8 +10,14 @@ null_equal <- function(x, y) {
 }
 
 load_pymagic <- function(delay_load = FALSE) {
-  result <- try(pymagic <<- reticulate::import("magic", delay_load = delay_load))
-  if (methods::is(result, "try-error")) {
+    if (is.null(pymagic)) {
+    result <- try(pymagic <<- reticulate::import("magic", delay_load = delay_load))
+  } else {
+    result <- try(reticulate::import("magic", delay_load = delay_load))
+  }
+  if (methods::is(result, "try-error") &&
+      (length(grep("ModuleNotFoundError: No module named 'magic'", result)) > 0 ||
+        length(grep("ImportError: No module named magic", result)) > 0)) {
     install.magic()
   }
 }

From 6e38540f842372a9a4d6387dd385ea9220c86d90 Mon Sep 17 00:00:00 2001
From: Scott Gigante <scottgigante@gmail.com>
Date: Sun, 2 Sep 2018 11:51:25 -0400
Subject: [PATCH 03/32] preferentially load python with magic installed

---
 Rmagic/R/utils.R | 1 +
 1 file changed, 1 insertion(+)

diff --git a/Rmagic/R/utils.R b/Rmagic/R/utils.R
index edfc87c5..bf413f78 100644
--- a/Rmagic/R/utils.R
+++ b/Rmagic/R/utils.R
@@ -63,5 +63,6 @@ install.magic <- function(envname = "r-reticulate", method = "auto",
 pymagic <- NULL
 
 .onLoad <- function(libname, pkgname) {
+  py_config <- reticulate::py_discover_config(required_module = "magic")
   load_pymagic(delay_load = TRUE)
 }

From 14bd1c9ace562f9bcc8b1c995580db8c37bf1d1a Mon Sep 17 00:00:00 2001
From: Scott Gigante <scottgigante@gmail.com>
Date: Mon, 3 Sep 2018 11:23:50 -0400
Subject: [PATCH 04/32] recommend people remove the r-reticulate environment if
 things aren't working

---
 Rmagic/R/utils.R | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/Rmagic/R/utils.R b/Rmagic/R/utils.R
index bf413f78..dd27b6f3 100644
--- a/Rmagic/R/utils.R
+++ b/Rmagic/R/utils.R
@@ -19,6 +19,13 @@ load_pymagic <- function(delay_load = FALSE) {
       (length(grep("ModuleNotFoundError: No module named 'magic'", result)) > 0 ||
         length(grep("ImportError: No module named magic", result)) > 0)) {
     install.magic()
+  } else if (grep("r\-reticulate", reticulate::py_config()$python)) {
+    message("Consider removing the 'r-reticulate' environment by running:")
+    if (grep("virtualenvs", reticulate::py_config()$python)) {
+      message("reticulate::virtualenv_remove('r-reticulate')")
+    } else {
+      message("reticulate::conda_remove('r-reticulate')")
+    }
   }
 }
 

From 9cc139c295e570c66b852236d588c7ea2a7156fd Mon Sep 17 00:00:00 2001
From: Scott Gigante <scottgigante@gmail.com>
Date: Mon, 3 Sep 2018 11:40:11 -0400
Subject: [PATCH 05/32] fix grep typo

---
 Rmagic/R/utils.R | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Rmagic/R/utils.R b/Rmagic/R/utils.R
index dd27b6f3..875da29a 100644
--- a/Rmagic/R/utils.R
+++ b/Rmagic/R/utils.R
@@ -19,7 +19,7 @@ load_pymagic <- function(delay_load = FALSE) {
       (length(grep("ModuleNotFoundError: No module named 'magic'", result)) > 0 ||
         length(grep("ImportError: No module named magic", result)) > 0)) {
     install.magic()
-  } else if (grep("r\-reticulate", reticulate::py_config()$python)) {
+  } else if (grep("r\\-reticulate", reticulate::py_config()$python)) {
     message("Consider removing the 'r-reticulate' environment by running:")
     if (grep("virtualenvs", reticulate::py_config()$python)) {
       message("reticulate::virtualenv_remove('r-reticulate')")

From 62c84732de738c835cb0b9efab1ae9b210965bf6 Mon Sep 17 00:00:00 2001
From: Scott Gigante <scottgigante@gmail.com>
Date: Mon, 3 Sep 2018 12:04:42 -0400
Subject: [PATCH 06/32] fix grep length zero

---
 Rmagic/R/utils.R | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Rmagic/R/utils.R b/Rmagic/R/utils.R
index 875da29a..4d6033c2 100644
--- a/Rmagic/R/utils.R
+++ b/Rmagic/R/utils.R
@@ -19,7 +19,7 @@ load_pymagic <- function(delay_load = FALSE) {
       (length(grep("ModuleNotFoundError: No module named 'magic'", result)) > 0 ||
         length(grep("ImportError: No module named magic", result)) > 0)) {
     install.magic()
-  } else if (grep("r\\-reticulate", reticulate::py_config()$python)) {
+  } else if (length(grep("r\\-reticulate", reticulate::py_config()$python)) > 0) {
     message("Consider removing the 'r-reticulate' environment by running:")
     if (grep("virtualenvs", reticulate::py_config()$python)) {
       message("reticulate::virtualenv_remove('r-reticulate')")

From 6035c0a4f1080aed45ad5e55e01509723736eb7b Mon Sep 17 00:00:00 2001
From: Scott Gigante <scottgigante@gmail.com>
Date: Mon, 3 Sep 2018 12:33:50 -0400
Subject: [PATCH 07/32] bump tasklogger version

---
 python/requirements.txt | 2 +-
 python/setup.py         | 3 ++-
 2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/python/requirements.txt b/python/requirements.txt
index 637a35a9..27005d46 100644
--- a/python/requirements.txt
+++ b/python/requirements.txt
@@ -5,4 +5,4 @@ matplotlib
 scikit-learn>=0.19.1
 graphtools>=0.1.8
 future
-tasklogger>=0.2
+tasklogger>=0.2.1
diff --git a/python/setup.py b/python/setup.py
index 87fd29c4..8371b82d 100644
--- a/python/setup.py
+++ b/python/setup.py
@@ -8,12 +8,13 @@
     'scipy>=1.1.0',
     'matplotlib',
     'scikit-learn>=0.19.1',
-    'tasklogger>=0.2',
+    'tasklogger>=0.2.1',
     'graphtools>=0.1.9',
 ]
 
 test_requires = [
     'nose2',
+    'scprep',
 ]
 
 if sys.version_info[0] == 3:

From bc6ea1b66483b2228a20d9cf72e1ce961f4091ec Mon Sep 17 00:00:00 2001
From: Scott Gigante <scottgigante@gmail.com>
Date: Mon, 3 Sep 2018 12:35:00 -0400
Subject: [PATCH 08/32] use scprep

---
 python/magic/io.py            | 15 +++++++++++++++
 python/magic/preprocessing.py |  5 +++++
 python/magic/test.py          | 25 ++++++++++++++-----------
 3 files changed, 34 insertions(+), 11 deletions(-)

diff --git a/python/magic/io.py b/python/magic/io.py
index 44abcebd..b0525ec6 100644
--- a/python/magic/io.py
+++ b/python/magic/io.py
@@ -203,6 +203,9 @@ def load_csv(filename, cell_axis='row', delimiter=',',
     -------
     data : pd.DataFrame
     """
+    warnings.warn("magic.io is deprecated. Please use scprep.io instead. "
+                  "Read more at http://scprep.readthedocs.io",
+                  FutureWarning)
     if cell_axis not in ['row', 'column', 'col']:
         raise ValueError(
             "cell_axis {} not recognized. Expected 'row' or 'column'".format(
@@ -309,6 +312,9 @@ def load_fcs(filename, gene_names=True, cell_names=True,
     -------
     data : pd.DataFrame
     """
+    warnings.warn("magic.io is deprecated. Please use scprep.io instead. "
+                  "Read more at http://scprep.readthedocs.io",
+                  FutureWarning)
     if cell_names is True:
         cell_names = None
     if gene_names is True:
@@ -347,6 +353,9 @@ def load_mtx(mtx_file, cell_axis='row',
     -------
     data : pd.DataFrame
     """
+    warnings.warn("magic.io is deprecated. Please use scprep.io instead. "
+                  "Read more at http://scprep.readthedocs.io",
+                  FutureWarning)
     if cell_axis not in ['row', 'column', 'col']:
         raise ValueError(
             "cell_axis {} not recognized. Expected 'row' or 'column'".format(
@@ -435,6 +444,9 @@ def load_10X(data_dir, sparse=True, gene_labels='symbol',
     data: pandas.DataFrame shape = (n_cell, n_genes)
         imported data matrix
     """
+    warnings.warn("magic.io is deprecated. Please use scprep.io instead. "
+                  "Read more at http://scprep.readthedocs.io",
+                  FutureWarning)
 
     if gene_labels not in ['id', 'symbol', 'both']:
         raise ValueError("gene_labels not in ['id', 'symbol', 'both']")
@@ -551,6 +563,9 @@ def load_10X_HDF5(filename, genome=None, sparse=True, gene_labels='symbol',
         If sparse, data will be a pd.SparseDataFrame. Otherwise, data will
         be a pd.DataFrame.
     """
+    warnings.warn("magic.io is deprecated. Please use scprep.io instead. "
+                  "Read more at http://scprep.readthedocs.io",
+                  FutureWarning)
     with tables.open_file(filename, 'r') as f:
         if genome is None:
             genomes = [node._v_name for node in f.list_nodes(f.root)]
diff --git a/python/magic/preprocessing.py b/python/magic/preprocessing.py
index 85700c08..a4b0bc91 100644
--- a/python/magic/preprocessing.py
+++ b/python/magic/preprocessing.py
@@ -6,6 +6,7 @@
 import numpy as np
 from scipy import sparse
 import pandas as pd
+import warnings
 
 
 def library_size_normalize(data, verbose=False):
@@ -25,6 +26,10 @@ def library_size_normalize(data, verbose=False):
     data_norm : ndarray [n, p]
         2 dimensional array with normalized gene expression values
     """
+    warnings.warn("magic.preprocessing is deprecated. "
+                  "Please use scprep.normalize instead. "
+                  "Read more at http://scprep.readthedocs.io",
+                  FutureWarning)
     if verbose:
         print("Normalizing library sizes for %s cells" % (data.shape[0]))
 
diff --git a/python/magic/test.py b/python/magic/test.py
index 2f9d00e4..55006037 100644
--- a/python/magic/test.py
+++ b/python/magic/test.py
@@ -3,8 +3,8 @@
 
 from __future__ import print_function, division, absolute_import
 import magic
-import pandas as pd
 import numpy as np
+import scprep
 try:
     import anndata
 except (ImportError, SyntaxError):
@@ -13,23 +13,26 @@
 
 
 def test_scdata():
-    scdata = pd.read_csv("../data/test_data.csv")
-    scdata_norm = magic.preprocessing.library_size_normalize(scdata)
+    scdata = scprep.io.read_csv("../data/test_data.csv")
+    scdata_norm = scprep.filter.remove_empty_cells(scdata)
+    scdata_norm = scprep.filter.remove_empty_genes(scdata)
+    scdata_norm = scprep.normalize.library_size_normalize(scdata_norm)
+    scdata_norm = scprep.transform.sqrt(scdata_norm)
     assert scdata.shape == scdata_norm.shape
-    fast_magic_operator = magic.MAGIC(t='auto', a=20, k=10)
-    str_gene_magic = fast_magic_operator.fit_transform(
+    magic_op = magic.MAGIC(t='auto', a=20, k=10)
+    str_gene_magic = magic_op.fit_transform(
         scdata_norm, genes=['VIM', 'ZEB1'])
-    int_gene_magic = fast_magic_operator.fit_transform(
+    int_gene_magic = magic_op.fit_transform(
         scdata_norm, genes=[-2, -1])
     assert str_gene_magic.shape[0] == scdata_norm.shape[0]
     assert np.all(str_gene_magic == int_gene_magic)
-    pca_magic = fast_magic_operator.fit_transform(
+    pca_magic = magic_op.fit_transform(
         scdata_norm, genes="pca_only")
     assert pca_magic.shape[0] == scdata_norm.shape[0]
-    assert pca_magic.shape[1] == fast_magic_operator.n_pca
-    fast_magic = fast_magic_operator.fit_transform(scdata_norm,
-                                                   genes="all_genes")
-    assert scdata_norm.shape == fast_magic.shape
+    assert pca_magic.shape[1] == magic_op.n_pca
+    magic_all_genes = magic_op.fit_transform(scdata_norm,
+                                             genes="all_genes")
+    assert scdata_norm.shape == magic_all_genes.shape
 
 
 def test_anndata():

From b9ce6cfea5f50fa12e237db68c5bb24de19c66e6 Mon Sep 17 00:00:00 2001
From: Scott Gigante <scottgigante@gmail.com>
Date: Mon, 3 Sep 2018 12:56:23 -0400
Subject: [PATCH 09/32] filter on scdata

---
 python/magic/test.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/python/magic/test.py b/python/magic/test.py
index 55006037..f2163f2d 100644
--- a/python/magic/test.py
+++ b/python/magic/test.py
@@ -13,10 +13,10 @@
 
 
 def test_scdata():
-    scdata = scprep.io.read_csv("../data/test_data.csv")
-    scdata_norm = scprep.filter.remove_empty_cells(scdata)
-    scdata_norm = scprep.filter.remove_empty_genes(scdata)
-    scdata_norm = scprep.normalize.library_size_normalize(scdata_norm)
+    scdata = scprep.io.load_csv("../data/test_data.csv")
+    scdata = scprep.filter.remove_empty_cells(scdata)
+    scdata = scprep.filter.remove_empty_genes(scdata)
+    scdata_norm = scprep.normalize.library_size_normalize(scdata)
     scdata_norm = scprep.transform.sqrt(scdata_norm)
     assert scdata.shape == scdata_norm.shape
     magic_op = magic.MAGIC(t='auto', a=20, k=10)

From bba449fdeb4da49cf5f8bc607f8b474d75c8f486 Mon Sep 17 00:00:00 2001
From: Scott Gigante <scottgigante@gmail.com>
Date: Thu, 6 Sep 2018 16:52:58 -0400
Subject: [PATCH 10/32] request before installation

---
 Rmagic/R/utils.R | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/Rmagic/R/utils.R b/Rmagic/R/utils.R
index 4d6033c2..4e20c661 100644
--- a/Rmagic/R/utils.R
+++ b/Rmagic/R/utils.R
@@ -18,7 +18,9 @@ load_pymagic <- function(delay_load = FALSE) {
   if (methods::is(result, "try-error") &&
       (length(grep("ModuleNotFoundError: No module named 'magic'", result)) > 0 ||
         length(grep("ImportError: No module named magic", result)) > 0)) {
-    install.magic()
+      if (menu(c("Yes", "No"), title="Install MAGIC Python package with reticulate?") == 1) {
+        install.magic()
+      }
   } else if (length(grep("r\\-reticulate", reticulate::py_config()$python)) > 0) {
     message("Consider removing the 'r-reticulate' environment by running:")
     if (grep("virtualenvs", reticulate::py_config()$python)) {
@@ -57,6 +59,7 @@ install.magic <- function(envname = "r-reticulate", method = "auto",
       envname = envname, method = method,
       conda = conda, pip=pip, ...
     )
+    message("Install complete. Please restart R and try again.")
   },
   error = function(e) {
     stop(paste0(

From 9664a99cc451500520913d81b4a3690c3c642e34 Mon Sep 17 00:00:00 2001
From: Scott Gigante <scottgigante@gmail.com>
Date: Thu, 6 Sep 2018 16:53:55 -0400
Subject: [PATCH 11/32] allow passing of a precomputed graph

---
 python/magic/magic.py | 58 +++++++++++++++++++------------------------
 1 file changed, 26 insertions(+), 32 deletions(-)

diff --git a/python/magic/magic.py b/python/magic/magic.py
index 4b617a61..db6b03d5 100644
--- a/python/magic/magic.py
+++ b/python/magic/magic.py
@@ -59,11 +59,9 @@ class MAGIC(BaseEstimator):
         roughly log(n_samples) time.
 
     knn_dist : string, optional, default: 'euclidean'
-        recommended values: 'euclidean', 'cosine', 'precomputed'
+        recommended values: 'euclidean', 'cosine'
         Any metric from `scipy.spatial.distance` can be used
-        distance metric for building kNN graph. If 'precomputed',
-        `data` should be an n_samples x n_samples distance or
-        affinity matrix
+        distance metric for building kNN graph.
 
     n_jobs : integer, optional, default: 1
         The number of jobs to use for the computation.
@@ -178,7 +176,7 @@ def _check_params(self):
                            a=self.a)
         utils.check_if_not('auto', utils.check_positive, utils.check_int,
                            t=self.t)
-        utils.check_in(['euclidean', 'precomputed', 'cosine', 'correlation',
+        utils.check_in(['euclidean', 'cosine', 'correlation',
                         'cityblock', 'l1', 'l2', 'manhattan', 'braycurtis',
                         'canberra', 'chebyshev', 'dice', 'hamming', 'jaccard',
                         'kulsinski', 'mahalanobis', 'matching', 'minkowski',
@@ -221,11 +219,9 @@ def set_params(self, **params):
             roughly log(n_samples) time.
 
         knn_dist : string, optional, default: 'euclidean'
-            recommended values: 'euclidean', 'cosine', 'precomputed'
+            recommended values: 'euclidean', 'cosine'
             Any metric from `scipy.spatial.distance` can be used
-            distance metric for building kNN graph. If 'precomputed',
-            `data` should be an n_samples x n_samples distance or
-            affinity matrix
+            distance metric for building kNN graph.
 
         n_jobs : integer, optional, default: 1
             The number of jobs to use for the computation.
@@ -297,7 +293,7 @@ def set_params(self, **params):
         self._check_params()
         return self
 
-    def fit(self, X):
+    def fit(self, X, graph=None):
         """Computes the diffusion operator
 
         Parameters
@@ -306,42 +302,34 @@ def fit(self, X):
             input data with `n_samples` samples and `n_features`
             dimensions. Accepted data types: `numpy.ndarray`,
             `scipy.sparse.spmatrix`, `pd.DataFrame`, `anndata.AnnData`.
+        graph : `graphtools.Graph`, optional (default: None)
+            If given, provides a precomputed kernel matrix with which to
+            perform diffusion.
 
         Returns
         -------
         magic_operator : MAGIC
             The estimator object
         """
-        if self.knn_dist == 'precomputed':
-            if isinstance(X, sparse.coo_matrix):
-                X = X.tocsr()
-            if X[0, 0] == 0:
-                precomputed = "distance"
-            else:
-                precomputed = "affinity"
-            tasklogger.log_info(
-                "Using precomputed {} matrix...".format(precomputed))
+        if self.n_pca is None or X.shape[1] <= self.n_pca:
             n_pca = None
         else:
-            precomputed = None
-            if self.n_pca is None or X.shape[1] <= self.n_pca:
-                n_pca = None
-            else:
-                n_pca = self.n_pca
+            n_pca = self.n_pca
+        if graph is None:
+            graph = self.graph
 
-        if self.graph is not None:
+        if graph is not None:
             if self.X is not None and not \
                     utils.matrix_is_equivalent(X, self.X):
                 """
                 If the same data is used, we can reuse existing kernel and
                 diffusion matrices. Otherwise we have to recompute.
                 """
-                self.graph = None
+                graph = None
             else:
                 try:
-                    self.graph.set_params(
+                    graph.set_params(
                         decay=self.a, knn=self.k + 1, distance=self.knn_dist,
-                        precomputed=precomputed,
                         n_jobs=self.n_jobs, verbose=self.verbose, n_pca=n_pca,
                         thresh=1e-4, random_state=self.random_state)
                     tasklogger.log_info(
@@ -350,7 +338,7 @@ def fit(self, X):
                     # something changed that should have invalidated the graph
                     tasklogger.log_debug(
                         "Reset graph due to {}".format(str(e)))
-                    self.graph = None
+                    graph = None
 
         self.X = X
 
@@ -358,7 +346,7 @@ def fit(self, X):
             warnings.warn("Input matrix contains unexpressed genes. "
                           "Please remove them prior to running MAGIC.")
 
-        if self.graph is None:
+        if graph is None:
             # reset X_magic in case it was previously set
             self.X_magic = None
             tasklogger.log_start("graph and diffusion operator")
@@ -372,6 +360,8 @@ def fit(self, X):
                 verbose=self.verbose,
                 random_state=self.random_state)
             tasklogger.log_complete("graph and diffusion operator")
+        else:
+            self.graph = graph
 
         return self
 
@@ -508,7 +498,7 @@ def transform(self, X=None, genes=None, t_max=20,
                                                prevent_sparse=True)
         return X_magic
 
-    def fit_transform(self, X, **kwargs):
+    def fit_transform(self, X, graph=None, **kwargs):
         """Computes the diffusion operator and the position of the cells in the
         embedding space
 
@@ -519,6 +509,10 @@ def fit_transform(self, X, **kwargs):
             dimensions. Accepted data types: `numpy.ndarray`,
             `scipy.sparse.spmatrix`, `pd.DataFrame`, `anndata.AnnData`.
 
+        graph : `graphtools.Graph`, optional (default: None)
+            If given, provides a precomputed kernel matrix with which to
+            perform diffusion.
+
         kwargs : further arguments for `PHATE.transform()`
             Keyword arguments as specified in :func:`~phate.PHATE.transform`
 
@@ -528,7 +522,7 @@ def fit_transform(self, X, **kwargs):
             The gene expression values after diffusion
         """
         tasklogger.log_start('MAGIC')
-        self.fit(X)
+        self.fit(X, graph=graph)
         X_magic = self.transform(**kwargs)
         tasklogger.log_complete('MAGIC')
         return X_magic

From f6a7124a5b5f8a4f04e28556300c5551810eaf68 Mon Sep 17 00:00:00 2001
From: Scott Gigante <scottgigante@gmail.com>
Date: Sun, 9 Sep 2018 20:04:26 -0400
Subject: [PATCH 12/32] avoid plt.show

---
 python/magic/plot.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/python/magic/plot.py b/python/magic/plot.py
index c0d03724..a3bea232 100644
--- a/python/magic/plot.py
+++ b/python/magic/plot.py
@@ -158,6 +158,6 @@ def animate(i):
         plt.close()
     elif show:
         plt.tight_layout()
-        plt.show(block=False)
+        fig.show()
 
     return ani

From 2954a53788e524bbba8050a54624621eaf62cbfb Mon Sep 17 00:00:00 2001
From: Scott Gigante <scottgigante@gmail.com>
Date: Sun, 9 Sep 2018 20:04:36 -0400
Subject: [PATCH 13/32] mention CRAN help docs

---
 README.md         | 4 ++--
 Rmagic/README.Rmd | 2 +-
 Rmagic/README.md  | 5 ++++-
 3 files changed, 7 insertions(+), 4 deletions(-)

diff --git a/README.md b/README.md
index ca93bfa9..fc599f73 100644
--- a/README.md
+++ b/README.md
@@ -76,7 +76,7 @@ The following code runs MAGIC on test data located in the MAGIC repository.
 
 #### Tutorials
 
-We have included two tutorial notebooks on MAGIC usage and results visualization for single cell RNA-seq data.
+You can read the MAGIC documentation at https://magic.readthedocs.io/. We have included two tutorial notebooks on MAGIC usage and results visualization for single cell RNA-seq data.
 
 EMT data notebook: http://nbviewer.jupyter.org/github/KrishnaswamyLab/MAGIC/blob/master/python/tutorial_notebooks/emt_tutorial.ipynb
 
@@ -134,7 +134,7 @@ After installing the package, MAGIC can be run by loading the library and callin
 
 #### Tutorials
 
-For a working example, see the Rmarkdown tutorials at <http://htmlpreview.github.io/?https://github.com/KrishnaswamyLab/MAGIC/blob/master/Rmagic/inst/examples/bonemarrow_tutorial.html> and <http://htmlpreview.github.io/?https://github.com/KrishnaswamyLab/MAGIC/blob/master/Rmagic/inst/examples/emt_tutorial.html> or in `Rmagic/inst/examples`.
+You can read the MAGIC tutorial by running `help(Rmagic::magic)`. For a working example, see the Rmarkdown tutorials at <http://htmlpreview.github.io/?https://github.com/KrishnaswamyLab/MAGIC/blob/master/Rmagic/inst/examples/bonemarrow_tutorial.html> and <http://htmlpreview.github.io/?https://github.com/KrishnaswamyLab/MAGIC/blob/master/Rmagic/inst/examples/emt_tutorial.html> or in `Rmagic/inst/examples`.
 
 ## Help
 
diff --git a/Rmagic/README.Rmd b/Rmagic/README.Rmd
index a63421ec..16c5ea41 100644
--- a/Rmagic/README.Rmd
+++ b/Rmagic/README.Rmd
@@ -181,4 +181,4 @@ ggplot(data_PHATE) +
 
 ## Help
 
-If you have any questions or require assistance using MAGIC, please contact us at <https://krishnaswamylab.org/get-help>.
+Please let us know of any issues at the [GitHub repo](https://github.com/KrishnaswamyLab/MAGIC/issues). If you have any questions or require assistance using MAGIC, please read the documentation by running `help(Rmagic::magic)` or contact us at <https://krishnaswamylab.org/get-help>.
diff --git a/Rmagic/README.md b/Rmagic/README.md
index 1b301256..7ed5ee7d 100644
--- a/Rmagic/README.md
+++ b/Rmagic/README.md
@@ -236,4 +236,7 @@ ggplot(data_PHATE) +
 
 ## Help
 
-If you have any questions or require assistance using MAGIC, please contact us at <https://krishnaswamylab.org/get-help>.
+Please let us know of any issues at the [GitHub
+repo](https://github.com/KrishnaswamyLab/MAGIC/issues). If you have any
+questions or require assistance using MAGIC, please read the documentation
+by running `help(Rmagic::magic)` or contact us at <https://krishnaswamylab.org/get-help>.

From 47ca2adc63b24efc6fb3f96d80c4fcbb11da83d3 Mon Sep 17 00:00:00 2001
From: Scott Gigante <scottgigante@gmail.com>
Date: Mon, 10 Sep 2018 14:04:36 -0400
Subject: [PATCH 14/32] document

---
 Rmagic/DESCRIPTION          | 2 +-
 Rmagic/man/install.magic.Rd | 4 ++--
 Rmagic/man/magic.Rd         | 7 ++++---
 3 files changed, 7 insertions(+), 6 deletions(-)

diff --git a/Rmagic/DESCRIPTION b/Rmagic/DESCRIPTION
index 7b13561f..322b70b3 100644
--- a/Rmagic/DESCRIPTION
+++ b/Rmagic/DESCRIPTION
@@ -20,4 +20,4 @@ Suggests:
 	phateR
 License: GPL-2 | file LICENSE
 LazyData: true
-RoxygenNote: 6.0.1
+RoxygenNote: 6.1.0
diff --git a/Rmagic/man/install.magic.Rd b/Rmagic/man/install.magic.Rd
index 6f216fe0..e25bd0da 100644
--- a/Rmagic/man/install.magic.Rd
+++ b/Rmagic/man/install.magic.Rd
@@ -4,8 +4,8 @@
 \alias{install.magic}
 \title{Install MAGIC Python Package}
 \usage{
-install.magic(envname = "r-reticulate", method = "auto", conda = "auto",
-  pip = TRUE, ...)
+install.magic(envname = "r-reticulate", method = "auto",
+  conda = "auto", pip = TRUE, ...)
 }
 \arguments{
 \item{envname}{Name of environment to install packages into}
diff --git a/Rmagic/man/magic.Rd b/Rmagic/man/magic.Rd
index 484e9e0d..95dd856c 100644
--- a/Rmagic/man/magic.Rd
+++ b/Rmagic/man/magic.Rd
@@ -4,9 +4,10 @@
 \alias{magic}
 \title{Perform MAGIC on a data matrix}
 \usage{
-magic(data, genes = NULL, k = 10, alpha = 15, t = "auto", npca = 100,
-  init = NULL, t.max = 20, knn.dist.method = "euclidean", verbose = 1,
-  n.jobs = 1, seed = NULL)
+magic(data, genes = NULL, k = 10, alpha = 15, t = "auto",
+  npca = 100, init = NULL, t.max = 20,
+  knn.dist.method = "euclidean", verbose = 1, n.jobs = 1,
+  seed = NULL)
 }
 \arguments{
 \item{data}{input data matrix}

From c171495a8f5b8c9399ce9374bc2f9b237b8f3538 Mon Sep 17 00:00:00 2001
From: Scott Gigante <scottgigante@gmail.com>
Date: Mon, 10 Sep 2018 19:26:58 -0400
Subject: [PATCH 15/32] manually import utils::menu

---
 Rmagic/DESCRIPTION | 2 +-
 Rmagic/R/utils.R   | 2 +-
 Rmagic/README.Rmd  | 2 +-
 Rmagic/README.md   | 2 +-
 4 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/Rmagic/DESCRIPTION b/Rmagic/DESCRIPTION
index 322b70b3..36d12081 100644
--- a/Rmagic/DESCRIPTION
+++ b/Rmagic/DESCRIPTION
@@ -1,7 +1,7 @@
 Package: Rmagic
 Type: Package
 Title: MAGIC - Markov Affinity-Based Graph Imputation of Cells
-Version: 1.0.0
+Version: 1.1.0
 Authors@R: c(person(given = "David", family = "van Dijk", email = "davidvandijk@gmail.com", role = c("aut")),
 		  person(given = 'Scott', family = 'Gigante', email = 'scott.gigante@yale.edu', role = 'cre', comment = c(ORCID = '0000-0002-4544-2764')))
 Maintainer: Scott Gigante <scott.gigante@yale.edu>
diff --git a/Rmagic/R/utils.R b/Rmagic/R/utils.R
index 4e20c661..ef458f19 100644
--- a/Rmagic/R/utils.R
+++ b/Rmagic/R/utils.R
@@ -18,7 +18,7 @@ load_pymagic <- function(delay_load = FALSE) {
   if (methods::is(result, "try-error") &&
       (length(grep("ModuleNotFoundError: No module named 'magic'", result)) > 0 ||
         length(grep("ImportError: No module named magic", result)) > 0)) {
-      if (menu(c("Yes", "No"), title="Install MAGIC Python package with reticulate?") == 1) {
+      if (utils::menu(c("Yes", "No"), title="Install MAGIC Python package with reticulate?") == 1) {
         install.magic()
       }
   } else if (length(grep("r\\-reticulate", reticulate::py_config()$python)) > 0) {
diff --git a/Rmagic/README.Rmd b/Rmagic/README.Rmd
index 16c5ea41..1d9a2d30 100644
--- a/Rmagic/README.Rmd
+++ b/Rmagic/README.Rmd
@@ -1,5 +1,5 @@
 ---
-title : Rmagic v1.0.0
+title : Rmagic v1.1.0
 output: github_document
 toc: true
 ---
diff --git a/Rmagic/README.md b/Rmagic/README.md
index 7ed5ee7d..d6bdf6d2 100644
--- a/Rmagic/README.md
+++ b/Rmagic/README.md
@@ -1,4 +1,4 @@
-Rmagic v1.0.0
+Rmagic v1.1.0
 ================
 
 <!-- README.md is generated from README.Rmd. Please edit that file -->

From 88581c1271119d7109be75842340e1cf6a4215b6 Mon Sep 17 00:00:00 2001
From: Scott Gigante <scottgigante@gmail.com>
Date: Mon, 17 Sep 2018 15:24:22 -0400
Subject: [PATCH 16/32] add dremi function to magic op - resolves #20

---
 python/magic/magic.py | 100 ++++++++++++++++++++++++++++++++----------
 python/magic/test.py  |   5 +++
 2 files changed, 81 insertions(+), 24 deletions(-)

diff --git a/python/magic/magic.py b/python/magic/magic.py
index db6b03d5..d79cc0c5 100644
--- a/python/magic/magic.py
+++ b/python/magic/magic.py
@@ -19,6 +19,7 @@
 import pandas as pd
 import numbers
 import tasklogger
+import scprep
 
 from . import utils
 
@@ -116,6 +117,7 @@ class MAGIC(BaseEstimator):
     >>> plt.show()
     >>> magic.plot.animate_magic(X, gene_x='VIM', gene_y='CDH1',
     ...                          gene_color='ZEB1', operator=magic_operator)
+    >>> dremi = magic_operator.knnDREMI('VIM', 'CDH1', plot=True)
 
     References
     ----------
@@ -315,30 +317,32 @@ def fit(self, X, graph=None):
             n_pca = None
         else:
             n_pca = self.n_pca
+
         if graph is None:
             graph = self.graph
-
-        if graph is not None:
             if self.X is not None and not \
                     utils.matrix_is_equivalent(X, self.X):
                 """
                 If the same data is used, we can reuse existing kernel and
                 diffusion matrices. Otherwise we have to recompute.
                 """
+                tasklogger.log_debug(
+                    "Reset graph due to difference in input data")
+                graph = None
+
+        if graph is not None:
+            try:
+                graph.set_params(
+                    decay=self.a, knn=self.k + 1, distance=self.knn_dist,
+                    n_jobs=self.n_jobs, verbose=self.verbose, n_pca=n_pca,
+                    thresh=1e-4, random_state=self.random_state)
+                tasklogger.log_info(
+                    "Using precomputed graph and diffusion operator...")
+            except ValueError as e:
+                # something changed that should have invalidated the graph
+                tasklogger.log_debug(
+                    "Reset graph due to {}".format(str(e)))
                 graph = None
-            else:
-                try:
-                    graph.set_params(
-                        decay=self.a, knn=self.k + 1, distance=self.knn_dist,
-                        n_jobs=self.n_jobs, verbose=self.verbose, n_pca=n_pca,
-                        thresh=1e-4, random_state=self.random_state)
-                    tasklogger.log_info(
-                        "Using precomputed graph and diffusion operator...")
-                except ValueError as e:
-                    # something changed that should have invalidated the graph
-                    tasklogger.log_debug(
-                        "Reset graph due to {}".format(str(e)))
-                    graph = None
 
         self.X = X
 
@@ -482,8 +486,8 @@ def transform(self, X=None, genes=None, t_max=20,
         if store_result and self.X_magic is not None:
             X_magic = self.X_magic
         else:
-            X_magic = self.impute(graph, t_max=t_max,
-                                  plot=plot_optimal_t, ax=ax)
+            X_magic = self._impute(graph, t_max=t_max,
+                                   plot=plot_optimal_t, ax=ax)
             if store_result:
                 self.X_magic = X_magic
 
@@ -527,8 +531,8 @@ def fit_transform(self, X, graph=None, **kwargs):
         tasklogger.log_complete('MAGIC')
         return X_magic
 
-    def calculate_error(self, data, data_prev=None, weights=None,
-                        subsample_genes=None):
+    def _calculate_error(self, data, data_prev=None, weights=None,
+                         subsample_genes=None):
         """Calculates difference before and after diffusion
 
         Parameters
@@ -562,8 +566,8 @@ def calculate_error(self, data, data_prev=None, weights=None,
             error = None
         return error, data
 
-    def impute(self, data, t_max=20, plot=False, ax=None,
-               max_genes_compute_t=500, threshold=0.001):
+    def _impute(self, data, t_max=20, plot=False, ax=None,
+                max_genes_compute_t=500, threshold=0.001):
         """Peform MAGIC imputation
 
         Parameters
@@ -603,7 +607,7 @@ def impute(self, data, t_max=20, plot=False, ax=None,
         else:
             weights = None
         if self.t == 'auto':
-            _, data_prev = self.calculate_error(
+            _, data_prev = self._calculate_error(
                 data_imputed, data_prev=None,
                 weights=weights,
                 subsample_genes=subsample_genes)
@@ -634,7 +638,7 @@ def impute(self, data, t_max=20, plot=False, ax=None,
                 i += 1
                 data_imputed = self.diff_op.dot(data_imputed)
                 if self.t == 'auto':
-                    error, data_prev = self.calculate_error(
+                    error, data_prev = self._calculate_error(
                         data_imputed, data_prev,
                         weights=weights,
                         subsample_genes=subsample_genes)
@@ -659,7 +663,7 @@ def impute(self, data, t_max=20, plot=False, ax=None,
                 while i < t_max:
                     i += 1
                     data_overimputed = self.diff_op.dot(data_overimputed)
-                    error, data_prev = self.calculate_error(
+                    error, data_prev = self._calculate_error(
                         data_overimputed, data_prev,
                         weights=weights,
                         subsample_genes=subsample_genes)
@@ -687,3 +691,51 @@ def impute(self, data, t_max=20, plot=False, ax=None,
                 plt.show(block=False)
 
         return data_imputed
+
+    def knnDREMI(self, gene_x, gene_y,
+                 k=10, n_bins=20, n_mesh=3, n_jobs=1,
+                 plot=False, **kwargs):
+        """Calculate kNN-DREMI on MAGIC output
+
+        Calculates k-Nearest Neighbor conditional Density Resampled Estimate of
+        Mutual Information as defined in Van Dijk et al, 2018. [1]_
+
+        Note that kNN-DREMI, like Mutual Information and DREMI, is not
+        symmetric. Here we are estimating I(Y|X).
+
+        Parameters
+        ----------
+        gene_x : str
+            Name of the gene shown on the x axis (independent feature)
+        gene_y : str
+            Name of the gene shown on the y axis (dependent feature)
+        k : int, range=[0:n_samples), optional (default: 10)
+            Number of neighbors
+        n_bins : int, range=[0:inf), optional (default: 20)
+            Number of bins for density resampling
+        n_mesh : int, range=[0:inf), optional (default: 3)
+            In each bin, density will be calculated around (n_mesh ** 2) points
+        n_jobs : int, optional (default: 1)
+            Number of threads used for kNN calculation
+        plot : bool, optional (default: False)
+            If True, create plots of the data like those seen in
+            Fig 5C/D of van Dijk et al. 2018. (doi:10.1016/j.cell.2018.05.061).
+        **kwargs : additional arguments for `scprep.stats.plot_knnDREMI`
+
+        Returns
+        -------
+        dremi : float
+            kNN condtional Density resampled estimate of mutual information
+
+        References
+        ----------
+        .. [1] van Dijk D *et al.* (2018),
+            *Recovering Gene Interactions from Single-Cell Data Using Data
+            Diffusion*, `Cell <https://doi.org/10.1016/j.cell.2018.05.061>`_.
+        """
+        data = self.transform(genes=[gene_x, gene_y])
+        dremi = scprep.stats.knnDREMI(
+            data[gene_x], data[gene_y],
+            k=k, n_bins=n_bins, n_mesh=n_mesh, n_jobs=n_jobs,
+            plot=plot, **kwargs)
+        return dremi
diff --git a/python/magic/test.py b/python/magic/test.py
index f2163f2d..c6344ef0 100644
--- a/python/magic/test.py
+++ b/python/magic/test.py
@@ -2,6 +2,8 @@
 
 
 from __future__ import print_function, division, absolute_import
+import matplotlib as mpl
+mpl.use("agg")
 import magic
 import numpy as np
 import scprep
@@ -19,6 +21,7 @@ def test_scdata():
     scdata_norm = scprep.normalize.library_size_normalize(scdata)
     scdata_norm = scprep.transform.sqrt(scdata_norm)
     assert scdata.shape == scdata_norm.shape
+    np.random.seed(42)
     magic_op = magic.MAGIC(t='auto', a=20, k=10)
     str_gene_magic = magic_op.fit_transform(
         scdata_norm, genes=['VIM', 'ZEB1'])
@@ -33,6 +36,8 @@ def test_scdata():
     magic_all_genes = magic_op.fit_transform(scdata_norm,
                                              genes="all_genes")
     assert scdata_norm.shape == magic_all_genes.shape
+    dremi = magic_op.knnDREMI("VIM", "ZEB1", plot=True)
+    np.testing.assert_allclose(dremi, 1.5687165)
 
 
 def test_anndata():

From acb3cd3c46e9d9d201e0fbfad6a54691cbfbe49a Mon Sep 17 00:00:00 2001
From: Scott Gigante <scottgigante@gmail.com>
Date: Mon, 17 Sep 2018 17:29:50 -0400
Subject: [PATCH 17/32] fix example typo

---
 Rmagic/R/magic.R | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Rmagic/R/magic.R b/Rmagic/R/magic.R
index f3a57e83..013f6390 100644
--- a/Rmagic/R/magic.R
+++ b/Rmagic/R/magic.R
@@ -39,7 +39,7 @@
 #' @param seed int or `NULL`, random state (default: `NULL`)
 #'
 #' @examples
-#' if (reticulate::py_module_available("phate")) {
+#' if (reticulate::py_module_available("magic")) {
 #'
 #' data(magic_testdata)
 #'

From bac8a70919a6a72f9823494b4f606607ac62605a Mon Sep 17 00:00:00 2001
From: Scott Gigante <scottgigante@gmail.com>
Date: Mon, 17 Sep 2018 18:04:48 -0400
Subject: [PATCH 18/32] require scprep

---
 python/requirements.txt | 1 +
 python/setup.py         | 1 +
 2 files changed, 2 insertions(+)

diff --git a/python/requirements.txt b/python/requirements.txt
index 27005d46..0621bdda 100644
--- a/python/requirements.txt
+++ b/python/requirements.txt
@@ -6,3 +6,4 @@ scikit-learn>=0.19.1
 graphtools>=0.1.8
 future
 tasklogger>=0.2.1
+scprep>=0.7.1
diff --git a/python/setup.py b/python/setup.py
index 8371b82d..2e4c99d0 100644
--- a/python/setup.py
+++ b/python/setup.py
@@ -10,6 +10,7 @@
     'scikit-learn>=0.19.1',
     'tasklogger>=0.2.1',
     'graphtools>=0.1.9',
+    'scprep>=0.7.1'
 ]
 
 test_requires = [

From 535dab8995fe88004a63de06e70342ae485d4453 Mon Sep 17 00:00:00 2001
From: Scott Gigante <scottgigante@gmail.com>
Date: Mon, 17 Sep 2018 18:24:32 -0400
Subject: [PATCH 19/32] put tests outside module

---
 python/{magic => test}/test.py | 0
 1 file changed, 0 insertions(+), 0 deletions(-)
 rename python/{magic => test}/test.py (100%)

diff --git a/python/magic/test.py b/python/test/test.py
similarity index 100%
rename from python/magic/test.py
rename to python/test/test.py

From 0304ccc7d31ee8595ae89a66d2285db71f93ba02 Mon Sep 17 00:00:00 2001
From: Scott Gigante <scottgigante@gmail.com>
Date: Mon, 17 Sep 2018 18:30:07 -0400
Subject: [PATCH 20/32] ignore pycache

---
 .gitignore | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/.gitignore b/.gitignore
index 4a1d8971..2c49dd8e 100644
--- a/.gitignore
+++ b/.gitignore
@@ -20,6 +20,9 @@ python/*.egg-info
 python/magic/__pycache__
 python/magic/*.pyc
 python/tutorial_notebooks/.ipynb_checkpoints
+__pycache__
+.eggs
+
 
 matlab/EMT.csv
 

From aaf4c58d2a5746e94867c847fd7c9095350e42a5 Mon Sep 17 00:00:00 2001
From: Scott Gigante <scottgigante@gmail.com>
Date: Mon, 17 Sep 2018 19:08:42 -0400
Subject: [PATCH 21/32] reduce test value

---
 python/test/test.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/python/test/test.py b/python/test/test.py
index c6344ef0..6a575426 100644
--- a/python/test/test.py
+++ b/python/test/test.py
@@ -37,7 +37,7 @@ def test_scdata():
                                              genes="all_genes")
     assert scdata_norm.shape == magic_all_genes.shape
     dremi = magic_op.knnDREMI("VIM", "ZEB1", plot=True)
-    np.testing.assert_allclose(dremi, 1.5687165)
+    np.testing.assert_allclose(dremi, 1.568716)
 
 
 def test_anndata():

From 5e6e9d3f8c9daa22f39484ab576549911bb7d64f Mon Sep 17 00:00:00 2001
From: Scott Gigante <scottgigante@gmail.com>
Date: Mon, 17 Sep 2018 19:09:44 -0400
Subject: [PATCH 22/32] remove redundant reference

---
 python/magic/magic.py | 6 ------
 1 file changed, 6 deletions(-)

diff --git a/python/magic/magic.py b/python/magic/magic.py
index d79cc0c5..83cdbcfe 100644
--- a/python/magic/magic.py
+++ b/python/magic/magic.py
@@ -726,12 +726,6 @@ def knnDREMI(self, gene_x, gene_y,
         -------
         dremi : float
             kNN condtional Density resampled estimate of mutual information
-
-        References
-        ----------
-        .. [1] van Dijk D *et al.* (2018),
-            *Recovering Gene Interactions from Single-Cell Data Using Data
-            Diffusion*, `Cell <https://doi.org/10.1016/j.cell.2018.05.061>`_.
         """
         data = self.transform(genes=[gene_x, gene_y])
         dremi = scprep.stats.knnDREMI(

From 13189470fde9bc8f17d4c21668bf29878cc1365c Mon Sep 17 00:00:00 2001
From: Scott Gigante <scottgigante@gmail.com>
Date: Mon, 17 Sep 2018 20:30:27 -0400
Subject: [PATCH 23/32] slightly different floating point values on py2/3

---
 python/test/test.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/python/test/test.py b/python/test/test.py
index 6a575426..5602bb0d 100644
--- a/python/test/test.py
+++ b/python/test/test.py
@@ -37,7 +37,7 @@ def test_scdata():
                                              genes="all_genes")
     assert scdata_norm.shape == magic_all_genes.shape
     dremi = magic_op.knnDREMI("VIM", "ZEB1", plot=True)
-    np.testing.assert_allclose(dremi, 1.568716)
+    np.testing.assert_allclose(dremi, 1.5687165, atol=0.0000005)
 
 
 def test_anndata():

From 846fc835bb988319f88d990802f013c4dea67a59 Mon Sep 17 00:00:00 2001
From: Scott Gigante <scottgigante@gmail.com>
Date: Mon, 17 Sep 2018 20:59:01 -0400
Subject: [PATCH 24/32] bump version

---
 python/magic/version.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/python/magic/version.py b/python/magic/version.py
index a955fdae..67bc602a 100644
--- a/python/magic/version.py
+++ b/python/magic/version.py
@@ -1 +1 @@
-__version__ = "1.2.1"
+__version__ = "1.3.0"

From 73aea1a6aef5ffeae10c2a9b5b35d743e6758ac6 Mon Sep 17 00:00:00 2001
From: Scott Gigante <scottgigante@gmail.com>
Date: Mon, 17 Sep 2018 21:02:19 -0400
Subject: [PATCH 25/32] bump version and document

---
 Rmagic/DESCRIPTION  | 10 ++++++----
 Rmagic/README.Rmd   |  2 +-
 Rmagic/README.md    |  2 +-
 Rmagic/man/magic.Rd |  2 +-
 4 files changed, 9 insertions(+), 7 deletions(-)

diff --git a/Rmagic/DESCRIPTION b/Rmagic/DESCRIPTION
index 36d12081..5aacc0b5 100644
--- a/Rmagic/DESCRIPTION
+++ b/Rmagic/DESCRIPTION
@@ -1,9 +1,10 @@
 Package: Rmagic
 Type: Package
 Title: MAGIC - Markov Affinity-Based Graph Imputation of Cells
-Version: 1.1.0
+Version: 1.3.0
 Authors@R: c(person(given = "David", family = "van Dijk", email = "davidvandijk@gmail.com", role = c("aut")),
-		  person(given = 'Scott', family = 'Gigante', email = 'scott.gigante@yale.edu', role = 'cre', comment = c(ORCID = '0000-0002-4544-2764')))
+		         person(given = 'Scott', family = 'Gigante', email = 'scott.gigante@yale.edu', role = 'cre', 
+		                comment = c(ORCID = '0000-0002-4544-2764')))
 Maintainer: Scott Gigante <scott.gigante@yale.edu>
 Description: MAGIC (Markov affinity-based graph imputation of cells) is a method for addressing technical noise in single-cell data, including under-sampling of mRNA molecules, often termed "dropout" which can severely obscure important gene-gene relationships. MAGIC shares information across similar cells, via data diffusion, to denoise the cell count matrix and fill in missing transcripts. Read more: van Dijk et al. (2018) <DOI:10.1016/j.cell.2018.05.061>.
 Depends:
@@ -15,9 +16,10 @@ Imports:
     reticulate (>= 1.4),
     ggplot2
 Suggests:
-	readr,
+  	readr,
     viridis,
-	phateR
+  	phateR
 License: GPL-2 | file LICENSE
 LazyData: true
 RoxygenNote: 6.1.0
+Encoding: UTF-8
diff --git a/Rmagic/README.Rmd b/Rmagic/README.Rmd
index 1d9a2d30..c4fe868a 100644
--- a/Rmagic/README.Rmd
+++ b/Rmagic/README.Rmd
@@ -1,5 +1,5 @@
 ---
-title : Rmagic v1.1.0
+title : Rmagic v1.3.0
 output: github_document
 toc: true
 ---
diff --git a/Rmagic/README.md b/Rmagic/README.md
index d6bdf6d2..4b1ae017 100644
--- a/Rmagic/README.md
+++ b/Rmagic/README.md
@@ -1,4 +1,4 @@
-Rmagic v1.1.0
+Rmagic v1.3.0
 ================
 
 <!-- README.md is generated from README.Rmd. Please edit that file -->
diff --git a/Rmagic/man/magic.Rd b/Rmagic/man/magic.Rd
index 95dd856c..092079f6 100644
--- a/Rmagic/man/magic.Rd
+++ b/Rmagic/man/magic.Rd
@@ -61,7 +61,7 @@ applied to single-cell RNA sequencing data, as described in
 van Dijk et al, 2018.
 }
 \examples{
-if (reticulate::py_module_available("phate")) {
+if (reticulate::py_module_available("magic")) {
 
 data(magic_testdata)
 

From 58bb5e38c6bf4784b20a9340e0c98ec90d63ac82 Mon Sep 17 00:00:00 2001
From: Scott Gigante <scottgigante@gmail.com>
Date: Fri, 21 Sep 2018 14:02:02 -0400
Subject: [PATCH 26/32] fix load_pymagic with delay_load, only check for
 r-reticulate on fail

---
 Rmagic/R/utils.R | 21 +++++++++++----------
 1 file changed, 11 insertions(+), 10 deletions(-)

diff --git a/Rmagic/R/utils.R b/Rmagic/R/utils.R
index ef458f19..8ae098a2 100644
--- a/Rmagic/R/utils.R
+++ b/Rmagic/R/utils.R
@@ -10,23 +10,24 @@ null_equal <- function(x, y) {
 }
 
 load_pymagic <- function(delay_load = FALSE) {
-    if (is.null(pymagic)) {
+  if (is.null(pymagic)) {
     result <- try(pymagic <<- reticulate::import("magic", delay_load = delay_load))
   } else {
     result <- try(reticulate::import("magic", delay_load = delay_load))
   }
-  if (methods::is(result, "try-error") &&
-      (length(grep("ModuleNotFoundError: No module named 'magic'", result)) > 0 ||
-        length(grep("ImportError: No module named magic", result)) > 0)) {
+  if (methods::is(result, "try-error")) {
+    if ((!delay_load) && length(grep("ModuleNotFoundError: No module named 'magic'", result)) > 0 ||
+        length(grep("ImportError: No module named magic", result)) > 0) {
       if (utils::menu(c("Yes", "No"), title="Install MAGIC Python package with reticulate?") == 1) {
         install.magic()
       }
-  } else if (length(grep("r\\-reticulate", reticulate::py_config()$python)) > 0) {
-    message("Consider removing the 'r-reticulate' environment by running:")
-    if (grep("virtualenvs", reticulate::py_config()$python)) {
-      message("reticulate::virtualenv_remove('r-reticulate')")
-    } else {
-      message("reticulate::conda_remove('r-reticulate')")
+    } else if (length(grep("r\\-reticulate", reticulate::py_config()$python)) > 0) {
+      message("Consider removing the 'r-reticulate' environment by running:")
+      if (any(grepl("virtualenvs", reticulate::py_config()$python))) {  # grepl gives TRUE/FALSE; bare grep() is integer(0) on no match and makes if() error
+        message("reticulate::virtualenv_remove('r-reticulate')")
+      } else {
+        message("reticulate::conda_remove('r-reticulate')")
+      }
     }
   }
 }

From bc1606e93b3cc349c2cc356d07c944f2bd6d37ec Mon Sep 17 00:00:00 2001
From: Scott Gigante <scottgigante@gmail.com>
Date: Fri, 21 Sep 2018 14:07:42 -0400
Subject: [PATCH 27/32] clean up precomputed graph logic:

---
 python/magic/magic.py | 38 +++++++++++++++++++++-----------------
 1 file changed, 21 insertions(+), 17 deletions(-)

diff --git a/python/magic/magic.py b/python/magic/magic.py
index 83cdbcfe..57e9d2f2 100644
--- a/python/magic/magic.py
+++ b/python/magic/magic.py
@@ -329,20 +329,22 @@ def fit(self, X, graph=None):
                 tasklogger.log_debug(
                     "Reset graph due to difference in input data")
                 graph = None
-
-        if graph is not None:
-            try:
-                graph.set_params(
-                    decay=self.a, knn=self.k + 1, distance=self.knn_dist,
-                    n_jobs=self.n_jobs, verbose=self.verbose, n_pca=n_pca,
-                    thresh=1e-4, random_state=self.random_state)
-                tasklogger.log_info(
-                    "Using precomputed graph and diffusion operator...")
-            except ValueError as e:
-                # something changed that should have invalidated the graph
-                tasklogger.log_debug(
-                    "Reset graph due to {}".format(str(e)))
-                graph = None
+            elif graph is not None:
+                try:
+                    graph.set_params(
+                        decay=self.a, knn=self.k + 1, distance=self.knn_dist,
+                        n_jobs=self.n_jobs, verbose=self.verbose, n_pca=n_pca,
+                        thresh=1e-4, random_state=self.random_state)
+                except ValueError as e:
+                    # something changed that should have invalidated the graph
+                    tasklogger.log_debug(
+                        "Reset graph due to {}".format(str(e)))
+                    graph = None
+        else:
+            self.k = graph.knn - 1
+            self.alpha = graph.decay
+            self.n_pca = graph.n_pca
+            self.knn_dist = graph.distance
 
         self.X = X
 
@@ -350,7 +352,11 @@ def fit(self, X, graph=None):
             warnings.warn("Input matrix contains unexpressed genes. "
                           "Please remove them prior to running MAGIC.")
 
-        if graph is None:
+        if graph is not None:
+            tasklogger.log_info(
+                "Using precomputed graph and diffusion operator...")
+            self.graph = graph
+        else:
             # reset X_magic in case it was previously set
             self.X_magic = None
             tasklogger.log_start("graph and diffusion operator")
@@ -364,8 +370,6 @@ def fit(self, X, graph=None):
                 verbose=self.verbose,
                 random_state=self.random_state)
             tasklogger.log_complete("graph and diffusion operator")
-        else:
-            self.graph = graph
 
         return self
 

From d79c449dab7d4b21d7b6b80c70801fab386d8ee6 Mon Sep 17 00:00:00 2001
From: Scott Gigante <scottgigante@gmail.com>
Date: Fri, 21 Sep 2018 14:14:03 -0400
Subject: [PATCH 28/32] Properly deprecate magic.io

---
 python/magic/io.py | 386 +++------------------------------------------
 1 file changed, 22 insertions(+), 364 deletions(-)

diff --git a/python/magic/io.py b/python/magic/io.py
index b0525ec6..6346a2a5 100644
--- a/python/magic/io.py
+++ b/python/magic/io.py
@@ -11,167 +11,7 @@
 import zipfile
 import tempfile
 import shutil
-try:
-    import fcsparser
-except ImportError:
-    pass
-try:
-    import tables
-except ImportError:
-    pass
-
-try:
-    FileNotFoundError
-except NameError:
-    # py2 compatibility
-    FileNotFoundError = OSError
-
-
-def with_fcsparser(fun):
-    def wrapped_fun(*args, **kwargs):
-        try:
-            fcsparser
-        except NameError:
-            raise ImportError(
-                "fcsparser not found. "
-                "Please install it with e.g. `pip install --user fcsparser`")
-        return fun(*args, **kwargs)
-    return wrapped_fun
-
-
-def with_tables(fun):
-    def wrapped_fun(*args, **kwargs):
-        try:
-            tables
-        except NameError:
-            raise ImportError(
-                "tables not found. "
-                "Please install it with e.g. `pip install --user tables`")
-        return fun(*args, **kwargs)
-    return wrapped_fun
-
-
-def _parse_header(header, n_expected, header_type="gene_names"):
-    """
-    Parameters
-    ----------
-    header : `str` filename, array-like or `None`
-
-    n_expected : `int`
-        Expected header length
-
-    header_type : argument name for error printing
-    """
-    if header is None or header is False:
-        return None
-    elif isinstance(header, str):
-        # treat as a file
-        if header.endswith("tsv"):
-            delimiter = "\t"
-        else:
-            delimiter = ","
-        columns = pd.read_csv(header, delimiter=delimiter,
-                              header=None).values.reshape(-1)
-        if not len(columns) == n_expected:
-            raise ValueError("Expected {} entries in {}. Got {}".format(
-                n_expected, header, len(columns)))
-    else:
-        # treat as list
-        columns = header
-        if not len(columns) == n_expected:
-            raise ValueError("Expected {} entries in {}. Got {}".format(
-                n_expected, header_type, len(columns)))
-    return columns
-
-
-def _parse_gene_names(header, data):
-    return _parse_header(header, data.shape[1],
-                         header_type="gene_names")
-
-
-def _parse_cell_names(header, data):
-    return _parse_header(header, data.shape[0],
-                         header_type="cell_names")
-
-
-def _matrix_to_data_frame(data, gene_names=None, cell_names=None, sparse=None):
-    """Return the optimal data type given data, gene names and cell names.
-
-    Parameters
-    ----------
-
-    data : array-like
-
-    gene_names : `str`, array-like or `None` (default: None)
-        Either a filename or an array containing a list of gene symbols or ids.
-
-    cell_names : `str`, array-like or `None` (default: None)
-        Either a filename or an array containing a list of cell barcodes.
-
-    sparse : `bool` or `None` (default: None)
-        If not `None`, overrides default sparsity of the data.
-    """
-    if gene_names is None and cell_names is None and \
-            not isinstance(data, pd.DataFrame):
-        # just a matrix
-        if sparse is not None:
-            if sparse:
-                if not sp.issparse(data):
-                    # return scipy.sparse.csr_matrix
-                    data = sp.csr_matrix(data)
-            elif sp.issparse(data) and not sparse:
-                # return numpy.ndarray
-                data = data.toarray()
-        else:
-            # return data as is
-            pass
-        return data
-    else:
-        gene_names = _parse_gene_names(gene_names, data)
-        cell_names = _parse_cell_names(cell_names, data)
-        # dataframe with index and/or columns
-        if sparse is None:
-            # let the input data decide
-            sparse = isinstance(data, pd.SparseDataFrame) or sp.issparse(data)
-        if sparse and gene_names is not None and \
-                len(np.unique(gene_names)) < len(gene_names):
-            warnings.warn(
-                "Duplicate gene names detected! Forcing dense matrix",
-                RuntimeWarning)
-            sparse = False
-        if sparse:
-            # return pandas.SparseDataFrame
-            if isinstance(data, pd.DataFrame):
-                if gene_names is not None:
-                    data.columns = gene_names
-                if cell_names is not None:
-                    data.index = cell_names
-                if not isinstance(data, pd.SparseDataFrame):
-                    data = data.to_sparse(fill_value=0.0)
-            else:
-                data = pd.SparseDataFrame(data, default_fill_value=0.0,
-                                          index=cell_names, columns=gene_names)
-        else:
-            # return pandas.DataFrame
-            if isinstance(data, pd.DataFrame):
-                if gene_names is not None:
-                    data.columns = gene_names
-                if cell_names is not None:
-                    data.index = cell_names
-                if isinstance(data, pd.SparseDataFrame):
-                    data = data.to_dense()
-            else:
-                if sp.issparse(data):
-                    data = data.toarray()
-                data = pd.DataFrame(data, index=cell_names, columns=gene_names)
-        return data
-
-
-def _read_csv_sparse(filename, chunksize=1000000, fill_value=0.0, **kwargs):
-    chunks = pd.read_csv(filename, chunksize=chunksize, **kwargs)
-    data = pd.concat(chunk.to_sparse(fill_value=fill_value)
-                     for chunk in chunks)
-    return data
+import scprep
 
 
 def load_csv(filename, cell_axis='row', delimiter=',',
@@ -206,49 +46,9 @@ def load_csv(filename, cell_axis='row', delimiter=',',
     warnings.warn("magic.io is deprecated. Please use scprep.io instead. "
                   "Read more at http://scprep.readthedocs.io",
                   FutureWarning)
-    if cell_axis not in ['row', 'column', 'col']:
-        raise ValueError(
-            "cell_axis {} not recognized. Expected 'row' or 'column'".format(
-                cell_axis))
-
-    if 'index_col' in kwargs:
-        # override
-        index_col = kwargs['index_col']
-        cell_names = None
-        del kwargs['index_col']
-    elif cell_names is True:
-        index_col = 0
-        cell_names = None
-    else:
-        index_col = None
-
-    if 'header' in kwargs:
-        # override
-        header = kwargs['header']
-        del kwargs['header']
-        gene_names = None
-    elif gene_names is True:
-        header = 0
-        gene_names = None
-    else:
-        header = None
-
-    # Read in csv file
-    if sparse:
-        read_fun = _read_csv_sparse
-    else:
-        read_fun = pd.read_csv
-    data = read_fun(filename, delimiter=delimiter,
-                    header=header, index_col=index_col,
-                    **kwargs)
-
-    if cell_axis in ['column', 'col']:
-        data = data.T
-
-    data = _matrix_to_data_frame(
-        data, gene_names=gene_names,
-        cell_names=cell_names, sparse=sparse)
-    return data
+    return scprep.io.load_csv(filename=filename, cell_axis=cell_axis, delimiter=delimiter,
+                              gene_names=gene_names, cell_names=cell_names,
+                              sparse=sparse, **kwargs)
 
 
 def load_tsv(filename, cell_axis='row', delimiter='\t',
@@ -285,7 +85,6 @@ def load_tsv(filename, cell_axis='row', delimiter='\t',
                     sparse=sparse, **kwargs)
 
 
-@with_fcsparser
 def load_fcs(filename, gene_names=True, cell_names=True,
              sparse=None,
              metadata_channels=['Time', 'Event_length', 'DNA1', 'DNA2',
@@ -315,19 +114,10 @@ def load_fcs(filename, gene_names=True, cell_names=True,
     warnings.warn("magic.io is deprecated. Please use scprep.io instead. "
                   "Read more at http://scprep.readthedocs.io",
                   FutureWarning)
-    if cell_names is True:
-        cell_names = None
-    if gene_names is True:
-        gene_names = None
-    # Parse the fcs file
-    meta, data = fcsparser.parse(filename)
-    metadata_channels = data.columns.intersection(metadata_channels)
-    data_channels = data.columns.difference(metadata_channels)
-    metadata = data[metadata_channels]
-    data = data[data_channels]
-    data = _matrix_to_data_frame(data, gene_names=gene_names,
-                                 cell_names=cell_names, sparse=sparse)
-    return metadata, data
+    return scprep.io.load_fcs(filename=filename, gene_names=gene_names,
+                              cell_names=cell_names,
+                              sparse=sparse,
+                              metadata_channels=metadata_channels)
 
 
 def load_mtx(mtx_file, cell_axis='row',
@@ -356,60 +146,9 @@ def load_mtx(mtx_file, cell_axis='row',
     warnings.warn("magic.io is deprecated. Please use scprep.io instead. "
                   "Read more at http://scprep.readthedocs.io",
                   FutureWarning)
-    if cell_axis not in ['row', 'column', 'col']:
-        raise ValueError(
-            "cell_axis {} not recognized. Expected 'row' or 'column'".format(
-                cell_axis))
-    # Read in mtx file
-    data = sio.mmread(mtx_file)
-    if cell_axis in ['column', 'col']:
-        data = data.T
-    data = _matrix_to_data_frame(
-        data, gene_names=gene_names,
-        cell_names=cell_names, sparse=sparse)
-    return data
-
-
-def _combine_gene_id(symbols, ids):
-    """Creates gene labels of the form SYMBOL (ID)
-
-    Parameters
-    ----------
-
-    genes: pandas.DataFrame with columns['symbol', 'id']
-
-    Returns
-    -------
-
-    pandas.Index with combined gene symbols and ids
-    """
-    columns = np.core.defchararray.add(
-        np.array(symbols, dtype=str), ' (')
-    columns = np.core.defchararray.add(
-        columns, np.array(ids, dtype=str))
-    columns = np.core.defchararray.add(columns, ')')
-    return columns
-
-
-def _parse_10x_genes(symbols, ids, gene_labels='symbol',
-                     allow_duplicates=True):
-    if gene_labels not in ['symbol', 'id', 'both']:
-        raise ValueError("gene_labels='{}' not recognized. Choose from "
-                         "['symbol', 'id', 'both']")
-    if gene_labels == 'both':
-        columns = _combine_gene_id(symbols, ids)
-    if gene_labels == 'symbol':
-        columns = symbols
-        if not allow_duplicates and len(np.unique(columns)) < len(columns):
-            warnings.warn(
-                "Duplicate gene names detected! Forcing `gene_labels='id'`. "
-                "Alternatively, try `gene_labels='both'`, "
-                "`allow_duplicates=True`, or load the matrix"
-                " with `sparse=False`", RuntimeWarning)
-            gene_labels = 'id'
-    if gene_labels == 'id':
-        columns = ids
-    return columns
+    return scprep.io.load_mtx(mtx_file=mtx_file, cell_axis=cell_axis,
+                              gene_names=gene_names, cell_names=cell_names,
+                              sparse=sparse)
 
 
 def load_10X(data_dir, sparse=True, gene_labels='symbol',
@@ -447,38 +186,8 @@ def load_10X(data_dir, sparse=True, gene_labels='symbol',
     warnings.warn("magic.io is deprecated. Please use scprep.io instead. "
                   "Read more at http://scprep.readthedocs.io",
                   FutureWarning)
-
-    if gene_labels not in ['id', 'symbol', 'both']:
-        raise ValueError("gene_labels not in ['id', 'symbol', 'both']")
-
-    if not os.path.isdir(data_dir):
-        raise FileNotFoundError(
-            "{} is not a directory".format(data_dir))
-
-    try:
-        m = sio.mmread(os.path.join(data_dir, "matrix.mtx"))
-        genes = pd.read_csv(os.path.join(data_dir, "genes.tsv"),
-                            delimiter='\t', header=None)
-        genes.columns = ['id', 'symbol']
-        barcodes = pd.read_csv(os.path.join(data_dir, "barcodes.tsv"),
-                               delimiter='\t', header=None)
-
-    except (FileNotFoundError, OSError):
-        raise FileNotFoundError(
-            "'matrix.mtx', 'genes.tsv', and 'barcodes.tsv' must be present "
-            "in {}".format(data_dir))
-
-    cell_names = barcodes[0]
-    if allow_duplicates is None:
-        allow_duplicates = not sparse
-    gene_names = _parse_10x_genes(genes['symbol'], genes['id'],
-                                  gene_labels=gene_labels,
-                                  allow_duplicates=allow_duplicates)
-
-    data = _matrix_to_data_frame(m.T, cell_names=cell_names,
-                                 gene_names=gene_names,
-                                 sparse=sparse)
-    return data
+    return scprep.io.load_10X(data_dir=data_dir, sparse=sparse, gene_labels=gene_labels,
+                              allow_duplicates=allow_duplicates)
 
 
 def load_10X_zip(filename, sparse=True, gene_labels='symbol',
@@ -507,33 +216,11 @@ def load_10X_zip(filename, sparse=True, gene_labels='symbol',
     data: pandas.DataFrame shape = (n_cell, n_genes)
         imported data matrix
     """
-    tmpdir = tempfile.mkdtemp()
-    with zipfile.ZipFile(filename) as handle:
-        files = handle.namelist()
-        if len(files) != 4:
-            valid = False
-        else:
-            dirname = files[0].strip("/")
-            subdir_files = [f.split("/")[-1] for f in files]
-            if "barcodes.tsv" not in subdir_files:
-                valid = False
-            elif "genes.tsv" not in subdir_files:
-                valid = False
-            elif "matrix.mtx" not in subdir_files:
-                valid = False
-            else:
-                valid = True
-        if not valid:
-            raise ValueError(
-                "Expected a single zipped folder containing 'matrix.mtx', "
-                "'genes.tsv', and 'barcodes.tsv'. Got {}".format(files))
-        handle.extractall(path=tmpdir)
-    data = load_10X(os.path.join(tmpdir, dirname))
-    shutil.rmtree(tmpdir)
-    return data
-
-
-@with_tables
+    return scprep.io.load_10X_zip(filename=filename, sparse=sparse,
+                                  gene_labels=gene_labels,
+                                  allow_duplicates=allow_duplicates)
+
+
 def load_10X_HDF5(filename, genome=None, sparse=True, gene_labels='symbol',
                   allow_duplicates=None):
     """Basic IO for HDF5 10X data produced from the 10X Cellranger pipeline.
@@ -566,36 +253,7 @@ def load_10X_HDF5(filename, genome=None, sparse=True, gene_labels='symbol',
     warnings.warn("magic.io is deprecated. Please use scprep.io instead. "
                   "Read more at http://scprep.readthedocs.io",
                   FutureWarning)
-    with tables.open_file(filename, 'r') as f:
-        if genome is None:
-            genomes = [node._v_name for node in f.list_nodes(f.root)]
-            print_genomes = ", ".join(genomes)
-            genome = genomes[0]
-            if len(genomes) > 1:
-                print("Available genomes: {}. Selecting {} by default".format(
-                    print_genomes, genome))
-        try:
-            group = f.get_node(f.root, genome)
-        except tables.NoSuchNodeError:
-            genomes = [node._v_name for node in f.list_nodes(f.root)]
-            print_genomes = ", ".join(genomes)
-            raise ValueError(
-                "Genome {} not found in {}. "
-                "Available genomes: {}".format(genome, filename, print_genomes))
-        if allow_duplicates is None:
-            allow_duplicates = not sparse
-        gene_names = _parse_10x_genes(
-            symbols=[g.decode() for g in getattr(group, 'gene_names').read()],
-            ids=[g.decode() for g in getattr(group, 'genes').read()],
-            gene_labels=gene_labels, allow_duplicates=allow_duplicates)
-        cell_names = [b.decode() for b in getattr(group, 'barcodes').read()]
-        data = getattr(group, 'data').read()
-        indices = getattr(group, 'indices').read()
-        indptr = getattr(group, 'indptr').read()
-        shape = getattr(group, 'shape').read()
-        data = sp.csc_matrix((data, indices, indptr), shape=shape)
-        data = _matrix_to_data_frame(data.T,
-                                     gene_names=gene_names,
-                                     cell_names=cell_names,
-                                     sparse=sparse)
-        return data
+    return scprep.io.load_10X_HDF5(filename=filename, genome=genome,
+                                   sparse=sparse,
+                                   gene_labels=gene_labels,
+                                   allow_duplicates=allow_duplicates)

From 69722d07e5bb19809fcf1c7de7769415e32f640e Mon Sep 17 00:00:00 2001
From: Scott Gigante <scottgigante@gmail.com>
Date: Fri, 21 Sep 2018 14:15:14 -0400
Subject: [PATCH 29/32] Clean up imports

---
 python/magic/io.py | 14 ++++----------
 1 file changed, 4 insertions(+), 10 deletions(-)

diff --git a/python/magic/io.py b/python/magic/io.py
index 6346a2a5..23f16e82 100644
--- a/python/magic/io.py
+++ b/python/magic/io.py
@@ -2,15 +2,7 @@
 # (C) 2018 Krishnaswamy Lab GPLv2
 
 from __future__ import print_function, division
-import pandas as pd
-import scipy.io as sio
-import scipy.sparse as sp
 import warnings
-import numpy as np
-import os
-import zipfile
-import tempfile
-import shutil
 import scprep
 
 
@@ -46,7 +38,8 @@ def load_csv(filename, cell_axis='row', delimiter=',',
     warnings.warn("magic.io is deprecated. Please use scprep.io instead. "
                   "Read more at http://scprep.readthedocs.io",
                   FutureWarning)
-    return scprep.io.load_csv(filename=filename, cell_axis=cell_axis, delimiter=delimiter,
+    return scprep.io.load_csv(filename=filename, cell_axis=cell_axis,
+                              delimiter=delimiter,
                               gene_names=gene_names, cell_names=cell_names,
                               sparse=sparse, **kwargs)
 
@@ -186,7 +179,8 @@ def load_10X(data_dir, sparse=True, gene_labels='symbol',
     warnings.warn("magic.io is deprecated. Please use scprep.io instead. "
                   "Read more at http://scprep.readthedocs.io",
                   FutureWarning)
-    return scprep.io.load_10X(data_dir=data_dir, sparse=sparse, gene_labels=gene_labels,
+    return scprep.io.load_10X(data_dir=data_dir, sparse=sparse,
+                              gene_labels=gene_labels,
                               allow_duplicates=allow_duplicates)
 
 

From 9a725359b2f7288b7db73ed4408350680076c704 Mon Sep 17 00:00:00 2001
From: Scott Gigante <scottgigante@gmail.com>
Date: Fri, 21 Sep 2018 14:15:53 -0400
Subject: [PATCH 30/32] Properly deprecate magic.preprocessing

---
 python/magic/preprocessing.py | 59 ++---------------------------------
 1 file changed, 2 insertions(+), 57 deletions(-)

diff --git a/python/magic/preprocessing.py b/python/magic/preprocessing.py
index a4b0bc91..32a470e4 100644
--- a/python/magic/preprocessing.py
+++ b/python/magic/preprocessing.py
@@ -2,11 +2,8 @@
 # (C) 2017 Krishnaswamy Lab GPLv2
 
 from __future__ import print_function, division
-from sklearn.preprocessing import normalize
-import numpy as np
-from scipy import sparse
-import pandas as pd
 import warnings
+import scprep
 
 
 def library_size_normalize(data, verbose=False):
@@ -30,56 +27,4 @@ def library_size_normalize(data, verbose=False):
                   "Please use scprep.normalize instead. "
                   "Read more at http://scprep.readthedocs.io",
                   FutureWarning)
-    if verbose:
-        print("Normalizing library sizes for %s cells" % (data.shape[0]))
-
-    # pandas support
-    columns, index = None, None
-    if isinstance(data, pd.SparseDataFrame) or \
-            pd.api.types.is_sparse(data):
-        columns, index = data.columns, data.index
-        data = data.to_coo()
-    elif isinstance(data, pd.DataFrame):
-        columns, index = data.columns, data.index
-
-    median_transcript_count = np.median(np.array(data.sum(axis=1)))
-    if sparse.issparse(data) and data.nnz >= 2**31:
-        # check we can access elements by index
-        try:
-            data[0, 0]
-        except TypeError:
-            data = sparse.csr_matrix(data)
-        # normalize in chunks - sklearn doesn't does with more
-        # than 2**31 non-zero elements
-        #
-        # determine maximum chunk size
-        split = 2**30 // (data.nnz // data.shape[0])
-        size_ok = False
-        while not size_ok:
-            for i in range(0, data.shape[0], split):
-                if data[i:i + split, :].nnz >= 2**31:
-                    split = split // 2
-                    break
-            size_ok = True
-        # normalize
-        data_norm = []
-        for i in range(0, data.shape[0], split):
-            data_norm.append(normalize(data[i:i + split, :], 'l1', axis=1))
-        # combine chunks
-        data_norm = sparse.vstack(data_norm)
-    else:
-        data_norm = normalize(data, norm='l1', axis=1)
-
-    # norm = 'l1' computes the L1 norm which computes the
-    # axis = 1 independently normalizes each sample
-
-    data_norm = data_norm * median_transcript_count
-    if columns is not None:
-        # pandas dataframe
-        if sparse.issparse(data_norm):
-            data_norm = pd.SparseDataFrame(data_norm, default_fill_value=0)
-        else:
-            data_norm = pd.DataFrame(data_norm)
-        data_norm.columns = columns
-        data_norm.index = index
-    return data_norm
+    return scprep.normalize.library_size_normalize(data)

From 19d03da0034ec6ac9fe94a08de08b18c939c5114 Mon Sep 17 00:00:00 2001
From: Scott Gigante <scottgigante@gmail.com>
Date: Fri, 21 Sep 2018 14:16:35 -0400
Subject: [PATCH 31/32] remove unnecessary test_requires scprep

---
 python/setup.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/python/setup.py b/python/setup.py
index 2e4c99d0..5cffd732 100644
--- a/python/setup.py
+++ b/python/setup.py
@@ -15,7 +15,6 @@
 
 test_requires = [
     'nose2',
-    'scprep',
 ]
 
 if sys.version_info[0] == 3:

From dbea6889c07ccf0c26090ec59f1790e1830b6163 Mon Sep 17 00:00:00 2001
From: Scott Gigante <scottgigante@gmail.com>
Date: Fri, 21 Sep 2018 14:37:59 -0400
Subject: [PATCH 32/32] don't document io and preprocessing

---
 python/doc/source/api.rst | 16 ----------------
 1 file changed, 16 deletions(-)

diff --git a/python/doc/source/api.rst b/python/doc/source/api.rst
index a5e37ecd..66b6e079 100644
--- a/python/doc/source/api.rst
+++ b/python/doc/source/api.rst
@@ -9,22 +9,6 @@ MAGIC
     :inherited-members:
     :show-inheritance:
 
-File Input/Output
------------------
-
-.. automodule:: magic.io
-    :members:
-    :inherited-members:
-    :show-inheritance:
-
-Data Preprocessing
-------------------
-
-.. automodule:: magic.preprocessing
-    :members:
-    :inherited-members:
-    :show-inheritance:
-
 Plotting
 --------