diff --git a/.DS_Store b/.DS_Store index 54544be..07d7109 100644 Binary files a/.DS_Store and b/.DS_Store differ diff --git a/DESCRIPTION b/DESCRIPTION index 458a3a5..2617bf9 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Package: ParetoTI Type: Package Title: R toolbox for Archetypal Analysis and Pareto Task Inference on single cell data -Version: 0.1.10 +Version: 0.1.11 Author: Vitalii Kleshchevnikov Maintainer: Vitalii Kleshchevnikov Authors@R: c(person("Vitalii", "Kleshchevnikov",, "vk7@sanger.ac.uk", role = c("aut", "cre"))) diff --git a/NAMESPACE b/NAMESPACE index e083475..253fc8c 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -37,6 +37,7 @@ export(map_go_annot) export(map_gwas_annot) export(measure_activity) export(merge_arch_dist) +export(pch_fit) export(plot.gam_deriv) export(plot.r_pch_fit) export(plot_arc) diff --git a/R/fit_pch.R b/R/fit_pch.R index 0bf0e5e..375c07e 100644 --- a/R/fit_pch.R +++ b/R/fit_pch.R @@ -80,15 +80,22 @@ fit_pch = function(data, noc = as.integer(3), I = NULL, U = NULL, delta = 0, verbose = FALSE, conv_crit = 1e-4, maxiter = 500, check_installed = T, order_by = seq(1, nrow(data)), - order_type = c("cosine", "side", "align")[3], - volume_ratio = c("t_ratio", "variance_ratio", "none")[1], + order_type = c("align", "cosine", "side"), + volume_ratio = c("t_ratio", "variance_ratio", "none"), converge_else_fail = TRUE, var_in_dims = FALSE, normalise_var = TRUE, method = c("pcha", "kmeans"), method_options = list()) { - if(check_installed) .py_pcha_installed() + # check arguments + method = match.arg(method) + volume_ratio = match.arg(volume_ratio) + order_type = match.arg(order_type) + + + if(method == "pcha"){ + + if(check_installed) .py_pcha_installed() - if(isTRUE(method[1] == "pcha")){ # run PCHA ------------------------------------------------------------------- # coerce to matrix @@ -112,8 +119,10 @@ fit_pch = function(data, noc = as.integer(3), I = NULL, U = NULL, }) 
#--------------------------------------------------------------------------- - } else if(isTRUE(method[1] == "kmeans")) { + } else if(method == "kmeans") { + # run k-means -------------------------------------------------------------- + # set defaults or replace them with provided options default = list(iter.max = 10, nstart = 1, algorithm = c("Hartigan-Wong", "Lloyd", "Forgy", @@ -122,19 +131,26 @@ fit_pch = function(data, noc = as.integer(3), I = NULL, U = NULL, options = c(default[default_retain], method_options) res = kmeans(Matrix::t(data), centers = as.integer(noc), - iter.max = default$iter.max, nstart = default$nstart, - algorithm = default$algorithm, trace = default$trace) - res = list(XC = t(res$centers), - S = Matrix::sparseMatrix(i = res$cluster, - j = seq_len(length(res$cluster)), x = 1), - C = Matrix::sparseMatrix(i = res$cluster, - j = seq_len(length(res$cluster)), x = 0), + iter.max = options$iter.max, nstart = options$nstart, + algorithm = options$algorithm, trace = options$trace) + + # Create S as binary cluster membership matrix + S = Matrix::sparseMatrix(i = res$cluster, + j = seq_len(length(res$cluster)), x = 1) + # compute C so that X %*% C gives cluster averages + C = S / matrix(Matrix::rowSums(S), nrow = nrow(S), ncol = ncol(S), byrow = FALSE) + C = Matrix::t(C) + + # create pch_fit object to be returned + res = list(XC = t(res$centers), S = S, C = C, SSE = res$tot.withinss, varexpl = res$betweenss / res$totss) if(!is.null(rownames(data))) rownames(res$XC) = rownames(data) colnames(res$XC) = NULL class(res) = "pch_fit" + #--------------------------------------------------------------------------- - } else stop("method should be pcha or kmeans") + + } else stop("method should be pcha or kmeans") if(is.null(res)) return(NULL) @@ -145,7 +161,7 @@ fit_pch = function(data, noc = as.integer(3), I = NULL, U = NULL, arch_order = .find_archetype_order(XC2, noc, order_type = order_type) res$XC = res$XC[, arch_order] res$S = res$S[arch_order, ] - res$C 
= res$C[arch_order, ] + res$C = res$C[, arch_order] } else { # when only one archetype make sure data is still in the matrix form res$XC = matrix(res$XC, length(res$XC), 1) @@ -158,7 +174,7 @@ fit_pch = function(data, noc = as.integer(3), I = NULL, U = NULL, # when calculating convex hull and volume of the polytope # adjust number of dimensions to noc data_dim = seq(1, noc-1) - if(volume_ratio == "t_ratio" & nrow(data) >= length(data_dim) & noc > 2){ + if(volume_ratio == "t_ratio" & nrow(data) >= length(data_dim) & noc > 2 & method != "poisson_aa"){ # calculate volume or area of the polytope only when number of archetypes (noc) > number of dimenstions which means # find volume of the convex hull of the data hull_vol = fit_convhulln(data[data_dim, ], positions = FALSE) @@ -201,7 +217,7 @@ fit_pch = function(data, noc = as.integer(3), I = NULL, U = NULL, } else { - # 3 none + # 3 NA if(volume_ratio == "t_ratio" & isTRUE(converge_else_fail)) message(paste0("Convex hull and t-ratio not computed for noc: ", noc," and nrow(data) = ", nrow(data),". 
fit_pch() can calculate volume or area of the polytope only when\nthe number of archetypes (noc) > the number of dimensions (when polytope is convex):\ncheck that noc > nrow(data),\nselect only revelant dimensions or increase noc")) res$hull_vol = NA @@ -1095,3 +1111,20 @@ plot_dim_var = function(rand_arch, } res } + +##' @rdname fit_pch +##' @name pch_fit +##' @description \code{pch_fit()} a constructor function for the "pch_fit" class +##' @export pch_fit +pch_fit = function(XC, S, C, SSE, varexpl, arc_vol, hull_vol, t_ratio, var_vert, + var_dim, total_var, summary, call) { + + # add integrity checks + + # create object + structure(list(XC = XC, S = S, C = C, SSE = SSE, varexpl = varexpl, + arc_vol = arc_vol, hull_vol = hull_vol, t_ratio = t_ratio, + var_vert = var_vert, var_dim = var_dim, total_var = total_var, + summary = summary, call = call), + class = "pch_fit") +} diff --git a/docs/LICENSE-text.html b/docs/LICENSE-text.html index a75990e..5455860 100644 --- a/docs/LICENSE-text.html +++ b/docs/LICENSE-text.html @@ -60,7 +60,7 @@ ParetoTI - 0.1.9 + 0.1.11 diff --git a/docs/articles/Comparison_to_kmeans.html b/docs/articles/Comparison_to_kmeans.html index 95c84fe..01c6972 100644 --- a/docs/articles/Comparison_to_kmeans.html +++ b/docs/articles/Comparison_to_kmeans.html @@ -30,7 +30,7 @@ ParetoTI - 0.1.9 + 0.1.11 diff --git a/docs/articles/Comparison_to_kmeans_files/figure-html/arc_vs_kmeans-1.png b/docs/articles/Comparison_to_kmeans_files/figure-html/arc_vs_kmeans-1.png index f41c2de..4cbd0df 100644 Binary files a/docs/articles/Comparison_to_kmeans_files/figure-html/arc_vs_kmeans-1.png and b/docs/articles/Comparison_to_kmeans_files/figure-html/arc_vs_kmeans-1.png differ diff --git a/docs/articles/Comparison_to_kmeans_files/figure-html/sim-1.png b/docs/articles/Comparison_to_kmeans_files/figure-html/sim-1.png index 4bc8101..8b47ff0 100644 Binary files a/docs/articles/Comparison_to_kmeans_files/figure-html/sim-1.png and 
b/docs/articles/Comparison_to_kmeans_files/figure-html/sim-1.png differ diff --git a/docs/articles/Comparison_to_kmeans_files/figure-html/unnamed-chunk-1-1.png b/docs/articles/Comparison_to_kmeans_files/figure-html/unnamed-chunk-1-1.png index c461975..2673ab1 100644 Binary files a/docs/articles/Comparison_to_kmeans_files/figure-html/unnamed-chunk-1-1.png and b/docs/articles/Comparison_to_kmeans_files/figure-html/unnamed-chunk-1-1.png differ diff --git a/docs/articles/Comparison_to_kmeans_files/figure-html/unnamed-chunk-2-1.png b/docs/articles/Comparison_to_kmeans_files/figure-html/unnamed-chunk-2-1.png index 8a726b5..ef0c3ad 100644 Binary files a/docs/articles/Comparison_to_kmeans_files/figure-html/unnamed-chunk-2-1.png and b/docs/articles/Comparison_to_kmeans_files/figure-html/unnamed-chunk-2-1.png differ diff --git a/docs/articles/Comparison_to_kmeans_files/figure-html/unnamed-chunk-2-2.png b/docs/articles/Comparison_to_kmeans_files/figure-html/unnamed-chunk-2-2.png index 248ba70..0e1ff21 100644 Binary files a/docs/articles/Comparison_to_kmeans_files/figure-html/unnamed-chunk-2-2.png and b/docs/articles/Comparison_to_kmeans_files/figure-html/unnamed-chunk-2-2.png differ diff --git a/docs/articles/Hepatocyte_example.html b/docs/articles/Hepatocyte_example.html index b047e93..c45a49f 100644 --- a/docs/articles/Hepatocyte_example.html +++ b/docs/articles/Hepatocyte_example.html @@ -30,7 +30,7 @@ ParetoTI - 0.1.9 + 0.1.11 @@ -204,26 +204,26 @@

data_lab = as.numeric(logcounts(hepatocytes["Alb",])), text_size = 60, data_size = 6) plotly::layout(p_pca, title = "Hepatocytes colored by Alb (Albumine)") -
-
p_pca = plot_arc(arch_data = arc, data = PCs4arch, 
+
+
p_pca = plot_arc(arch_data = arc, data = PCs4arch, 
                  which_dimensions = 1:3, line_size = 1.5,
                  data_lab = as.numeric(logcounts(hepatocytes["Cyp2e1",])),
                  text_size = 60, data_size = 6) 
 plotly::layout(p_pca, title = "Hepatocytes colored by Cyp2e1")
-
-
p_pca = plot_arc(arch_data = arc, data = PCs4arch, 
+
+
p_pca = plot_arc(arch_data = arc, data = PCs4arch, 
                  which_dimensions = 1:3, line_size = 1.5,
                  data_lab = as.numeric(logcounts(hepatocytes["Gpx1",])),
                  text_size = 60, data_size = 6) 
 plotly::layout(p_pca, title = "Hepatocytes colored by Gpx1")
-
-
p_pca = plot_arc(arch_data = arc, data = PCs4arch, 
+
+
p_pca = plot_arc(arch_data = arc, data = PCs4arch, 
                  which_dimensions = 1:3, line_size = 1.5,
                  data_lab = as.numeric(logcounts(hepatocytes["Apoa2",])),
                  text_size = 60, data_size = 6) 
 plotly::layout(p_pca, title = "Hepatocytes colored by Apoa2")
-
-
# You can also check which cells have high entropy of logistic regression predictions when classifying all cells in a tissue into cell types. These could have been misclassified by the method and wrongly assigned to Hepatocytes, or these could be doublets.
+
+
# You can also check which cells have high entropy of logistic regression predictions when classifying all cells in a tissue into cell types. These could have been misclassified by the method and wrongly assigned to Hepatocytes, or these could be doublets.
 
 # find archetypes on all data (allows using archetype weights to describe cells)
 arc_1 = fit_pch(PCs4arch, volume_ratio = "t_ratio", maxiter = 500,
@@ -235,8 +235,8 @@ 

data_lab = as.numeric(logcounts(hepatocytes["Alb",])), text_size = 60, data_size = 6) plotly::layout(p_pca, title = "Hepatocytes colored by Alb")

-
-
+
+

Find genes and gene sets enriched near vertices

# Map GO annotations and measure activities
@@ -285,7 +285,7 @@ 

## ## Cyp2f2, Hsd17b13, Hsd17b6, Lpin1 ## Ly6e, Gc, Fbp1, Dak -## Sds, Rpl10, Hpx, Atp5g1 +## Rpl10, Sds, Hpx, Atp5g1 ## ## tricarboxylic_acid_cycle ## citrate_metabolic_process @@ -330,8 +330,8 @@

## Adding markers to the mode... ## A marker object has been specified, but markers is not in the mode ## Adding markers to the mode...

-
- +
+

@@ -352,23 +352,23 @@

# This analysis took: Sys.time() - start

-
## Time difference of 1.601215 mins
+
## Time difference of 1.727351 mins
# plot background distribution of t-ratio and show p-value
 plot(pch_rand, type = c("t_ratio"), nudge_y = 5)
-
## Picking joint bandwidth of 0.00343
+
## Picking joint bandwidth of 0.00363

pch_rand
## Background distribution of k representative archetypes 
 ## in data with no relationships between variables (S3 class r_pch_fit)
 ## 
-## N randomisation trials:  1000 
+## N randomisation trials:  998 
 ## 
 ## Summary of best-fit polytope to observed data (including p-value):
 ## 
-##    k  var_name   var_obs p_value
-## 1: 4   varexpl 0.6166616   0.001
-## 2: 4   t_ratio 0.1565214   0.094
-## 3: 4 total_var        NA     NaN
+##    k  var_name   var_obs     p_value
+## 1: 4   varexpl 0.6166804 0.001002004
+## 2: 4   t_ratio 0.1564470 0.100200401
+## 3: 4 total_var        NA         NaN
 ## 
 ##           varexpl = variance explained by data as weighted sum of archetypes
 ##           t_ratio = volume of polytope formed by archetypes / volume of convex hull
@@ -386,7 +386,7 @@ 

Date and packages used

Sys.Date. = Sys.Date()
 Sys.Date.
-
## [1] "2019-06-13"
+
## [1] "2019-07-05"
session_info. = devtools::session_info()
 session_info.
## ─ Session info ──────────────────────────────────────────────────────────
@@ -399,7 +399,7 @@ 

## collate en_GB.UTF-8 ## ctype en_GB.UTF-8 ## tz Europe/London -## date 2019-06-13 +## date 2019-07-05 ## ## ─ Packages ────────────────────────────────────────────────────────────── ## package * version date lib source @@ -444,15 +444,15 @@

## dplyr 0.8.1 2019-05-14 [1] CRAN (R 3.5.2) ## dynamicTreeCut 1.63-1 2016-03-11 [1] CRAN (R 3.5.0) ## edgeR 3.24.3 2019-01-02 [1] Bioconductor -## evaluate 0.13 2019-02-12 [1] CRAN (R 3.5.2) +## evaluate 0.14 2019-05-28 [1] CRAN (R 3.5.2) ## foreach * 1.4.4 2017-12-12 [1] CRAN (R 3.5.0) -## fs 1.3.0 2019-05-02 [1] CRAN (R 3.5.2) +## fs 1.3.1 2019-05-06 [1] CRAN (R 3.5.2) ## GenomeInfoDb * 1.18.2 2019-02-12 [1] Bioconductor ## GenomeInfoDbData 1.2.0 2018-10-16 [1] Bioconductor ## GenomicRanges * 1.34.0 2018-10-30 [1] Bioconductor ## geometry 0.4.1 2019-03-27 [1] CRAN (R 3.5.2) ## ggbeeswarm 0.6.0 2017-08-07 [1] CRAN (R 3.5.0) -## ggplot2 * 3.1.1 2019-04-07 [1] CRAN (R 3.5.1) +## ggplot2 * 3.2.0 2019-06-16 [1] CRAN (R 3.5.2) ## ggridges 0.5.1 2018-09-27 [1] CRAN (R 3.5.0) ## glue 1.3.1 2019-03-12 [1] CRAN (R 3.5.2) ## GO.db 3.7.0 2018-10-25 [1] Bioconductor @@ -470,7 +470,7 @@

## IRanges * 2.16.0 2018-10-30 [1] Bioconductor ## iterators 1.0.10 2018-07-13 [1] CRAN (R 3.5.0) ## jsonlite 1.6 2018-12-07 [1] CRAN (R 3.5.0) -## knitr 1.22 2019-03-08 [1] CRAN (R 3.5.1) +## knitr 1.23 2019-05-18 [1] CRAN (R 3.5.2) ## labeling 0.3 2014-08-23 [1] CRAN (R 3.5.0) ## later 0.8.0 2019-02-11 [1] CRAN (R 3.5.2) ## lattice 0.20-35 2017-03-25 [2] CRAN (R 3.5.1) @@ -484,9 +484,9 @@

## Matrix * 1.2-14 2018-04-13 [2] CRAN (R 3.5.1) ## matrixStats * 0.54.0 2018-07-23 [1] CRAN (R 3.5.0) ## memoise 1.1.0 2017-04-21 [1] CRAN (R 3.5.0) -## mime 0.6 2018-10-05 [1] CRAN (R 3.5.0) +## mime 0.7 2019-06-11 [1] CRAN (R 3.5.2) ## munsell 0.5.0 2018-06-12 [1] CRAN (R 3.5.0) -## ParetoTI * 0.1.8 2019-06-13 [1] local +## ParetoTI * 0.1.11 2019-07-04 [1] local ## pillar 1.4.1 2019-05-28 [1] CRAN (R 3.5.2) ## pkgbuild 1.0.3 2019-03-20 [1] CRAN (R 3.5.1) ## pkgconfig 2.0.2 2018-08-16 [1] CRAN (R 3.5.0) @@ -496,13 +496,13 @@

## plotly 4.9.0 2019-04-10 [1] CRAN (R 3.5.1) ## plyr 1.8.4 2016-06-08 [1] CRAN (R 3.5.0) ## prettyunits 1.0.2 2015-07-13 [1] CRAN (R 3.5.0) -## processx 3.3.0 2019-03-10 [1] CRAN (R 3.5.2) +## processx 3.3.1 2019-05-08 [1] CRAN (R 3.5.2) ## promises 1.0.1 2018-04-13 [1] CRAN (R 3.5.0) ## ps 1.3.0 2018-12-21 [1] CRAN (R 3.5.0) ## purrr 0.3.2 2019-03-15 [1] CRAN (R 3.5.2) ## R.methodsS3 1.7.1 2016-02-16 [1] CRAN (R 3.5.0) ## R.oo 1.22.0 2018-04-22 [1] CRAN (R 3.5.0) -## R.utils 2.8.0 2019-02-14 [1] CRAN (R 3.5.2) +## R.utils 2.9.0 2019-06-13 [1] CRAN (R 3.5.1) ## R6 2.4.0 2019-02-14 [1] CRAN (R 3.5.2) ## Rcpp 1.0.1 2019-03-17 [1] CRAN (R 3.5.2) ## RCurl 1.95-4.12 2019-03-04 [1] CRAN (R 3.5.2) @@ -513,7 +513,7 @@

## rhdf5 2.26.2 2019-01-02 [1] Bioconductor ## Rhdf5lib 1.4.3 2019-03-25 [1] Bioconductor ## rlang 0.3.4 2019-04-07 [1] CRAN (R 3.5.2) -## rmarkdown 1.12 2019-03-14 [1] CRAN (R 3.5.2) +## rmarkdown 1.13 2019-05-22 [1] CRAN (R 3.5.2) ## rngtools * 1.3.1 2018-05-15 [1] CRAN (R 3.5.0) ## roxygen2 6.1.1 2018-11-07 [1] CRAN (R 3.5.0) ## rprojroot 1.3-2 2018-01-03 [1] CRAN (R 3.5.0) @@ -526,7 +526,7 @@

## sessioninfo 1.1.1 2018-11-05 [1] CRAN (R 3.5.0) ## shiny 1.3.2 2019-04-22 [1] CRAN (R 3.5.2) ## SingleCellExperiment * 1.4.1 2019-01-04 [1] Bioconductor -## statmod 1.4.30 2017-06-18 [1] CRAN (R 3.5.0) +## statmod 1.4.32 2019-05-29 [1] CRAN (R 3.5.2) ## stringi 1.4.3 2019-03-12 [1] CRAN (R 3.5.2) ## stringr 1.4.0 2019-02-10 [1] CRAN (R 3.5.2) ## SummarizedExperiment * 1.12.0 2018-10-30 [1] Bioconductor @@ -539,8 +539,8 @@

## viridis 0.5.1 2018-03-29 [1] CRAN (R 3.5.0) ## viridisLite 0.3.0 2018-02-01 [1] CRAN (R 3.5.0) ## withr 2.1.2 2018-03-15 [1] CRAN (R 3.5.0) -## xfun 0.6 2019-04-02 [1] CRAN (R 3.5.1) -## XML 3.98-1.19 2019-03-06 [1] CRAN (R 3.5.2) +## xfun 0.7 2019-05-14 [1] CRAN (R 3.5.2) +## XML 3.98-1.20 2019-06-06 [1] CRAN (R 3.5.2) ## xml2 1.2.0 2018-01-24 [1] CRAN (R 3.5.0) ## xtable 1.8-4 2019-04-21 [1] CRAN (R 3.5.2) ## XVector 0.22.0 2018-10-30 [1] Bioconductor diff --git a/docs/articles/Hepatocyte_example_files/figure-html/k_poly-1.png b/docs/articles/Hepatocyte_example_files/figure-html/k_poly-1.png index 2e81c72..de3e02d 100644 Binary files a/docs/articles/Hepatocyte_example_files/figure-html/k_poly-1.png and b/docs/articles/Hepatocyte_example_files/figure-html/k_poly-1.png differ diff --git a/docs/articles/Hepatocyte_example_files/figure-html/k_poly-2.png b/docs/articles/Hepatocyte_example_files/figure-html/k_poly-2.png index f5860e0..18ef53c 100644 Binary files a/docs/articles/Hepatocyte_example_files/figure-html/k_poly-2.png and b/docs/articles/Hepatocyte_example_files/figure-html/k_poly-2.png differ diff --git a/docs/articles/Hepatocyte_example_files/figure-html/k_poly-3.png b/docs/articles/Hepatocyte_example_files/figure-html/k_poly-3.png index 07f67c5..9c7a533 100644 Binary files a/docs/articles/Hepatocyte_example_files/figure-html/k_poly-3.png and b/docs/articles/Hepatocyte_example_files/figure-html/k_poly-3.png differ diff --git a/docs/articles/Hepatocyte_example_files/figure-html/k_poly-4.png b/docs/articles/Hepatocyte_example_files/figure-html/k_poly-4.png index 9b55303..dc18793 100644 Binary files a/docs/articles/Hepatocyte_example_files/figure-html/k_poly-4.png and b/docs/articles/Hepatocyte_example_files/figure-html/k_poly-4.png differ diff --git a/docs/articles/Hepatocyte_example_files/figure-html/unnamed-chunk-1-1.png b/docs/articles/Hepatocyte_example_files/figure-html/unnamed-chunk-1-1.png index cf9c7eb..9f4dace 100644 Binary files 
a/docs/articles/Hepatocyte_example_files/figure-html/unnamed-chunk-1-1.png and b/docs/articles/Hepatocyte_example_files/figure-html/unnamed-chunk-1-1.png differ diff --git a/docs/articles/Hepatocyte_example_files/figure-html/unnamed-chunk-3-1.png b/docs/articles/Hepatocyte_example_files/figure-html/unnamed-chunk-3-1.png index 5633112..32e501b 100644 Binary files a/docs/articles/Hepatocyte_example_files/figure-html/unnamed-chunk-3-1.png and b/docs/articles/Hepatocyte_example_files/figure-html/unnamed-chunk-3-1.png differ diff --git a/docs/articles/index.html b/docs/articles/index.html index 1da6c7e..7a2a051 100644 --- a/docs/articles/index.html +++ b/docs/articles/index.html @@ -60,7 +60,7 @@ ParetoTI - 0.1.9 + 0.1.11

diff --git a/docs/authors.html b/docs/authors.html index f0c697f..332a9e0 100644 --- a/docs/authors.html +++ b/docs/authors.html @@ -60,7 +60,7 @@ ParetoTI - 0.1.9 + 0.1.11
diff --git a/docs/index.html b/docs/index.html index e434a68..fed023c 100644 --- a/docs/index.html +++ b/docs/index.html @@ -5,11 +5,11 @@ -R toolbox for Archetypal Analysis and Pareto Task Inference on single cell data (based on ParTI) • ParetoTI +R toolbox for Archetypal Analysis and Pareto Task Inference on single cell data • ParetoTI - + ParetoTI - 0.1.9 + 0.1.11
@@ -126,7 +126,8 @@

# Install python dependencies (like py_pcha) into python conda environment, # and (optionally) install *extra_packages*. ParetoTI::install_py_pcha(method = "conda", - extra_packages = c("tensorflow", "pandas", "keras", "h5py", + extra_packages = c("tensorflow", "tensorflow-probability", + "pandas", "keras", "h5py", "geosketch", "pydot", "scikit-learn==0.20", "umap-learn"))
# If no conda manager is installed on your machine, install python 3 miniconda distribution:
@@ -140,13 +141,17 @@ 

# Light install: source activate reticulate_PCHA && pip install --upgrade py_pcha numpy scipy datetime geosketch umap-learn # To use more features: -source activate reticulate_PCHA && pip install --upgrade py_pcha numpy scipy datetime tensorflow pandas keras h5py geosketch pydot sklearn umap-learn +source activate reticulate_PCHA && pip install --upgrade py_pcha numpy scipy datetime tensorflow tensorflow-probability pandas keras h5py geosketch pydot sklearn umap-learn -# On some platforms R sees only the "base" conda enviroment (like RStudio Server) +# Sometimes on some platforms R sees only the "base" conda enviroment +# (like when RStudio Server is setup incorrectly) # In that case use: source activate base && pip install --upgrade py_pcha numpy scipy datetime geosketch umap-learn

# Finally, check that py_pcha library is successfully installed and discoverable
 reticulate::py_discover_config("py_pcha")
+
# To make sure R uses the correct conda environment you can run this when you start R:
+reticulate::use_condaenv("reticulate_PCHA", conda = "auto",
+                         required = TRUE) # set TRUE to force R to use reticulate_PCHA

Example: Finding archetypes in data created by simulating a triangle

library(ParetoTI)
 library(ggplot2)
@@ -165,24 +170,24 @@ 

# Plot static 2D scatterplot using ggplot2 plot_arc(arch_data = arc_data, data = data, which_dimensions = 1:2) + - theme_bw() + theme_bw() # Plot data as 2D density rather than scatterplot plot_arc(arch_data = arc_data, data = data, which_dimensions = 1:2, geom = ggplot2::geom_bin2d) + - theme_bw()

+ theme_bw()
# Project to UMAP coordinates (3D -> 2D)
 arc_umap = arch_to_umap(arc_data, data, which_dimensions = 1:2,
                         method = c("naive", # implemented in R and slow
                                    "umap-learn")) # requires python module
 plot_arc(arch_data = arc_umap$arch_data, data = arc_umap$data,
     which_dimensions = 1:2) +
-    theme_bw()
+ theme_bw()
# Project to tSNE coordinates (3D -> 2D, requires Rtsne package)
 arc_tsne = arch_to_tsne(arc_data, data, which_dimensions = 1:2)
 plot_arc(arch_data = arc_tsne$arch_data, data = arc_tsne$data,
     which_dimensions = 1:2) +
-    theme_bw()
+ theme_bw()
diff --git a/docs/reference/align_arc.html b/docs/reference/align_arc.html index c08e522..0f07e32 100644 --- a/docs/reference/align_arc.html +++ b/docs/reference/align_arc.html @@ -64,7 +64,7 @@ ParetoTI - 0.1.9 + 0.1.11
@@ -169,7 +169,7 @@

Examp delta = 0, type = "s") # align archetypes align_arc(arc_data$pch_fits$XC[[1]], arc_data$pch_fits$XC[[2]])
#> $dist -#> [1] 3.710594 +#> [1] 3.526453 #> #> $ind #> [1] 1 2 3 4 5 6 diff --git a/docs/reference/annotate_archetypes-1.png b/docs/reference/annotate_archetypes-1.png index 289d4b4..3cb37be 100644 Binary files a/docs/reference/annotate_archetypes-1.png and b/docs/reference/annotate_archetypes-1.png differ diff --git a/docs/reference/annotate_archetypes.html b/docs/reference/annotate_archetypes.html index cfe223a..b830674 100644 --- a/docs/reference/annotate_archetypes.html +++ b/docs/reference/annotate_archetypes.html @@ -63,7 +63,7 @@ ParetoTI - 0.1.9 + 0.1.11
diff --git a/docs/reference/arch_dist.html b/docs/reference/arch_dist.html index 5c06dc5..41f6d06 100644 --- a/docs/reference/arch_dist.html +++ b/docs/reference/arch_dist.html @@ -63,7 +63,7 @@ ParetoTI - 0.1.9 + 0.1.11 diff --git a/docs/reference/dot-archLines.html b/docs/reference/dot-archLines.html index c10b46f..4ec3496 100644 --- a/docs/reference/dot-archLines.html +++ b/docs/reference/dot-archLines.html @@ -63,7 +63,7 @@ ParetoTI - 0.1.9 + 0.1.11 diff --git a/docs/reference/find_decreasing.html b/docs/reference/find_decreasing.html index 1ef5704..0101b65 100644 --- a/docs/reference/find_decreasing.html +++ b/docs/reference/find_decreasing.html @@ -68,7 +68,7 @@ ParetoTI - 0.1.9 + 0.1.11 diff --git a/docs/reference/find_gam_deriv.html b/docs/reference/find_gam_deriv.html index 9700b31..f8d9554 100644 --- a/docs/reference/find_gam_deriv.html +++ b/docs/reference/find_gam_deriv.html @@ -63,7 +63,7 @@ ParetoTI - 0.1.9 + 0.1.11 diff --git a/docs/reference/find_set_activity_AUCell.html b/docs/reference/find_set_activity_AUCell.html index fef8272..bac8550 100644 --- a/docs/reference/find_set_activity_AUCell.html +++ b/docs/reference/find_set_activity_AUCell.html @@ -64,7 +64,7 @@ ParetoTI - 0.1.9 + 0.1.11 diff --git a/docs/reference/fit_logistic_model.html b/docs/reference/fit_logistic_model.html index 81e5028..1a53ec8 100644 --- a/docs/reference/fit_logistic_model.html +++ b/docs/reference/fit_logistic_model.html @@ -65,7 +65,7 @@ ParetoTI - 0.1.9 + 0.1.11 diff --git a/docs/reference/fit_pch.html b/docs/reference/fit_pch.html index 6079b23..f9febca 100644 --- a/docs/reference/fit_pch.html +++ b/docs/reference/fit_pch.html @@ -42,7 +42,8 @@ fit_convhulln() computes smallest convex hull that encloses a set of points using convhulln and returns positions of convex hull points (positions = TRUE). 
merge_arch_dist() calculates distance to archtypes, and merges distance matrix to data that was used to identify archetypes, optionally adds other features of data points (through colData). plot.r_pch_fit() plot distribution of t-ratio, total variance in archetype positions and variance explained used to calculate empirical p-value -plot_dim_var() plot distribution of variance in archetype positions in each dimension and corresponding empirical p-values" /> +plot_dim_var() plot distribution of variance in archetype positions in each dimension and corresponding empirical p-values +pch_fit() a constructor function for the "pch_fit" class" /> @@ -73,7 +74,7 @@ ParetoTI - 0.1.9 + 0.1.11 @@ -137,14 +138,15 @@

Find the smallest polytope (Principal Convex Hull) that contains most of the

merge_arch_dist() calculates distance to archetypes, and merges distance matrix to data that was used to identify archetypes, optionally adds other features of data points (through colData).

plot.r_pch_fit() plot distribution of t-ratio, total variance in archetype positions and variance explained used to calculate empirical p-value

plot_dim_var() plot distribution of variance in archetype positions in each dimension and corresponding empirical p-values

+

pch_fit() a constructor function for the "pch_fit" class

fit_pch(data, noc = as.integer(3), I = NULL, U = NULL, delta = 0,
   verbose = FALSE, conv_crit = 1e-04, maxiter = 500,
   check_installed = T, order_by = seq(1, nrow(data)),
-  order_type = c("cosine", "side", "align")[3],
-  volume_ratio = c("t_ratio", "variance_ratio", "none")[1],
+  order_type = c("align", "cosine", "side"),
+  volume_ratio = c("t_ratio", "variance_ratio", "none"),
   converge_else_fail = TRUE, var_in_dims = FALSE,
   normalise_var = TRUE, method = c("pcha", "kmeans"),
   method_options = list())
@@ -207,7 +209,10 @@ 

Find the smallest polytope (Principal Convex Hull) that contains most of the dim_names = c("V1", "V2", "V3", "V4", "V5", "V6"), colors = c("#1F77B4", "#D62728", "#2CA02C", "#17BED0", "#006400", "#FF7E0F"), nudge_y = 0.5, nudge_x = 0.5, text_lab_size = 4, - line_size = 0.5)

+ line_size = 0.5) + +pch_fit(XC, S, C, SSE, varexpl, arc_vol, hull_vol, t_ratio, var_vert, + var_dim, total_var, summary, call)

Arguments

@@ -449,7 +454,7 @@

Examp return_data = T, return_arc = T, sample_prop = 0.65, order_type = "align", noc = as.integer(3), delta = 0.1, bootstrap_type = "cmq")
#> Error in apply(x, MARGIN = MARGIN, FUN = function(x_i, n) { x_i[sample.int(n, n, replace = replace, prob = prob)]}, n): dim(X) must have a positive length
p = plot_arc(arc_data, data, which_dimensions = 1:2, line_size = 1)
#> Error in is(arch_data, "pch_fit"): object 'arc_data' not found
# plot shapes of observed data and randomised data side-by-side using cowplot -library(cowplot)
#> Warning: package ‘cowplot’ was built under R version 3.5.2
#> Loading required package: ggplot2
#> +library(cowplot)
#> Warning: package ‘cowplot’ was built under R version 3.5.2
#> Loading required package: ggplot2
#> Warning: package ‘ggplot2’ was built under R version 3.5.2
#> #> Attaching package: ‘cowplot’
#> The following object is masked from ‘package:ggplot2’: #> #> ggsave
# create compound plot diff --git a/docs/reference/generate_arc.html b/docs/reference/generate_arc.html index 1407913..bc9a526 100644 --- a/docs/reference/generate_arc.html +++ b/docs/reference/generate_arc.html @@ -64,7 +64,7 @@ ParetoTI - 0.1.9 + 0.1.11
diff --git a/docs/reference/geo_sketch.html b/docs/reference/geo_sketch.html index c56e2d0..133dcef 100644 --- a/docs/reference/geo_sketch.html +++ b/docs/reference/geo_sketch.html @@ -63,7 +63,7 @@ ParetoTI - 0.1.9 + 0.1.11 diff --git a/docs/reference/hello.html b/docs/reference/hello.html index 2fbdc8d..0f2eb8a 100644 --- a/docs/reference/hello.html +++ b/docs/reference/hello.html @@ -63,7 +63,7 @@ ParetoTI - 0.1.9 + 0.1.11 diff --git a/docs/reference/index.html b/docs/reference/index.html index 560c0cb..2fd4d9c 100644 --- a/docs/reference/index.html +++ b/docs/reference/index.html @@ -60,7 +60,7 @@ ParetoTI - 0.1.9 + 0.1.11 @@ -175,7 +175,7 @@

fit_pch() k_fit_pch() fit_pch_bootstrap() average_pch_fits() fit_pch_resample() randomise_fit_pch() randomise_fit_pch1() fit_convhulln() merge_arch_dist() print(<pch_fit>) print(<k_pch_fit>) print(<b_pch_fit>) print(<r_pch_fit>) plot(<r_pch_fit>) plot_dim_var()

+

fit_pch() k_fit_pch() fit_pch_bootstrap() average_pch_fits() fit_pch_resample() randomise_fit_pch() randomise_fit_pch1() fit_convhulln() merge_arch_dist() print(<pch_fit>) print(<k_pch_fit>) print(<b_pch_fit>) print(<r_pch_fit>) plot(<r_pch_fit>) plot_dim_var() pch_fit()

@@ -240,6 +240,12 @@

compute_gene_deviance() poisson_regression()

+ +

+ + @@ -247,9 +253,9 @@

project_to_pcs()

+

project_to_pcs() project_from_pc()

-

+

Find the smallest polytope (Principal Convex Hull) that contains most of the data

Poisson regression model to find effects of sample covariates on variables

build_logit_vert() make_features() predict_vert() split_train_val() plot_confusion_vert() get_feature_weights()

Project archetype and data in low dimentions (PCA)

Project archetypes and data in PCA dimensions

diff --git a/docs/reference/install_py_pcha.html b/docs/reference/install_py_pcha.html index a456554..e8f05f8 100644 --- a/docs/reference/install_py_pcha.html +++ b/docs/reference/install_py_pcha.html @@ -64,7 +64,7 @@ ParetoTI - 0.1.9 + 0.1.11 @@ -123,7 +123,7 @@

Install python module py_pcha and numpy

install_py_pcha(method = "auto", conda = "auto",
-  python_version = "python 3.7.3", envname = "reticulate_PCHA_37",
+  python_version = "python 3.7.3", envname = "reticulate_PCHA",
   overwrite_env = F, extra_packages = character(0),
   packages = c("pip", "py_pcha", "numpy", "scipy", "datetime"))
@@ -174,11 +174,7 @@

Examp install_py_pcha(extra_packages = c("tensorflow", "pandas", "keras", "h5py", "geosketch", "pydot", "sklearn", "umap-learn")) -## If installation fails with an error "Cannot fetch index base URL http://pypi.python.org/simple/" try this solution: - -# "Older versions of pip and distribute default to http://pypi.python.org/simple, which no longer works. A solution is to install an up-to-date pip and distribute using pip install -i https://pypi.python.org/simple -U pip distribute into the virtual environment before running the rest of the build process." - -# use command line to set directory for user libraries, update pip, setuptools, wheel in that directory, useful to add that directory to .bashrc PYTHONPATH=dir +## See for installation details # } diff --git a/docs/reference/measure_activity.html b/docs/reference/measure_activity.html index 1958a55..63fa341 100644 --- a/docs/reference/measure_activity.html +++ b/docs/reference/measure_activity.html @@ -68,7 +68,7 @@ ParetoTI - 0.1.9 + 0.1.11 diff --git a/docs/reference/plot_arc-1.png b/docs/reference/plot_arc-1.png index ba944df..0a5b0fa 100644 Binary files a/docs/reference/plot_arc-1.png and b/docs/reference/plot_arc-1.png differ diff --git a/docs/reference/plot_arc-2.png b/docs/reference/plot_arc-2.png index 7377b0c..c2a6d3e 100644 Binary files a/docs/reference/plot_arc-2.png and b/docs/reference/plot_arc-2.png differ diff --git a/docs/reference/plot_arc-3.png b/docs/reference/plot_arc-3.png index 58d419d..e139693 100644 Binary files a/docs/reference/plot_arc-3.png and b/docs/reference/plot_arc-3.png differ diff --git a/docs/reference/plot_arc-4.png b/docs/reference/plot_arc-4.png index ce69ce8..88ce8ae 100644 Binary files a/docs/reference/plot_arc-4.png and b/docs/reference/plot_arc-4.png differ diff --git a/docs/reference/plot_arc.html b/docs/reference/plot_arc.html index 65acb59..035e6b7 100644 --- a/docs/reference/plot_arc.html +++ b/docs/reference/plot_arc.html @@ -65,7 +65,7 @@ ParetoTI - 0.1.9 + 0.1.11 
@@ -126,7 +126,7 @@

Plot data with archetypes in 2D, 3D and a panel of 2D

plot_arc(arch_data = NULL, data, which_dimensions = as.integer(1:2),
   type = c("average", "all")[1], average_func = mean,
-  geom = list(ggplot2::geom_point, ggplot2::geom_bin2d)[[1]],
+  geom = list(ggplot2::geom_point, ggplot2::geom_bin2d)[[1]],
   colors = c("#1F77B4", "#D62728", "#2CA02C", "#17BED0", "#006400",
   "#FF7E0F"), arch_size = NULL, arch_alpha = 0.4, data_size = 4,
   data_alpha = 1, line_size = NULL, data_lab = "data",
@@ -263,9 +263,9 @@ 

Examp data = generate_data(archetypes$XC, N_examples = 1e4, jiiter = 0.04, size = 0.9) plot_arc(arch_data = archetypes, data = data, which_dimensions = 1:2) + - theme_bw()
# Plot data as 2D density rather than points + theme_bw()
# Plot data as 2D density rather than points plot_arc(arch_data = archetypes, data = data, - which_dimensions = 1:2, geom = ggplot2::geom_bin2d)
+ which_dimensions = 1:2, geom = ggplot2::geom_bin2d)
# Random data that fits into the triangle (3D) set.seed(4355) archetypes = generate_arc(arc_coord = list(c(5, 0, 4), c(-10, 15, 0), c(-30, -20, -5)), @@ -281,14 +281,14 @@

Examp arc_tsne = arch_to_tsne(archetypes, data, which_dimensions = 1:2) plot_arc(arch_data = arc_tsne$arch_data, data = arc_tsne$data, which_dimensions = 1:2) + - theme_bw()

# Project to UMAP representation arc_umap = arch_to_umap(archetypes, data, which_dimensions = 1:2, method = c("naive", # implemented in R and slow "umap-learn")) # requires python module plot_arc(arch_data = arc_umap$arch_data, data = arc_umap$data, which_dimensions = 1:2) + - theme_bw()

+ theme_bw()
@@ -168,9 +168,9 @@

Examp plot_arc(arch_data = arc_data, data = data, which_dimensions = 1:2, type = "all", arch_size = 2, colors = c("#D62728", "#1F77B4", "#2CA02C", "#17BED0", "grey")) + - theme_bw()
#> Error in is(arch_data, "pch_fit"): object 'arc_data' not found
# Show variance explained by a polytope with each k + theme_bw()
#> Error in is(arch_data, "pch_fit"): object 'arc_data' not found
# Show variance explained by a polytope with each k plot_arc_var(arc_data, type = c("varexpl", "SSE", "res_varexpl")[1], - point_size = 2, line_size = 1.5) + theme_bw()
#> Error in is(arc_data, "k_pch_fit"): object 'arc_data' not found
+ point_size = 2, line_size = 1.5) + theme_bw()
#> Error in is(arc_data, "k_pch_fit"): object 'arc_data' not found
diff --git a/docs/reference/plot_gam.html b/docs/reference/plot_gam.html index 61f4994..1ed469d 100644 --- a/docs/reference/plot_gam.html +++ b/docs/reference/plot_gam.html @@ -63,7 +63,7 @@ ParetoTI - 0.1.9 + 0.1.11 diff --git a/docs/reference/poisson_regression.html b/docs/reference/poisson_regression.html new file mode 100644 index 0000000..876b62c --- /dev/null +++ b/docs/reference/poisson_regression.html @@ -0,0 +1,267 @@ + + + + + + + + +Poisson regression model to find effects of sample covariates on variables — poisson_regression • ParetoTI + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+
+ + + +
+ +
+
+ + +
+ +

Poisson regression models gene expression (Y) as a function of gene mean and sample covariates (X) +mu = beta * X +Y ~ Poisson(mu)

+ +
+ +
compute_gene_deviance(data, family = "poisson", covar = NULL,
+  precision = c("double", "single"), verbose = FALSE,
+  optimiser = greta::adam(), max_iterations = 5000,
+  tolerance = 1e-06)
+
+poisson_regression(data, covar = NULL, beta_mean = 0, beta_sd = 3,
+  precision = c("double", "single"))
+ +

Arguments

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
data

data matrix (data points * dimensions), can be dense and sparse matrix, SummarizedExperiment/SingleCellExperiment, Seurat (counts slot is used). poisson_regression() accepts only a dense matrix at the moment (limitation of greta).

family

character naming the data distribution

covar

matrix (data points * covariates) or vector of column names (for compute_gene_deviance() and SingleCellExperiment, Seurat) containing covariates affecting expression in addition to gene mean (coverage, batch). Adding this will find genes whose deviance (residuals) is unexplained both by these covariates and Poisson noise (covar = NULL tests Poisson noise alone).

precision

argument for model. Use "single" for large datasets to reduce memory footprint

verbose

logical, plot greta model structure and print messages?

optimiser

optimiser used to find regression coefficients and deviance when covariates are added, see greta::opt()

max_iterations

number of iterations to run optimiser for.

tolerance

the numerical tolerance for the solution, the optimiser stops when the (absolute) difference in the joint density between successive iterations drops below this level.

beta_mean

prior mean for coefficients

beta_sd

prior sd for coefficients, use small values to regularise (i.e. penalise coefficients that deviate too much from 0)

+ +

Value

+ +

compute_gene_deviance(): list containing the deviance vector with dimension names (genes) as names, beta coefficient matrix (dimensions * coeffs) and greta model used to compute those. For SingleCellExperiment the same object with beta coefficients and deviance as rowData is returned. For Seurat the same object is returned updated with beta coefficients and deviance in Seurat::GetAssay(obj, "RNA")@meta.features.

+

poisson_regression(): R environment containing the model and parameters as greta arrays

+ + +

Examples

+
# Use fake data as example +# Random data that fits into the triangle +set.seed(4355) +arc_data = generate_arc(arc_coord = list(c(7, 3, 10), c(12, 17, 11), c(30, 20, 9)), + mean = 0, sd = 1) +data = generate_data(arc_data$XC, N_examples = 1e4, jiiter = 0.04, size = 0.9) +# Take Poisson sample with the mean defined by each entry of the data matrix +# (this creates Poisson-distributed positive integer data) +data = matrix(rpois(length(data), (data)), nrow(data), ncol(data)) + +# Compute deviance from the mean (residuals for Poisson data) +dev = compute_gene_deviance(t(data)) +# As you can see, the third dimension has the lowest deviance +dev
#> $deviance +#> [1] 19179.31 16610.19 10733.26 +#> +#> $beta +#> beta_mean +#> [1,] 2.639314 +#> [2,] 2.546119 +#> [3,] 2.158645 +#> +#> $model +#> NULL +#>
# because the vertices of the triangle have almost identical position in third dimension. +plot_arc(arch_data = arc_data, data = data, + which_dimensions = 1:3, data_alpha = 0.5)
#> No trace type specified: +#> Based on info supplied, a 'scatter3d' trace seems appropriate. +#> Read more about this trace type -> https://plot.ly/r/reference/#scatter3d
#> A marker object has been specified, but markers is not in the mode +#> Adding markers to the mode...
# You can use deviance to find which dimensions have variability to be explained with Archetypal Analysis + +# Create a probabilistic Poisson regression model with greta +# to study effects of covariates on Poisson data (requires greta installed) +
# NOT RUN { +model = poisson_regression(t(data), + covar = matrix(rnorm(ncol(data)), ncol(data), 1)) +# plot the structure of tensorflow computation graph +plot(model$model) + +# find parameters using adam optimiser +res = greta::opt(model$model, optimiser = greta::adam(), max_iterations = 500) +# did the model converge before 500 iterations? +res$iterations +# Value of Poisson negative log likelihood (see greta documentation for details) +res$value +# View beta parameters for each dimension (columns), log(mean) in the first row, +# covariate coefficients in the subsequent rows +res$par$beta +# }
+
+ +
+ +
+ + +
+

Site built with pkgdown 1.3.0.

+
+
+
+ + + + + + diff --git a/docs/reference/predict_vert.html b/docs/reference/predict_vert.html index bfc457a..523c794 100644 --- a/docs/reference/predict_vert.html +++ b/docs/reference/predict_vert.html @@ -68,7 +68,7 @@ ParetoTI - 0.1.9 + 0.1.11 diff --git a/docs/reference/project_to_pcs-1.png b/docs/reference/project_to_pcs-1.png new file mode 100644 index 0000000..27e53b9 Binary files /dev/null and b/docs/reference/project_to_pcs-1.png differ diff --git a/docs/reference/project_to_pcs-2.png b/docs/reference/project_to_pcs-2.png new file mode 100644 index 0000000..442804d Binary files /dev/null and b/docs/reference/project_to_pcs-2.png differ diff --git a/docs/reference/project_to_pcs-3.png b/docs/reference/project_to_pcs-3.png new file mode 100644 index 0000000..27e53b9 Binary files /dev/null and b/docs/reference/project_to_pcs-3.png differ diff --git a/docs/reference/project_to_pcs.html b/docs/reference/project_to_pcs.html index 3277496..fe7d934 100644 --- a/docs/reference/project_to_pcs.html +++ b/docs/reference/project_to_pcs.html @@ -6,7 +6,7 @@ -Project archetype and data in low dimentions (PCA) — project_to_pcs • ParetoTI +Project archetypes and data in PCA dimentions — project_to_pcs • ParetoTI @@ -30,10 +30,10 @@ - + - + @@ -64,7 +64,7 @@ ParetoTI - 0.1.9 + 0.1.11 @@ -110,19 +110,24 @@
-

project_to_pcs() projects archetypes (arc_data) and data points (arc_data) to PC space. Data is standardised prior to PCA by default.

-

project_from_pc() projects archetypes and data points to original space provided SVD decomposition results

+

project_to_pcs() projects archetypes (arc_data) and data points (data) to PC space. Archetypes are projected into the PC space of the data, i.e. archetypes do not affect PCA and are just projected afterwards.

+

project_from_pc() projects archetypes and data points to original space provided SVD decomposition results. Optionally do the reverse of log2 transformation to obtain normalised expression space.

-
project_to_pcs(arc_data = NULL, data, n_dim = 50, s = NULL)
+
project_to_pcs(arc_data = NULL, data, n_dim = nrow(data), s = NULL,
+  pc_method = c("svd", "irlba"), log2 = FALSE, offset = 1,
+  zscore = FALSE)
+
+project_from_pc(arc_data, s, undo_zscore = FALSE, undo_log2 = FALSE,
+  offset = 1)

Arguments

@@ -141,23 +146,73 @@

Arg

- + + + + + + + + + + + + + + + + + + + + + + + + +
s

list 's' containing SVD decomposition results (U, d, Vt), standard deviation and mean of genes used for decomposition (sds, means)

list 's' containing SVD decomposition results (U, d, Vt), standard deviation and mean of genes used for decomposition (sd, means)

pc_method

method to use for finding PCs: svd or irlba

log2

log2-transform prior to z-scoring and PC-projection

offset

log2 transformation offset (e.g. log2(x + offset))

zscore

standardise (subtract the mean and divide by standard deviation) prior to PC-projection

undo_zscore

undo z-scoring by multiplying by standard deviation and adding the mean? Undo z-scoring precedes exponentiation.

undo_log2

undo log2-transformation by exponentiating and subtracting pseudocount?

+

Value

+ +

project_to_pcs(): list with projected $data, archetypes ($arc_data) and $s list of decomposition matrices, sds and means

+

project_from_pc(): archetypes projected to data space

+

Examples

# Random data that fits into the triangle set.seed(4355) arc_data = generate_arc(arc_coord = list(c(5, 0), c(-10, 15), c(-30, -20)), mean = 0, sd = 1) -data = generate_data(arc_data$XC, N_examples = 1e4, jiiter = 0.04, size = 0.9)
+data = generate_data(arc_data$XC, N_examples = 1e4, jiiter = 0.04, size = 0.9) +# Plot +plot_arc(arch_data = arc_data, data = data, + which_dimensions = 1:2, data_alpha = 0.5) + + ggplot2::theme_bw()
+# Project to PCs (in this case just rotate to align x-axis with +# the axis of most variation because the data is already 2D) +pcs = project_to_pcs(arc_data, data, n_dim = 2, pc_method = c("svd", "irlba")[1]) +# Plot in PC coordinates +plot_arc(arch_data = pcs$arc_data, data = pcs$data, + which_dimensions = 1:2, data_alpha = 0.5) + + ggplot2::theme_bw()
+# Project from PCs back to expression +projected = project_from_pc(pcs$arc_data, pcs$s, + undo_zscore = FALSE, undo_log2 = FALSE) + +# Plot in projected coordinates +plot_arc(arch_data = projected, data = data, + which_dimensions = 1:2, data_alpha = 0.5) + + ggplot2::theme_bw()
diff --git a/man/fit_pch.Rd b/man/fit_pch.Rd index bb1d46c..3fda54e 100644 --- a/man/fit_pch.Rd +++ b/man/fit_pch.Rd @@ -16,13 +16,14 @@ \alias{print.r_pch_fit} \alias{plot.r_pch_fit} \alias{plot_dim_var} +\alias{pch_fit} \title{Find the smallest polytope (Principal Convex Hull) that contains most of the data} \usage{ fit_pch(data, noc = as.integer(3), I = NULL, U = NULL, delta = 0, verbose = FALSE, conv_crit = 1e-04, maxiter = 500, check_installed = T, order_by = seq(1, nrow(data)), - order_type = c("cosine", "side", "align")[3], - volume_ratio = c("t_ratio", "variance_ratio", "none")[1], + order_type = c("align", "cosine", "side"), + volume_ratio = c("t_ratio", "variance_ratio", "none"), converge_else_fail = TRUE, var_in_dims = FALSE, normalise_var = TRUE, method = c("pcha", "kmeans"), method_options = list()) @@ -81,6 +82,9 @@ plot_dim_var(rand_arch, ks = unique(rand_arch$rand_dist$k), colors = c("#1F77B4", "#D62728", "#2CA02C", "#17BED0", "#006400", "#FF7E0F"), nudge_y = 0.5, nudge_x = 0.5, text_lab_size = 4, line_size = 0.5) + +pch_fit(XC, S, C, SSE, varexpl, arc_vol, hull_vol, t_ratio, var_vert, + var_dim, total_var, summary, call) } \arguments{ \item{data}{numeric matrix or object coercible to matrix in which to find archetypes, dim(variables/dimentions, examples)} @@ -227,6 +231,8 @@ call - function call. plot.r_pch_fit() plot distribution of t-ratio, total variance in archetype positions and variance explained used to calculate empirical p-value plot_dim_var() plot distribution of variance in archetype positions in each dimension and corresponding empirical p-values + +\code{pch_fit()} a constructor function for the "pch_fit" class } \details{ \code{fit_pch()} provides an R interface to python implementation of PCHA algorithm (Principal Convex Hull Analysis) by Ulf Aslak (https://github.com/ulfaslak/py_pcha) which was originally developed for Archetypal Analysis by Mørup et. al.