From 088b1c29da5a7561e51aedea3237de4d10b4ebc1 Mon Sep 17 00:00:00 2001 From: Yun Zhang Date: Tue, 19 Apr 2022 08:48:20 -0700 Subject: [PATCH] added functions: plot_MST(), plot_exprDist() --- DESCRIPTION | 2 +- NAMESPACE | 2 ++ R/FRmatch.R | 14 ++++++++------ R/FRmatch_cell2cluster.R | 6 +++--- R/plot_FRmatch_cell2cluster.R | 6 ++++-- R/plot_MST.R | 25 ++++++++++++++++++++++++ R/plot_cluster_by_markers.R | 5 +++++ R/plot_exprDist.R | 33 ++++++++++++++++++++++++++++++++ man/FRmatch.Rd | 8 ++++---- man/FRmatch_cell2cluster.Rd | 6 +++--- man/plot_FRmatch_cell2cluster.Rd | 1 + man/plot_MST.Rd | 23 ++++++++++++++++++++++ man/plot_exprDist.Rd | 33 ++++++++++++++++++++++++++++++++ 13 files changed, 145 insertions(+), 19 deletions(-) create mode 100644 R/plot_MST.R create mode 100644 R/plot_exprDist.R create mode 100644 man/plot_MST.Rd create mode 100644 man/plot_exprDist.Rd diff --git a/DESCRIPTION b/DESCRIPTION index 3e9452e..616f703 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -16,7 +16,7 @@ Authors@R: c( email = "RScheuermann@jcvi.org") ) Description: FR-Match is a cell type cluster mapping algorithm for single cell RNA sequencing (scRNAseq) data. It is based on a statistical test called Friedman-Rafsky (FR) test, which is a multivariate generalization of nonparametric two-sample test. This package also provides visualization tools for the implemented method. -Depends: R (>= 4.0.0), shiny (>= 1.2.0), SingleCellExperiment +Depends: R (>= 4.0.0), shiny (>= 1.2.0), SingleCellExperiment, pbmcapply Imports: methods, S4Vectors, SummarizedExperiment, Seurat, scmap, lsa, igraph, ade4, tibble, dplyr, tidyr, forcats, magrittr, pheatmap, RColorBrewer, ggplot2, gridExtra, viridis Suggests: knitr, diff --git a/NAMESPACE b/NAMESPACE index a694af4..b5baac1 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -8,9 +8,11 @@ export(make_data_object) export(normalization) export(plot_FRmatch) export(plot_FRmatch_cell2cluster) +export(plot_MST) export(plot_bi_FRmatch) export(plot_clusterSize) export(plot_cluster_by_markers) +export(plot_exprDist) export(plot_nonzero) export(predict_most_similar_cluster) export(runShiny) diff --git a/R/FRmatch.R b/R/FRmatch.R index b9e94bd..728ad21 100644 --- a/R/FRmatch.R +++ b/R/FRmatch.R @@ -13,11 +13,11 @@ #' See details in \code{\link[FRmatch]{sce.example}}. # #' @param imputation INACTIVE. Logical variable indicating if to impute expression zero values for the reference experiment. Default: \code{FALSE}. # #' See details in \code{\link[FRmatch]{impute_dropout}}. -#' @param filter.size,filter.fscore Filtering small/poor-quality clusters. Default: \code{filter.size=10}, filter based on the number +#' @param filter.size,filter.fscore Filtering small/poor-quality clusters. Default: \code{filter.size=5}, filter based on the number #' of cells per cluster; \code{filter.fscore=NULL}, filter based on the F-beta score associated with the cell cluster if available (numeric value). #' @param method Methods for the FR test. Default: \code{method="subsampling"} is to iteratively subsample equal number of cells (i.e. subsample size) #' from the query and reference clusters, and then perform the FR test. Option: \code{method="none"} is the FR test with no modification. -#' @param subsamp.size,subsamp.iter,subsamp.seed Subsample size, number of iterations, and random seed for \code{method="subsampling"}. YMMV. +#' @param subsamp.size,subsamp.iter,subsamp.seed Iterative subsampling size, number of iterations, and random seed for iterations. YMMV. #' @param numCores Number of cores for parallel computing. #' Default: \code{NULL}, use the maximum number of cores detected by \code{\link[parallel]{detectCores}} if not specified (an integer). #' @param prefix Prefix names for query and reference clusters. Default: \code{prefix=c("query.", "ref.")}. @@ -49,8 +49,8 @@ #' @export FRmatch <- function(sce.query, sce.ref, #imputation=FALSE, - filter.size=10, filter.fscore=NULL, #filtering clusters - method="subsampling", subsamp.size=5, subsamp.iter=1000, subsamp.seed=1, #subsampling + filter.size=5, filter.fscore=NULL, #filtering clusters + method="subsampling", subsamp.size=20, subsamp.iter=1000, subsamp.seed=1, #subsampling numCores=NULL, prefix=c("query.", "ref."), verbose=1, return.all=FALSE, ...){ @@ -75,8 +75,10 @@ FRmatch <- function(sce.query, sce.ref, #imputation=FALSE, # } ## extract info from sce.objects - querydat <- assay(sce.query) #matrix - refdat <- assay(sce.ref) + # querydat <- assay(sce.query) #matrix + # refdat <- assay(sce.ref) + querydat <- logcounts(sce.query) #matrix + refdat <- logcounts(sce.ref) membership.query <- colData(sce.query)$cluster_membership membership.ref <- colData(sce.ref)$cluster_membership order.query <- sce.query@metadata$cluster_order diff --git a/R/FRmatch_cell2cluster.R b/R/FRmatch_cell2cluster.R index c3bc824..6db7f6a 100644 --- a/R/FRmatch_cell2cluster.R +++ b/R/FRmatch_cell2cluster.R @@ -9,7 +9,7 @@ #' See details in \code{\link[FRmatch]{sce.example}}. # #' @param imputation INACTIVE. Logical variable indicating if to impute expression zero values for the reference experiment. Default: \code{FALSE}. # #' See details in \code{\link[FRmatch]{impute_dropout}}. -#' @param filter.size,filter.fscore Filtering small/poor-quality clusters. Default: \code{filter.size=10}, filter based on the number +#' @param filter.size,filter.fscore Filtering small/poor-quality clusters. Default: \code{filter.size=5}, filter based on the number #' of cells per cluster; \code{filter.fscore=NULL}, filter based on the F-beta score associated with the cell cluster if available (numeric value). #' @param subsamp.size,subsamp.iter,subsamp.seed Iterative subsampling size, number of iterations, and random seed for iterations. YMMV. #' @param numCores Number of cores for parallel computing. @@ -53,8 +53,8 @@ #' @export FRmatch_cell2cluster <- function(sce.query, sce.ref, #imputation=FALSE, - filter.size=10, filter.fscore=NULL, #filtering clusters - subsamp.size=5, subsamp.iter=2000, subsamp.seed=1, #subsampling + filter.size=5, filter.fscore=NULL, #filtering clusters + subsamp.size=10, subsamp.iter=2000, subsamp.seed=1, #subsampling numCores=NULL, prefix=c("query.", "ref."), verbose=1, ...){ diff --git a/R/plot_FRmatch_cell2cluster.R b/R/plot_FRmatch_cell2cluster.R index 9ab303b..9d1e2b3 100644 --- a/R/plot_FRmatch_cell2cluster.R +++ b/R/plot_FRmatch_cell2cluster.R @@ -21,7 +21,7 @@ plot_FRmatch_cell2cluster <- function(rst.cell2cluster, type="match.prop", p.adj.method="BH", sig.level=0.1, reorder=TRUE, return.value=FALSE, - filename=NA, width=NULL, height=NULL){ + main=NULL, filename=NA, width=NULL, height=NULL){ ## calculate adjusted p-values pmat <- rst.cell2cluster$pmat @@ -52,12 +52,14 @@ plot_FRmatch_cell2cluster <- function(rst.cell2cluster, type="match.prop", p.adj mutate(match=factor(match, levels = rev(c(clusterNames.ref, "unassigned")))) ## plot + if(is.null(main)) main <- "FR-Match cell-to-cluster" g <- ggplot(long.tab.match.prop, aes(x=query.cluster, y=match, size=Prop, fill=Prop)) + geom_point(alpha=0.7, shape=21, color="black") + scale_size_continuous(range = c(0, 10)) + scale_fill_viridis(option="D", guide = "legend") + scale_y_discrete(drop=FALSE) + #show all ref clusters even if no match - theme_bw() + theme(axis.text.x = element_text(angle = 90, hjust = 1)) + theme_bw() + theme(axis.text.x = element_text(angle = 90, hjust = 1)) + + ggtitle(main) ## save plot or plot on device if(is.null(width)) width <- ncol(tab.match.prop)*.2+.5 if(is.null(height)) height <- nrow(tab.match.prop)*.2 diff --git a/R/plot_MST.R b/R/plot_MST.R new file mode 100644 index 0000000..ed695f2 --- /dev/null +++ b/R/plot_MST.R @@ -0,0 +1,25 @@ + +#' Plot minimum spanning tree (MST) +#' +#' This function is a wrapper function for plotting MST of two interested clusters. +#' +#' @param sce.query,sce.ref Query and reference data objects. +#' @param query.cluster,ref.cluster Query and reference cluster names to plot. +#' @param nsamp Number of randomly selected cells to plot for large cluster. Default: 30. +#' @param ... Additional arguments passed to \code{\link[FRmatch]{FRtest}}. +#' +#' @return MST plot and FR-test result in console. +#' +#' @export + +plot_MST <- function(sce.query, sce.ref, query.cluster, ref.cluster, nsamp=30, ...){ + ind.query <- sce.query@colData$cluster_membership==query.cluster + ind.query.sub <- sample(1:sum(ind.query), min(nsamp,sum(ind.query))) + samp1 <- assay(sce.query)[,ind.query][,ind.query.sub] + + ind.ref <- sce.ref@colData$cluster_membership==ref.cluster + ind.ref.sub <- sample(1:sum(ind.ref), min(nsamp,sum(ind.ref))) + samp2 <- assay(sce.ref)[,ind.ref][,ind.ref.sub] + + FRtest(samp1, samp2, plot.MST=T, label.names=c(query.cluster,ref.cluster)) +} diff --git a/R/plot_cluster_by_markers.R b/R/plot_cluster_by_markers.R index 539c11b..6cc9a1c 100644 --- a/R/plot_cluster_by_markers.R +++ b/R/plot_cluster_by_markers.R @@ -27,8 +27,13 @@ plot_cluster_by_markers <- function(sce.E1, sce.E2=NULL, cluster.name, nsamp=30, if(!cluster.name %in% unique(colData(sce.query)$cluster_membership)){ stop(paste(cluster.name, "is not found in the plotting data object. \n"))} + ## REORDER clusters according to the given order if available + if(!is.null(sce.ref@metadata$cluster_order)){ + sce.ref@metadata$cluster_marker_info %<>% arrange(match(clusterName, sce.ref@metadata$cluster_order))} + ## reference marker genes markergenes <- unique(sce.ref@metadata$cluster_marker_info$markerGene) #marker genes in ORDER!!! + if(is.null(markergenes)) markergenes <- rownames(sce.ref)[rowData(sce.ref)$marker_gene==1] #if metadat is not available ## cells of query cluster col.query <- colData(sce.query)$cluster_membership==cluster.name diff --git a/R/plot_exprDist.R b/R/plot_exprDist.R new file mode 100644 index 0000000..b85acb9 --- /dev/null +++ b/R/plot_exprDist.R @@ -0,0 +1,33 @@ + +#' Gene expression data distribution plot +#' +#' This function plots the expression data distributions of the two single cell datasets (e.g. query and reference) to be compared. +#' +#' @param sce.E1,sce.E2 Data objects, namely E1 and E2. +#' @param name.E1,name.E2 Customized names for E1 and E2. Default: \code{"E1"} and \code{"E2"}, respectively. +#' @param breaks,xlim,ylim Plotting parameters passed to histogram plot. +#' @param filename File name if to save the plot. Default: \code{NA}, not to save the plot. +#' @param width,height Width and height for saved plot. +#' +#' @export + +plot_exprDist <- function(sce.E1, sce.E2, name.E1="E1", name.E2="E2", + breaks=20, xlim=c(0,10), ylim=c(0,1.7), + filename=NA, width=10, height=5){ + ## to save pdf + if(!is.na(filename)){pdf(filename, width=width, height=height)} + + ## plot + par(mfrow=c(1,2), mar=c(3,4,3,2)) + hist(logcounts(sce.E1), freq=F, xlab="", + breaks=breaks, xlim=xlim, ylim=ylim, main=name.E1) + ss <- summary(as.vector(logcounts(sce.E1))) + legend("topright", paste(names(ss),"=", round(ss,3)), bty="n") + hist(logcounts(sce.E2), freq=F, xlab="", + breaks=breaks, xlim=xlim, ylim=ylim, main=name.E2) + ss <- summary(as.vector(logcounts(sce.E2))) + legend("topright", paste(names(ss),"=", round(ss,3)), bty="n") + + ## to close pdf + if(!is.na(filename)){dev.off()} +} diff --git a/man/FRmatch.Rd b/man/FRmatch.Rd index 58d7a81..474636e 100644 --- a/man/FRmatch.Rd +++ b/man/FRmatch.Rd @@ -7,10 +7,10 @@ FRmatch( sce.query, sce.ref, - filter.size = 10, + filter.size = 5, filter.fscore = NULL, method = "subsampling", - subsamp.size = 5, + subsamp.size = 20, subsamp.iter = 1000, subsamp.seed = 1, numCores = NULL, @@ -27,13 +27,13 @@ See details in \code{\link[FRmatch]{sce.example}}.} \item{sce.ref}{Data object of the \link[SingleCellExperiment]{SingleCellExperiment} class for reference experiment. See details in \code{\link[FRmatch]{sce.example}}.} -\item{filter.size, filter.fscore}{Filtering small/poor-quality clusters. Default: \code{filter.size=10}, filter based on the number +\item{filter.size, filter.fscore}{Filtering small/poor-quality clusters. Default: \code{filter.size=5}, filter based on the number of cells per cluster; \code{filter.fscore=NULL}, filter based on the F-beta score associated with the cell cluster if available (numeric value).} \item{method}{Methods for the FR test. Default: \code{method="subsampling"} is to iteratively subsample equal number of cells (i.e. subsample size) from the query and reference clusters, and then perform the FR test. Option: \code{method="none"} is the FR test with no modification.} -\item{subsamp.size, subsamp.iter, subsamp.seed}{Subsample size, number of iterations, and random seed for \code{method="subsampling"}. YMMV.} +\item{subsamp.size, subsamp.iter, subsamp.seed}{Iterative subsampling size, number of iterations, and random seed for iterations. YMMV.} \item{numCores}{Number of cores for parallel computing. Default: \code{NULL}, use the maximum number of cores detected by \code{\link[parallel]{detectCores}} if not specified (an integer).} diff --git a/man/FRmatch_cell2cluster.Rd b/man/FRmatch_cell2cluster.Rd index fe917c7..d83cbe9 100644 --- a/man/FRmatch_cell2cluster.Rd +++ b/man/FRmatch_cell2cluster.Rd @@ -7,9 +7,9 @@ FRmatch_cell2cluster( sce.query, sce.ref, - filter.size = 10, + filter.size = 5, filter.fscore = NULL, - subsamp.size = 5, + subsamp.size = 10, subsamp.iter = 2000, subsamp.seed = 1, numCores = NULL, @@ -25,7 +25,7 @@ See details in \code{\link[FRmatch]{sce.example}}.} \item{sce.ref}{Data object of the \link[SingleCellExperiment]{SingleCellExperiment} class for reference experiment. See details in \code{\link[FRmatch]{sce.example}}.} -\item{filter.size, filter.fscore}{Filtering small/poor-quality clusters. Default: \code{filter.size=10}, filter based on the number +\item{filter.size, filter.fscore}{Filtering small/poor-quality clusters. Default: \code{filter.size=5}, filter based on the number of cells per cluster; \code{filter.fscore=NULL}, filter based on the F-beta score associated with the cell cluster if available (numeric value).} \item{subsamp.size, subsamp.iter, subsamp.seed}{Iterative subsampling size, number of iterations, and random seed for iterations. YMMV.} diff --git a/man/plot_FRmatch_cell2cluster.Rd b/man/plot_FRmatch_cell2cluster.Rd index 496cd86..ce6611e 100644 --- a/man/plot_FRmatch_cell2cluster.Rd +++ b/man/plot_FRmatch_cell2cluster.Rd @@ -11,6 +11,7 @@ plot_FRmatch_cell2cluster( sig.level = 0.1, reorder = TRUE, return.value = FALSE, + main = NULL, filename = NA, width = NULL, height = NULL diff --git a/man/plot_MST.Rd b/man/plot_MST.Rd new file mode 100644 index 0000000..a710008 --- /dev/null +++ b/man/plot_MST.Rd @@ -0,0 +1,23 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/plot_MST.R +\name{plot_MST} +\alias{plot_MST} +\title{Plot minimum spanning tree (MST)} +\usage{ +plot_MST(sce.query, sce.ref, query.cluster, ref.cluster, nsamp = 30, ...) +} +\arguments{ +\item{sce.query, sce.ref}{Query and reference data objects.} + +\item{query.cluster, ref.cluster}{Query and reference cluster names to plot.} + +\item{nsamp}{Number of randomly selected cells to plot for large cluster. Default: 30.} + +\item{...}{Additional arguments passed to \code{\link[FRmatch]{FRtest}}.} +} +\value{ +MST plot and FR-test result in console. +} +\description{ +This function is a wrapper function for plotting MST of two interested clusters. +} diff --git a/man/plot_exprDist.Rd b/man/plot_exprDist.Rd new file mode 100644 index 0000000..b3be530 --- /dev/null +++ b/man/plot_exprDist.Rd @@ -0,0 +1,33 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/plot_exprDist.R +\name{plot_exprDist} +\alias{plot_exprDist} +\title{Gene expression data distribution plot} +\usage{ +plot_exprDist( + sce.E1, + sce.E2, + name.E1 = "E1", + name.E2 = "E2", + breaks = 20, + xlim = c(0, 10), + ylim = c(0, 1.7), + filename = NA, + width = 10, + height = 5 +) +} +\arguments{ +\item{sce.E1, sce.E2}{Data objects, namely E1 and E2.} + +\item{name.E1, name.E2}{Customized names for E1 and E2. Default: \code{"E1"} and \code{"E2"}, respectively.} + +\item{breaks, xlim, ylim}{Plotting parameters passed to histogram plot.} + +\item{filename}{File name if to save the plot. Default: \code{NA}, not to save the plot.} + +\item{width, height}{Width and height for saved plot.} +} +\description{ +This function plots the expression data distributions of the two single cell datasets (e.g. query and reference) to be compared. +}