From 0f9b5c9c0d04ec718318d6924e35b38957b4b973 Mon Sep 17 00:00:00 2001 From: Hugo VARET Date: Wed, 23 Mar 2022 10:20:41 +0100 Subject: [PATCH] ashr --- DESCRIPTION | 6 +++--- NAMESPACE | 2 +- NEWS | 5 +++++ R/NAMESPACE.R | 2 +- R/descriptionPlots.r | 5 +++-- R/exploreCounts.R | 4 ++-- R/run.DESeq2.r | 9 ++++++--- R/summarizeResults.DESeq2.r | 4 ++-- R/summarizeResults.edgeR.r | 4 ++-- inst/bibliography.bib | 16 ++++++++++++++++ inst/report_DESeq2.rmd | 2 +- man/descriptionPlots.Rd | 4 ++-- man/exploreCounts.Rd | 4 ++-- man/summarizeResults.DESeq2.Rd | 4 ++-- man/summarizeResults.edgeR.Rd | 4 ++-- template_script_DESeq2.r | 4 ++-- template_script_DESeq2_CL.r | 4 ++-- template_script_edgeR.r | 4 ++-- template_script_edgeR_CL.r | 4 ++-- 19 files changed, 58 insertions(+), 33 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 3627d4e..3568c30 100755 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,13 +1,13 @@ Package: SARTools Type: Package Title: Statistical Analysis of RNA-Seq Tools -Version: 1.8.0 -Date: 2022-02-17 +Version: 1.8.1 +Date: 2022-03-23 Author: Marie-Agnes Dillies and Hugo Varet Maintainer: Hugo Varet Depends: R (>= 3.3.0), DESeq2 (>= 1.32.0), - apeglm (>= 1.14.0), + ashr (>= 2.2-54), edgeR (>= 3.34.0), ggplot2 (>= 3.3.0), kableExtra diff --git a/NAMESPACE b/NAMESPACE index 686bd71..e9e7fa8 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -2,7 +2,7 @@ exportPattern("^[a-zA-Z]") import(DESeq2) -import(apeglm) +import(ashr) import(edgeR) import(ggplot2) import(kableExtra) diff --git a/NEWS b/NEWS index ba56686..4242ae5 100755 --- a/NEWS +++ b/NEWS @@ -1,3 +1,8 @@ +CHANGES IN VERSION 1.8.1 +------------------------ + o log2FoldChanges from DESeq2 are now computed using lfcShrink() with type="ashr" as "apeglm" + was not compatible with the contrast argument + CHANGES IN VERSION 1.8.0 ------------------------ o log2FoldChanges from DESeq2 are now computed using lfcShrink() with type="apeglm" diff --git a/R/NAMESPACE.R b/R/NAMESPACE.R index e9eadf1..5f743bd 100755 --- a/R/NAMESPACE.R +++ b/R/NAMESPACE.R @@ -1,6 +1,6 @@ #' @exportPattern ^[a-zA-Z] #' @import DESeq2 -#' @import apeglm +#' @import ashr #' @import edgeR #' @import ggplot2 #' @import kableExtra diff --git a/R/descriptionPlots.r b/R/descriptionPlots.r index 3c4cf78..9c74fc1 100755 --- a/R/descriptionPlots.r +++ b/R/descriptionPlots.r @@ -5,11 +5,12 @@ #' @param counts \code{matrix} of counts #' @param group factor vector of the condition from which each sample belongs #' @param col colors for the plots (one per biological condition) -#' @param ggplot_theme ggplot2 theme function (\code{theme_gray()} by default) +#' @param ggplot_theme ggplot2 theme function (\code{theme_light()} by default) #' @return PNG files in the "figures" directory and the matrix of the most expressed sequences #' @author Hugo Varet -descriptionPlots <- function(counts, group, col=c("lightblue","orange","MediumVioletRed","SpringGreen"), ggplot_theme=theme_gray()){ +descriptionPlots <- function(counts, group, col=c("lightblue","orange","MediumVioletRed","SpringGreen"), + ggplot_theme=theme_light()){ # create the figures directory if does not exist if (!I("figures" %in% dir())) dir.create("figures", showWarnings=FALSE) diff --git a/R/exploreCounts.R b/R/exploreCounts.R index e0585cf..5b390bb 100755 --- a/R/exploreCounts.R +++ b/R/exploreCounts.R @@ -7,13 +7,13 @@ #' @param typeTrans transformation method for PCA/clustering with DESeq2: \code{"VST"} or \code{"rlog"} #' @param gene.selection selection of the features in MDSPlot (\code{"pairwise"} by default) #' @param col colors used for the PCA/MDS (one per biological condition) -#' @param ggplot_theme ggplot2 theme function (\code{theme_gray()} by default) +#' @param ggplot_theme ggplot2 theme function (\code{theme_light()} by default) #' @return A list containing the dds object and the results object #' @author Hugo Varet exploreCounts <- function(object, group, typeTrans="VST", gene.selection="pairwise", col=c("lightblue","orange","MediumVioletRed","SpringGreen"), - ggplot_theme=theme_gray()){ + ggplot_theme=theme_light()){ if (class(object)=="DESeqDataSet"){ if (typeTrans == "VST") counts.trans <- assay(varianceStabilizingTransformation(object)) else counts.trans <- assay(rlogTransformation(object)) diff --git a/R/run.DESeq2.r b/R/run.DESeq2.r index 3ace739..15405f1 100755 --- a/R/run.DESeq2.r +++ b/R/run.DESeq2.r @@ -39,14 +39,17 @@ run.DESeq2 <- function(counts, target, varInt, batch=NULL, for (comp in combn(nlevels(colData(dds)[,varInt]), 2, simplify=FALSE)){ levelRef <- levels(colData(dds)[,varInt])[comp[1]] levelTest <- levels(colData(dds)[,varInt])[comp[2]] - res <- results(dds, name=paste(c(varInt, levelTest, "vs", levelRef), collapse="_"), + res <- results(dds, + contrast=c(varInt, levelTest, levelRef), pAdjustMethod=pAdjustMethod, cooksCutoff=cooksCutoff, independentFiltering=independentFiltering, alpha=alpha) - lfcs <- lfcShrink(dds, res=res, coef=paste(c(varInt, levelTest, "vs", levelRef), collapse="_"), type="apeglm") + lfcs <- lfcShrink(dds, res=res, + contrast=c(varInt, levelTest, levelRef), + type="ashr", quiet=TRUE) res$log2FoldChange <- lfcs$log2FoldChange results[[paste0(levelTest,"_vs_",levelRef)]] <- res cat(paste("Comparison", levelTest, "vs", levelRef, "done\n")) } - return(list(dds=dds,results=results,sf=sizeFactors(dds))) + return(list(dds=dds, results=results, sf=sizeFactors(dds))) } diff --git a/R/summarizeResults.DESeq2.r b/R/summarizeResults.DESeq2.r index 0a3694c..585094c 100755 --- a/R/summarizeResults.DESeq2.r +++ b/R/summarizeResults.DESeq2.r @@ -10,13 +10,13 @@ #' @param col colors for the plots #' @param log2FClim numeric vector containing both upper and lower y-axis limits for all the MA-plots produced (NULL by default to set them automatically) #' @param padjlim numeric value between 0 and 1 for the adjusted p-value upper limits for all the volcano plots produced (NULL by default to set them automatically) -#' @param ggplot_theme ggplot2 theme function (\code{theme_gray()} by default) +#' @param ggplot_theme ggplot2 theme function (\code{theme_light()} by default) #' @return A list containing: (i) a list of \code{data.frames} from \code{exportResults.DESeq2()}, (ii) the table summarizing the independent filtering procedure and (iii) a table summarizing the number of differentially expressed features #' @author Hugo Varet summarizeResults.DESeq2 <- function(out.DESeq2, group, independentFiltering=TRUE, cooksCutoff=TRUE, alpha=0.05, col=c("lightblue","orange","MediumVioletRed","SpringGreen"), - log2FClim=NULL, padjlim=NULL, ggplot_theme=theme_gray()){ + log2FClim=NULL, padjlim=NULL, ggplot_theme=theme_light()){ # create the figures/tables directory if does not exist if (!I("figures" %in% dir())) dir.create("figures", showWarnings=FALSE) if (!I("tables" %in% dir())) dir.create("tables", showWarnings=FALSE) diff --git a/R/summarizeResults.edgeR.r b/R/summarizeResults.edgeR.r index dcd9213..3a5de18 100755 --- a/R/summarizeResults.edgeR.r +++ b/R/summarizeResults.edgeR.r @@ -9,13 +9,13 @@ #' @param col colors for the plots #' @param log2FClim numeric vector containing both upper and lower y-axis limits for all the MA-plots produced (NULL by default to set them automatically) #' @param padjlim numeric value between 0 and 1 for the adjusted p-value upper limits for all the volcano plots produced (NULL by default to set them automatically) -#' @param ggplot_theme ggplot2 theme function (\code{theme_gray()} by default) +#' @param ggplot_theme ggplot2 theme function (\code{theme_light()} by default) #' @return A list containing: (i) a list of \code{data.frames} from \code{exportResults.edgeR()} and (ii) a table summarizing the number of differentially expressed features #' @author Hugo Varet summarizeResults.edgeR <- function(out.edgeR, group, counts, alpha=0.05, col=c("lightblue","orange","MediumVioletRed","SpringGreen"), - log2FClim=NULL, padjlim=NULL, ggplot_theme=theme_gray()){ + log2FClim=NULL, padjlim=NULL, ggplot_theme=theme_light()){ # create the figures/tables directory if does not exist if (!I("figures" %in% dir())) dir.create("figures", showWarnings=FALSE) if (!I("tables" %in% dir())) dir.create("tables", showWarnings=FALSE) diff --git a/inst/bibliography.bib b/inst/bibliography.bib index 4b9d5dc..cfdbb4c 100755 --- a/inst/bibliography.bib +++ b/inst/bibliography.bib @@ -232,3 +232,19 @@ @article{zhu2018 url = {https://doi.org/10.1093/bioinformatics/bty895}, eprint = {https://academic.oup.com/bioinformatics/article-pdf/35/12/2084/28839676/bty895.pdf}, } + +@article{ashr, + author = {Stephens, Matthew}, + title = "{False discovery rates: a new deal}", + journal = {Biostatistics}, + volume = {18}, + number = {2}, + pages = {275-294}, + year = {2016}, + month = {10}, + abstract = "{We introduce a new Empirical Bayes approach for large-scale hypothesis testing, including estimating false discovery rates (FDRs), and effect sizes. This approach has two key differences from existing approaches to FDR analysis. First, it assumes that the distribution of the actual (unobserved) effects is unimodal, with a mode at 0. This “unimodal assumption” (UA), although natural in many contexts, is not usually incorporated into standard FDR analysis, and we demonstrate how incorporating it brings many benefits. Specifically, the UA facilitates efficient and robust computation—estimating the unimodal distribution involves solving a simple convex optimization problem—and enables more accurate inferences provided that it holds. Second, the method takes as its input two numbers for each test (an effect size estimate and corresponding standard error), rather than the one number usually used (\\$p\\$ value or \\$z\\$ score). When available, using two numbers instead of one helps account for variation in measurement precision across tests. It also facilitates estimation of effects, and unlike standard FDR methods, our approach provides interval estimates (credible regions) for each effect in addition to measures of significance. To provide a bridge between interval estimates and significance measures, we introduce the term “local false sign rate” to refer to the probability of getting the sign of an effect wrong and argue that it is a superior measure of significance than the local FDR because it is both more generally applicable and can be more robustly estimated. Our methods are implemented in an R package ashr available from http://github.com/stephens999/ashr.}", + issn = {1465-4644}, + doi = {10.1093/biostatistics/kxw041}, + url = {https://doi.org/10.1093/biostatistics/kxw041}, + eprint = {https://academic.oup.com/biostatistics/article-pdf/18/2/275/11057424/kxw041.pdf}, +} diff --git a/inst/report_DESeq2.rmd b/inst/report_DESeq2.rmd index b7ce8cf..4e09fd9 100755 --- a/inst/report_DESeq2.rmd +++ b/inst/report_DESeq2.rmd @@ -226,7 +226,7 @@ Figure 14 shows the volcano plots for the comparisons performed and differential -Note that the log2(Fold-Changes) are shrunk using the "apeglm" method that has been shown to be more robust than the original "normal" method [@zhu2018]. +Note that the log2(Fold-Changes) are shrunk using the "ashr" method that has been shown to be more robust than the original "normal" method [@ashr]. Full results as well as lists of differentially expressed features are provided in the following text files which can be easily read in a spreadsheet. For each comparison: diff --git a/man/descriptionPlots.Rd b/man/descriptionPlots.Rd index 8104cf0..fae5c55 100644 --- a/man/descriptionPlots.Rd +++ b/man/descriptionPlots.Rd @@ -8,7 +8,7 @@ descriptionPlots( counts, group, col = c("lightblue", "orange", "MediumVioletRed", "SpringGreen"), - ggplot_theme = theme_gray() + ggplot_theme = theme_light() ) } \arguments{ @@ -18,7 +18,7 @@ descriptionPlots( \item{col}{colors for the plots (one per biological condition)} -\item{ggplot_theme}{ggplot2 theme function (\code{theme_gray()} by default)} +\item{ggplot_theme}{ggplot2 theme function (\code{theme_light()} by default)} } \value{ PNG files in the "figures" directory and the matrix of the most expressed sequences diff --git a/man/exploreCounts.Rd b/man/exploreCounts.Rd index 1635b48..8311e18 100644 --- a/man/exploreCounts.Rd +++ b/man/exploreCounts.Rd @@ -10,7 +10,7 @@ exploreCounts( typeTrans = "VST", gene.selection = "pairwise", col = c("lightblue", "orange", "MediumVioletRed", "SpringGreen"), - ggplot_theme = theme_gray() + ggplot_theme = theme_light() ) } \arguments{ @@ -24,7 +24,7 @@ exploreCounts( \item{col}{colors used for the PCA/MDS (one per biological condition)} -\item{ggplot_theme}{ggplot2 theme function (\code{theme_gray()} by default)} +\item{ggplot_theme}{ggplot2 theme function (\code{theme_light()} by default)} } \value{ A list containing the dds object and the results object diff --git a/man/summarizeResults.DESeq2.Rd b/man/summarizeResults.DESeq2.Rd index 94b5a6d..450a6ee 100644 --- a/man/summarizeResults.DESeq2.Rd +++ b/man/summarizeResults.DESeq2.Rd @@ -13,7 +13,7 @@ summarizeResults.DESeq2( col = c("lightblue", "orange", "MediumVioletRed", "SpringGreen"), log2FClim = NULL, padjlim = NULL, - ggplot_theme = theme_gray() + ggplot_theme = theme_light() ) } \arguments{ @@ -33,7 +33,7 @@ summarizeResults.DESeq2( \item{padjlim}{numeric value between 0 and 1 for the adjusted p-value upper limits for all the volcano plots produced (NULL by default to set them automatically)} -\item{ggplot_theme}{ggplot2 theme function (\code{theme_gray()} by default)} +\item{ggplot_theme}{ggplot2 theme function (\code{theme_light()} by default)} } \value{ A list containing: (i) a list of \code{data.frames} from \code{exportResults.DESeq2()}, (ii) the table summarizing the independent filtering procedure and (iii) a table summarizing the number of differentially expressed features diff --git a/man/summarizeResults.edgeR.Rd b/man/summarizeResults.edgeR.Rd index ba99792..79e38d5 100644 --- a/man/summarizeResults.edgeR.Rd +++ b/man/summarizeResults.edgeR.Rd @@ -12,7 +12,7 @@ summarizeResults.edgeR( col = c("lightblue", "orange", "MediumVioletRed", "SpringGreen"), log2FClim = NULL, padjlim = NULL, - ggplot_theme = theme_gray() + ggplot_theme = theme_light() ) } \arguments{ @@ -30,7 +30,7 @@ summarizeResults.edgeR( \item{padjlim}{numeric value between 0 and 1 for the adjusted p-value upper limits for all the volcano plots produced (NULL by default to set them automatically)} -\item{ggplot_theme}{ggplot2 theme function (\code{theme_gray()} by default)} +\item{ggplot_theme}{ggplot2 theme function (\code{theme_light()} by default)} } \value{ A list containing: (i) a list of \code{data.frames} from \code{exportResults.edgeR()} and (ii) a table summarizing the number of differentially expressed features diff --git a/template_script_DESeq2.r b/template_script_DESeq2.r index 9660af9..4ca1478 100755 --- a/template_script_DESeq2.r +++ b/template_script_DESeq2.r @@ -1,8 +1,8 @@ ################################################################################ ### R script to compare several conditions with the SARTools and DESeq2 packages ### Hugo Varet -### November 28th, 2019 -### designed to be executed with SARTools 1.7.4 +### March 23rd, 2022 +### designed to be executed with SARTools 1.8.1 ################################################################################ ################################################################################ diff --git a/template_script_DESeq2_CL.r b/template_script_DESeq2_CL.r index e22a94d..4c8ac16 100755 --- a/template_script_DESeq2_CL.r +++ b/template_script_DESeq2_CL.r @@ -1,8 +1,8 @@ ################################################################################ ### R script to compare several conditions with the SARTools and DESeq2 packages ### Hugo Varet -### November 28th, 2019 -### designed to be executed with SARTools 1.7.4 +### March 23rd, 2022 +### designed to be executed with SARTools 1.8.1 ### run "Rscript template_script_DESeq2_CL.r --help" to get some help ################################################################################ diff --git a/template_script_edgeR.r b/template_script_edgeR.r index 400f578..c61c383 100755 --- a/template_script_edgeR.r +++ b/template_script_edgeR.r @@ -1,8 +1,8 @@ ################################################################################ ### R script to compare several conditions with the SARTools and edgeR packages ### Hugo Varet -### November 28th, 2019 -### designed to be executed with SARTools 1.7.4 +### March 23rd, 2022 +### designed to be executed with SARTools 1.8.1 ################################################################################ ################################################################################ diff --git a/template_script_edgeR_CL.r b/template_script_edgeR_CL.r index 760e937..908b224 100755 --- a/template_script_edgeR_CL.r +++ b/template_script_edgeR_CL.r @@ -1,8 +1,8 @@ ################################################################################ ### R script to compare several conditions with the SARTools and edgeR packages ### Hugo Varet -### November 28th, 2019 -### designed to be executed with SARTools 1.7.4 +### March 23rd, 2022 +### designed to be executed with SARTools 1.8.1 ### run "Rscript template_script_edgeR_CL.r --help" to get some help ################################################################################