update software versions and prepare for explicit result creation #12

epigen · May 26, 2024 · fa938d9 · fa938d9
1 parent cb3caa3
commit fa938d9
Show file tree

Hide file tree

Showing 9 changed files with 72 additions and 21 deletions.
diff --git a/workflow/Snakefile b/workflow/Snakefile
@@ -6,6 +6,53 @@ import pandas as pd
 import yaml
 from snakemake.utils import min_version
 
+######## NOTE: the commented-out code below solves the MissingInputException that occurs when this workflow is used as an input module for, e.g., enrichment_analysis, BUT it requires Snakemake 8
+# # Global workflow dependency
+# # https://snakemake.readthedocs.io/en/stable/snakefiles/deployment.html#global-workflow-dependencies
+# conda:
+#     "envs/global.yaml"
+# from patsy import dmatrix
+
+# import pandas as pd
+# from patsy import dmatrix
+
+# def get_r_style_column_names(formula, file_path, reference_levels=None):
+#     # Load the data from the file
+#     df = pd.read_csv(file_path)
+
+#     # Set reference levels if provided
+#     if reference_levels:
+#         for var, ref_level in reference_levels.items():
+#             if var in df.columns:
+#                 levels = list(df[var].unique())
+#                 if ref_level in levels:
+#                     levels.remove(ref_level)
+#                     levels = [ref_level] + levels
+#                     df[var] = pd.Categorical(df[var], categories=levels, ordered=True)
+
+#     # Create the design matrix
+#     X = dmatrix(formula, data=df, return_type='dataframe')
+
+#     # Convert to the correct R-style column names
+#     def convert_to_r_style(col_name):
+#         # Remove brackets and every 'T.' substring (interaction terms get no special handling)
+#         col_name = col_name.replace('[', '').replace(']', '')
+#         col_name = col_name.replace('T.', '')
+#         return col_name
+
+#     r_style_column_names = [convert_to_r_style(col) for col in X.columns]
+
+#     return r_style_column_names
+
+# # Apply the function to the provided data and formula
+# file_path = '/mnt/data/annotation (1).csv'
+# formula = '0 + celltype + celltype:genotype'
+# reference_levels = {
+#     'genotype': 'WT'
+# }
+# r_style_column_names = get_r_style_column_names(formula, file_path, reference_levels)
+# print(r_style_column_names)
+
 ##### set minimum snakemake version #####
 min_version("7.15.2")
 

diff --git a/workflow/envs/ggplot.yaml b/workflow/envs/ggplot.yaml
@@ -3,5 +3,4 @@ channels:
   - defaults
 dependencies:
   - r-ggplot2=3.3.6
-  - r-patchwork=1.1.2
-  - r-data.table
+  - r-data.table=1.15.2
diff --git a/workflow/envs/global.yaml b/workflow/envs/global.yaml
@@ -0,0 +1,6 @@
+channels:
+  - conda-forge
+  - bioconda
+  - defaults
+dependencies:
+  - patsy=0.5.6
diff --git a/workflow/envs/heatmap.yaml b/workflow/envs/heatmap.yaml
@@ -3,10 +3,9 @@ channels:
   - bioconda
   - defaults
 dependencies:
-  - r-ggplot2=3.3.5
-  - r-ggplotify=0.1.0
-  - r-patchwork=1.1.1
+  - r-ggplot2=3.5.1
+  - r-ggplotify=0.1.2
   - r-pheatmap=1.0.12
   - r-reshape2=1.4.4
-  - r-stringi=1.7.3
-  - r-data.table
+  - r-gtable=0.3.5
+  - r-data.table=1.15.2
diff --git a/workflow/envs/limma.yaml b/workflow/envs/limma.yaml
@@ -6,4 +6,4 @@ dependencies:
   - bioconductor-limma=3.46.0
   - bioconductor-edger=3.32.1
   - r-statmod=1.4.37
-  - r-data.table
+  - r-data.table=1.14.2
diff --git a/workflow/envs/volcanos.yaml b/workflow/envs/volcanos.yaml
@@ -3,6 +3,6 @@ channels:
   - bioconda
   - defaults
 dependencies:
-  - bioconductor-enhancedvolcano=1.12.0
-  - r-patchwork=1.1.2
-  - r-data.table
+  - bioconductor-enhancedvolcano=1.20.0
+  - r-ggplot2=3.5.1
+  - r-data.table=1.15.2
diff --git a/workflow/scripts/aggregate.R b/workflow/scripts/aggregate.R
@@ -1,6 +1,6 @@
 #### load libraries & utility function 
 library("ggplot2")
-library("patchwork")
+# library("patchwork")
 library("data.table")
 # library(reshape2)
 
@@ -124,7 +124,7 @@ plot_stats_df$groups <- rep(rownames(dea_filtered_stats_df), ncol(dea_filtered_s
 # plot
 dea_filtered_results_p <- ggplot(plot_stats_df, aes(x=groups, y=n_features, fill=direction)) + 
                                              geom_bar(stat="identity", position="identity") +
-                                             xlab(metadata) +
+                                             xlab("groups") +
                                              ylab("number of differential features") +
                                              scale_fill_manual(values=list("down"="blue", "up"="red"), drop=FALSE) +
                                              scale_y_continuous(labels = function(y) sapply(y, function(y) ifelse(y < 0, paste0(sub("-", "", as.character(y))), y))) +

diff --git a/workflow/scripts/heatmap.R b/workflow/scripts/heatmap.R
@@ -1,6 +1,6 @@
 #### load libraries & utility function 
 library("pheatmap")
-library("patchwork")
+# library("patchwork")
 library("ggplot2")
 library("ggplotify")
 library("reshape2")

diff --git a/workflow/scripts/volcanos.R b/workflow/scripts/volcanos.R
@@ -1,6 +1,6 @@
 #### load libraries & utility function 
 library("EnhancedVolcano", quietly=TRUE)
-library("patchwork", quietly=TRUE)
+# library("patchwork", quietly=TRUE)
 library("ggplot2")
 library("data.table")
 
@@ -75,14 +75,14 @@ for (pval_type in c("adj.P.Val", "P.Value")){
                 # sort results by feature list so they are on top in the plot
                 toptable <- toptable[order(toptable$feature_list),]
                 # highlight features of interest
-                keyvals.alpha <- ifelse(toptable$feature_name %in% feature_list, 1, 0.5)
+                keyvals.alpha <- ifelse(toptable$feature_name %in% feature_list, 0.5, 0.25)
                 keyvals.col <- ifelse(toptable$feature_name %in% feature_list, "red", "grey")
             }else{
                 toptable$feature_list <- ifelse(toptable$feature %in% feature_list, TRUE, FALSE)
                 # sort results by feature list so they are on top in the plot
                 toptable <- toptable[order(toptable$feature_list),]
                 # highlight features of interest
-                keyvals.alpha <- ifelse(toptable$feature %in% feature_list, 1, 0.5)
+                keyvals.alpha <- ifelse(toptable$feature %in% feature_list, 0.5, 0.25)
                 keyvals.col <- ifelse(toptable$feature %in% feature_list, "red", "grey")
             }
 
@@ -125,8 +125,8 @@ for (pval_type in c("adj.P.Val", "P.Value")){
                         cutoffLineType = "longdash",
                         cutoffLineCol = "black",
                         cutoffLineWidth = 0.4,
-                        pointSize = 1, # default: 2
-                        labSize = 3, #default: 5
+                        pointSize = 0.5, # default: 2
+                        labSize = 2, #default: 5
                         labCol = "black",
                         labFace = "plain",
                         boxedLabels = TRUE, #default: FALSE
@@ -187,8 +187,8 @@ for (pval_type in c("adj.P.Val", "P.Value")){
 #                    paste0("DEA_volcanos_",feature_list_name,"_",pval_type), 
                    results_path=volcano_plot_path, 
                    plot=volcano_plot, 
-                   width=width_panel, 
-                   height=height_panel)    
+                   width=width, 
+                   height=height)    
     }
 }