Skip to content

Commit 4f75631

Browse files
committed
testing with github_document for rmd
1 parent 34062d6 commit 4f75631

File tree

1 file changed

+57
-57
lines changed

1 file changed

+57
-57
lines changed

src/dge_analysis/deseq2.Rmd

Lines changed: 57 additions & 57 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
title: "RNA-Seq differential gene expression analysis for muscular dystrophy samples"
33
author: "Gurpreet Kaur"
44
date: "`r Sys.Date()`"
5-
output: html_document
5+
output: github_document
66
---
77

88
```{r setup, include=FALSE}
@@ -14,7 +14,7 @@ getwd()
1414
***
1515
# BiocManager Installation
1616
```{r, BiocManager Installation}
17-
if (!requireNamespace("BiocManager", quietly = TRUE))
17+
if (!requireNamespace("BiocManager", quietly = TRUE))
1818
install.packages("BiocManager")
1919
```
2020

@@ -49,15 +49,15 @@ if (!dir.exists("deseq2/results/batch-corr_limma/plotCounts")) {
4949
```{r, read counts file}
5050
library(readr)
5151
counts = read_tsv(file.path(getwd(),"star_salmon","salmon.merged.gene_counts_length_scaled.tsv"))
52-
counts = data.frame(counts, row.names = 1)
52+
counts = data.frame(counts, row.names = 1)
5353
counts$Ensembl_ID = row.names(counts)
5454
drop = c("Ensembl_ID","gene_name")
55-
gene_names = counts[,drop]
56-
counts = counts[ , !(names(counts) %in% drop)]
55+
gene_names = counts[,drop]
56+
counts = counts[ , !(names(counts) %in% drop)]
5757
head(counts,5)
5858
```
5959

60-
```{r}
60+
```{r}
6161
# Analyze sample-related data (metadata), edit for desired columns, make in same sample order as per counts and read in
6262
library(readr)
6363
samples_all4deseq = read_csv(file.path(getwd(),"data","samples_all_edit.csv"))
@@ -66,23 +66,23 @@ View(samples_all4deseq)
6666
```
6767

6868
```{r}
69-
# Check consistency in samples name and order in counts and samples_all4deseq
69+
# Check consistency in samples name and order in counts and samples_all4deseq
7070
## Edit and read file (sample_all_edit.csv) again if not TRUE
71-
all(rownames(samples_all4deseq) %in% colnames(counts))
72-
all(rownames(samples_all4deseq) == colnames(counts))
71+
all(rownames(samples_all4deseq) %in% colnames(counts))
72+
all(rownames(samples_all4deseq) == colnames(counts))
7373
```
7474

7575
# 2) DESeq2 Analysis
7676
```{r, assign factors for DESeq2 design}
7777
library(DESeq2) # DESeq2_1.36.0; R version 4.2.3
7878
samples_all4deseq
79-
samples_all4deseq$Family = factor(samples_all4deseq$Family)
80-
samples_all4deseq$Affected = factor(samples_all4deseq$Affected)
81-
samples_all4deseq$Replicate = factor(samples_all4deseq$Replicate)
82-
samples_all4deseq$Batch = factor(samples_all4deseq$Batch)
79+
samples_all4deseq$Family = factor(samples_all4deseq$Family)
80+
samples_all4deseq$Affected = factor(samples_all4deseq$Affected)
81+
samples_all4deseq$Replicate = factor(samples_all4deseq$Replicate)
82+
samples_all4deseq$Batch = factor(samples_all4deseq$Batch)
8383
# Need later for labelling
84-
samples_all4deseq$Gender = factor(samples_all4deseq$Gender)
85-
samples_all4deseq$Relation = factor(samples_all4deseq$Relation)
84+
samples_all4deseq$Gender = factor(samples_all4deseq$Gender)
85+
samples_all4deseq$Relation = factor(samples_all4deseq$Relation)
8686
```
8787

8888
```{r, Affected_Yes_vs_No}
@@ -92,7 +92,7 @@ dim(dds)
9292

9393
```{r}
9494
# Pre-filtering: Keep only rows that have atleast 10 reads total
95-
keep = rowSums(counts(dds)) >= 10
95+
keep = rowSums(counts(dds)) >= 10
9696
dds = dds[keep,]
9797
dim(dds)
9898
```
@@ -114,9 +114,9 @@ counts_norm = counts(dds, normalized=TRUE)
114114
```
115115

116116
***
117-
# 3) Count data transformation
117+
# 3) Count data transformation
118118
```{r}
119-
vsd = vst(dds, blind=FALSE)
119+
vsd = vst(dds, blind=FALSE)
120120
```
121121

122122
***
@@ -144,9 +144,9 @@ res_aff_vs_unaff = results(dds, contrast=c("Affected", "Yes", "No"))
144144
res_aff_vs_unaff= res_aff_vs_unaff[order(res_aff_vs_unaff$padj),]
145145
head(res_aff_vs_unaff)
146146
summary(res_aff_vs_unaff)
147-
write.csv(res_aff_vs_unaff, file="./deseq2/results/batch-corr_limma/res_aff_vs_unaff.csv")
147+
write.csv(res_aff_vs_unaff, file="./deseq2/results/batch-corr_limma/res_aff_vs_unaff.csv")
148148
res_aff_vs_unaff_df = as.data.frame(res_aff_vs_unaff)
149-
res_aff_vs_unaff_05 = subset(res_aff_vs_unaff_df, padj < 0.05) # 1957 obs.
149+
res_aff_vs_unaff_05 = subset(res_aff_vs_unaff_df, padj < 0.05) # 1957 obs.
150150
```
151151

152152
## (ii) plotCounts: Genes
@@ -165,7 +165,7 @@ row.names(samples_all4deseq_pid) = samples_all4deseq_pid$Samples_id
165165

166166
```{r}
167167
# Genes of interest (goi)
168-
genes = c("VMA21","ENO3")
168+
genes = c("VMA21","ENO3")
169169
vsd_limma_genename = merge(x=gene_names, y=assay(vsd_limma), by.x = "Ensembl_ID", by.y="row.names")
170170
goi = intersect(genes, vsd_limma_genename$gene_name)
171171
```
@@ -180,11 +180,11 @@ write.csv(goi_table, file="./deseq2/results/batch-corr_limma/res_aff_vs_unaff_df
180180
# Define colors for 8 families
181181
library(RColorBrewer)
182182
additional_colors <- brewer.pal(8, "Set1")
183-
183+
184184
# Remove two colors to replace with salmon and royalblue for specific families # for family 2 and 8
185-
additional_colors <- setdiff(additional_colors, c("salmon", "royalblue"))
185+
additional_colors <- setdiff(additional_colors, c("salmon", "royalblue"))
186186
# Function to map specific family values to colors
187-
assign_colors <- function(family_levels)
187+
assign_colors <- function(family_levels)
188188
{
189189
# Start by making a named list of colors, initially empty
190190
color_map <- setNames(vector("character", length(family_levels)), family_levels)
@@ -202,7 +202,7 @@ genes_oi_plot_Ensembl = goi_table$Ensembl_ID
202202
genes_oi_plot_gene_name = goi_table$gene_name
203203
genes_oi_plot_padj = goi_table$padj
204204
genes_oi_plot_log2FC = goi_table$log2FoldChange
205-
Family = factor(samples_all4deseq_pid$Family)
205+
Family = factor(samples_all4deseq_pid$Family)
206206
207207
library(DESeq2)
208208
library(ggplot2)
@@ -211,31 +211,31 @@ library(ggrepel)
211211
for (i in seq_along(genes_oi_plot_Ensembl)) {
212212
boxplot_counts = plotCounts(dds, gene=genes_oi_plot_Ensembl[i], intgroup=c("Affected"), returnData=TRUE, normalized = T)
213213
boxplot_counts$variable = row.names(boxplot_counts)
214-
214+
215215
# Add 'Patient_id' and 'Replicate' as new columns and update row names
216216
boxplot_counts$Replicate = samples_all4deseq_pid[rownames(boxplot_counts), "Replicate", drop = TRUE]
217217
boxplot_counts$Patient_id = samples_all4deseq_pid[rownames(boxplot_counts), "Patient_id", drop = TRUE]
218218
boxplot_counts$Family = samples_all4deseq_pid[rownames(boxplot_counts), "Family", drop = TRUE]
219219
rownames(boxplot_counts) = with(samples_all4deseq_pid[rownames(boxplot_counts), ], paste(Family, Relation, Replicate, sep = "_"))
220-
220+
221221
boxplot_counts$Family <- as.factor(boxplot_counts$Family)
222-
222+
223223
# Retrieve unique family values and assign colors
224224
family_levels <- levels(boxplot_counts$Family)
225225
family_colors <- assign_colors(family_levels)
226-
226+
227227
plot = ggplot(data=boxplot_counts, aes(x=Affected, y=count, fill=Affected)) +
228-
geom_boxplot(position=position_dodge()) +
229-
geom_jitter(position=position_dodge(.8)) +
230-
labs(title = paste("Gene",genes_oi_plot_gene_name[i],sep = "_",genes_oi_plot_Ensembl[i]),
231-
subtitle = paste("padj=",genes_oi_plot_padj[i],"; log2FC=",genes_oi_plot_log2FC[i])) +
232-
xlab("") + ylab("Normalized gene counts") +
233-
theme_bw() + theme(text = element_text(size=6), axis.text.x = element_text(angle=45, vjust=1,hjust=1)) +
234-
scale_fill_manual(values = c("No"= "grey90", "Yes"= "grey40")) +
228+
geom_boxplot(position=position_dodge()) +
229+
geom_jitter(position=position_dodge(.8)) +
230+
labs(title = paste("Gene",genes_oi_plot_gene_name[i],sep = "_",genes_oi_plot_Ensembl[i]),
231+
subtitle = paste("padj=",genes_oi_plot_padj[i],"; log2FC=",genes_oi_plot_log2FC[i])) +
232+
xlab("") + ylab("Normalized gene counts") +
233+
theme_bw() + theme(text = element_text(size=6), axis.text.x = element_text(angle=45, vjust=1,hjust=1)) +
234+
scale_fill_manual(values = c("No"= "grey90", "Yes"= "grey40")) +
235235
scale_color_manual(values = family_colors) +
236236
geom_point(data=boxplot_counts, aes(x=Affected, y=count, color=Family), size = 2) +
237-
geom_text_repel(aes(label = rownames(boxplot_counts), color=Family), min.segment.length = 0, max.overlaps = Inf, box.padding = 0.5)
238-
ggsave(filename=paste("Gene-of-interest",genes_oi_plot_gene_name[i],"_",genes_oi_plot_Ensembl[i],".png"),
237+
geom_text_repel(aes(label = rownames(boxplot_counts), color=Family), min.segment.length = 0, max.overlaps = Inf, box.padding = 0.5)
238+
ggsave(filename=paste("Gene-of-interest",genes_oi_plot_gene_name[i],"_",genes_oi_plot_Ensembl[i],".png"),
239239
width=10, height=5, plot=plot, path = "./deseq2/results/batch-corr_limma/plotCounts_noid")
240240
}
241241
```
@@ -259,19 +259,19 @@ res_2_vs_8_df_genename = res_2_vs_8_df_genename[order(res_2_vs_8_df_genename[,"p
259259
write.csv(res_2_vs_8_df_genename,file="./deseq2/results/batch-corr_limma/family2vs8/res_2_vs_8_genename.csv" )
260260
261261
# Subset: padj<0.05 }
262-
res_2_vs_8_df_genename_05= subset(res_2_vs_8_df_genename, padj < 0.05)
262+
res_2_vs_8_df_genename_05= subset(res_2_vs_8_df_genename, padj < 0.05)
263263
res_2_vs_8_df_genename_05 = res_2_vs_8_df_genename_05[order(res_2_vs_8_df_genename_05$padj),]
264264
head(res_2_vs_8_df_genename_05)
265265
summary(res_2_vs_8_df_genename_05)
266266
write.csv(res_2_vs_8_df_genename_05, file="./deseq2/results/batch-corr_limma/family2vs8/res_2_vs_8_df_genename_05.csv") # 137
267267
```
268268

269269
## (ii) Volcano plot
270-
```{r}
270+
```{r}
271271
#devtools::install_github('kevinblighe/EnhancedVolcano') # v1.13.2; FCcutoff = 1 (default)
272272
library(EnhancedVolcano)
273273
png("./deseq2/results/batch-corr_limma/family2vs8/EnhancedVolcano_res_2_vs_8_all-degs.png", width = 2600, height = 3000, res=300)
274-
EnhancedVolcano(res_2_vs_8_df_genename, lab = res_2_vs_8_df_genename$gene_name,
274+
EnhancedVolcano(res_2_vs_8_df_genename, lab = res_2_vs_8_df_genename$gene_name,
275275
x = 'log2FoldChange', y = 'padj', title = 'Family 2 vs 8', subtitle = 'All DEGs (padjcutoff=0.05, FCcutoff = 1)' ,
276276
pCutoff = 0.05, FCcutoff = 1, pointSize = 2.0, labSize = 3.0, legendLabels=c('Not sig.','log2FC','padj', 'padj & log2FC'))
277277
dev.off()
@@ -303,46 +303,46 @@ genes_oi_plot_Ensembl = goi_table_2_vs_8$Ensembl_ID
303303
genes_oi_plot_gene_name = goi_table_2_vs_8$gene_name
304304
genes_oi_plot_padj = goi_table_2_vs_8$padj
305305
genes_oi_plot_log2FC = goi_table_2_vs_8$log2FoldChange
306-
Family = factor(samples_family2and8$Family)
306+
Family = factor(samples_family2and8$Family)
307307
308308
for (i in seq_along(genes_oi_plot_Ensembl)) {
309309
boxplot_counts = plotCounts(dds, gene=genes_oi_plot_Ensembl[i], intgroup=c("Affected"), returnData=TRUE, normalized = T)
310310
boxplot_counts$variable = row.names(boxplot_counts)
311-
311+
312312
# Find intersection of rownames
313313
common_rownames = intersect(rownames(boxplot_counts), rownames(samples_family2and8)) # Family 2 and 8
314-
314+
315315
# Subset boxplot_counts based on common rownames
316316
boxplot_counts_2_and_8 = boxplot_counts[common_rownames, , drop=FALSE]
317-
317+
318318
# Add 'Patient_id' and 'Replicate' as new columns and update row names
319319
boxplot_counts_2_and_8$Replicate = samples_family2and8[rownames(boxplot_counts_2_and_8), "Replicate", drop = TRUE]
320320
boxplot_counts_2_and_8$Patient_id = samples_family2and8[rownames(boxplot_counts_2_and_8), "Patient_id", drop = TRUE]
321321
boxplot_counts_2_and_8$Family = samples_family2and8[rownames(boxplot_counts_2_and_8), "Family", drop = TRUE]
322322
rownames(boxplot_counts_2_and_8) = with(samples_family2and8[rownames(boxplot_counts_2_and_8), ], paste(Patient_id, Replicate, sep = "_"))
323323
324324
# Plot
325-
boxplot_counts_2_and_8$Family = as.factor(boxplot_counts_2_and_8$Family)
325+
boxplot_counts_2_and_8$Family = as.factor(boxplot_counts_2_and_8$Family)
326326
plot = ggplot2::ggplot(data=boxplot_counts_2_and_8, aes(x=Affected, y=count, fill=Affected)) +
327-
geom_boxplot(position=position_dodge()) +
328-
geom_jitter(position=position_dodge(.8)) +
329-
labs(title = paste("Gene",genes_oi_plot_gene_name[i],sep = "_",genes_oi_plot_Ensembl[i]),
330-
subtitle = paste("padj=",genes_oi_plot_padj[i],"; log2FC=",genes_oi_plot_log2FC[i])) +
331-
xlab("") + ylab("Normalized gene counts") +
332-
theme_bw() + theme(text = element_text(size=6), axis.text.x = element_text(angle=45, vjust=1,hjust=1)) +
333-
scale_fill_manual(values = c("No"= "grey90", "Yes"= "grey40")) +
327+
geom_boxplot(position=position_dodge()) +
328+
geom_jitter(position=position_dodge(.8)) +
329+
labs(title = paste("Gene",genes_oi_plot_gene_name[i],sep = "_",genes_oi_plot_Ensembl[i]),
330+
subtitle = paste("padj=",genes_oi_plot_padj[i],"; log2FC=",genes_oi_plot_log2FC[i])) +
331+
xlab("") + ylab("Normalized gene counts") +
332+
theme_bw() + theme(text = element_text(size=6), axis.text.x = element_text(angle=45, vjust=1,hjust=1)) +
333+
scale_fill_manual(values = c("No"= "grey90", "Yes"= "grey40")) +
334334
scale_color_manual(values = c("2" = "salmon", "8" = "royalblue")) +
335335
#geom_point(data=boxplot_counts_2_and_8, aes(x=Affected, y=count, color=Family), size = 2) +
336336
geom_point(data=boxplot_counts_2_and_8, aes(color=Family), size = 2) +
337-
ggrepel::geom_text_repel(aes(label = rownames(boxplot_counts_2_and_8), color=Family), min.segment.length = 0, max.overlaps = Inf, box.padding = 0.5)
338-
339-
ggsave(filename=paste("Gene-of-interest_",genes_oi_plot_gene_name[i],"_",genes_oi_plot_Ensembl[i],".png"),
337+
ggrepel::geom_text_repel(aes(label = rownames(boxplot_counts_2_and_8), color=Family), min.segment.length = 0, max.overlaps = Inf, box.padding = 0.5)
338+
339+
ggsave(filename=paste("Gene-of-interest_",genes_oi_plot_gene_name[i],"_",genes_oi_plot_Ensembl[i],".png"),
340340
width=10, height=5, plot=plot, path = "./deseq2/results/batch-corr_limma/family2vs8/plotCounts_goi")
341341
}
342342
```
343343

344344
***
345-
# Save packages info
345+
# Save packages info
346346
```{r}
347347
sink("./sessionInfo.txt")
348348
sessionInfo()

0 commit comments

Comments
 (0)