Skip to content

Commit 3bcf3f1

Browse files
author
candicechu
committed
add file
1 parent b91e3a3 commit 3bcf3f1

File tree

1 file changed

+140
-0
lines changed

1 file changed

+140
-0
lines changed

run_edger.R

Lines changed: 140 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,140 @@
1+
library(edgeR)
2+
3+
# From: http://stackoverflow.com/questions/1815606/rscript-determine-path-of-the-executing-script
4+
# So we can copy this script to where the analysis output is saved
5+
# Locate the file this script is being executed from.
#
# Returns the normalized path of the running script. When a `--file=`
# command-line argument is present (Rscript) the path is taken from it;
# otherwise it is read from the `ofile` entry of the first call frame
# (the script was 'source'd from an R console).
thisFile <- function() {
  file_flag <- "--file="
  all_args <- commandArgs(trailingOnly = FALSE)
  flag_idx <- grep(file_flag, all_args)

  # 'source'd via R console: no --file= argument on the command line
  if (length(flag_idx) == 0) {
    return(normalizePath(sys.frames()[[1]]$ofile))
  }

  # Rscript: strip the flag prefix, leaving the script path
  script_path <- sub(file_flag, "", all_args[flag_idx])
  normalizePath(script_path)
}
17+
18+
#args <- c("lists/organoid", "keys/organoid.csv")
# args[1]: path whose basename names the analysis directory
# args[2]: path of the sample key CSV
args <- c("lists/candice_list", "keys/candice-key.csv")

# Counts are read from analysis/<bname>/<bname>-count.T.csv
bname <- basename(args[1])
fname <- paste0(bname, "-count.T.csv")
dat <- read.csv(
  file.path("analysis", bname, fname),
  header = TRUE,
  row.names = 1
)

# Filter low count reads: keep rows with CPM above 3 in at least 3 columns
keep <- rowSums(cpm(dat) > 3) >= 3
counts <- dat[keep, ]

## Read in key file (its first column becomes the row names)
key <- read.csv(args[2], header = TRUE, row.names = 1)
32+
33+
#############################################
## Create model comparison matrix with sample key
#############################################
# Lampe Biopsy main
# Flag rows whose name contains "MT-", "mt-" or "ERCC-" so they can be dropped.
sel <- grepl("MT-|ERCC-|mt-", rownames(counts))
# We have to filter out 6123 because it's not full rank
# > summary(factor(sapply(names(counts), function(x) substr(x,2,5))))
# 6102 6103 6105 6106 6108 6110 6121 6122 6123 6127
# 8 8 8 8 8 8 8 8 6 8
counts <- counts[!sel, ]
# counts = counts[!sel,!grepl("6123", names(counts))]
# key = key[!grepl("6123", rownames(key)),]
ename <- "edger-pair-treatment"
# Column names must be quoted strings: the bare symbols `sample` and `type`
# resolve to the base function `sample` and an undefined object, which
# makes the subsetting fail.
factors <- key[order(rownames(key)), c("sample", "type")]
factors$type <- factor(factors$type)
# NOTE(review): if `sample` holds numeric IDs it enters the model as a
# continuous covariate; wrap it in factor() if it is meant to be a
# pairing/blocking factor -- confirm against the key file.
# factors$treatment = relevel(factors$treatment, "placebo")
design <- model.matrix(~sample+type, data = factors)
groups <- factors$type
#groups = factor(paste(key$tissue,key$location,sep='.'))
#############################################
# Create the output directory (and any missing parents) without shelling out,
# so it also works on systems without `mkdir -p` and with spaces in paths.
dir.create(
  file.path("analysis", bname, ename),
  recursive = TRUE,
  showWarnings = FALSE
)
# Keep a copy of this script next to the analysis output.
file.copy(thisFile(), file.path("analysis", bname, ename, "edger_script.R"))

# Order count columns by name; `factors` is ordered by the key's row names.
# NOTE(review): assumes both orderings line up sample-for-sample -- confirm.
counts <- counts[, order(names(counts))]
58+
59+
########################
# Pairwise analysis (disabled, kept for reference)
########################
# y = DGEList(counts=counts, group=factors)
# y = calcNormFactors(y)
# y = estimateCommonDisp(y)
# y = estimateTagwiseDisp(y)

########################
# GLM analysis
########################
y <- DGEList(counts = counts)
y <- calcNormFactors(y)

# Dispersions are estimated against the design in three passes:
# common, then trended, then tagwise.
y <- estimateGLMCommonDisp(y, design)
y <- estimateGLMTrendedDisp(y, design)
y <- estimateGLMTagwiseDisp(y, design)

fit <- glmFit(y, design)
76+
77+
## get normalized counts for each group for outputting to summary spreadsheet
# Per-sample scale: effective library size (lib.size * norm.factors)
# relative to the mean library size.
size.scale <- y$samples$lib.size * y$samples$norm.factors /
  mean(y$samples$lib.size)
# Normalising means DIVIDING each sample's column by its scale; multiplying
# (as before) inflated the counts of deeper libraries even further.
scaled.counts <- as.data.frame(sweep(as.matrix(counts), 2, size.scale, "/"))
rownames(scaled.counts) <- rownames(counts)
# Split the samples (rows after transposing) by group, then average each
# gene within a group: `dfss` has one row per gene and one column per group.
dfs <- split.data.frame(t(scaled.counts), groups)
dfss <- sapply(dfs, colMeans)
83+
84+
#group_names = levels(groups)
85+
#group_names_means = sapply(group_names, function(x) paste("mean_",x,sep=""), USE.NAMES=FALSE)
86+
#colnames(dfss) = group_names_means
87+
88+
#### Write results
89+
# Run one GLM likelihood-ratio test and write its results.
#
# Args:
#   outfile:  path of the CSV to write. A smear plot is written to
#             "<outfile>.png" (only when the test table has a logFC column)
#             and a summary is appended to "summary.txt" in the same
#             directory.
#   contrast: forwarded to glmLRT().
#   coef:     forwarded to glmLRT().
#
# Reads the globals `fit`, `counts` and `dfss`. Returns NULL invisibly.
run_analysis <- function(outfile, contrast=NULL, coef=NULL) {
  # Pairwise test
  # lrt = exactTest(y)
  # GLM Test
  lrt <- glmLRT(fit, contrast = contrast, coef = coef)

  ot1 <- topTags(lrt, n = nrow(counts), sort.by = "PValue")$table
  #if (is.null(contrast)) {
  #sel = which(as.logical(contrast))
  #ot1 = merge(ot1, dfss[,sel], by=0)
  #} else {
  #ot1 = merge(ot1, dfss, by=0)
  #}
  # merge(by = 0) joins on row names: the ids move into a "Row.names"
  # column and the result's own row names become 1..n.
  ot1 <- merge(ot1, dfss, by = 0)
  ot1 <- ot1[order(ot1$FDR), ] # Sort by ascending FDR
  write.csv(ot1, outfile, row.names = FALSE)

  if (!is.null(lrt$table$logFC)) {
    # Take the ids from Row.names; rownames(ot1) are only row numbers after
    # the merge and would never match the tags in `lrt`.
    detags <- as.character(ot1$Row.names[ot1$FDR < 0.05])
    png(paste0(outfile, ".png"))
    # Close the device even if plotting fails.
    tryCatch({
      plotSmear(lrt, de.tags = detags)
      abline(h = c(-2, 2), col = "blue")
    }, finally = dev.off())
  }

  sink(file = file.path(dirname(outfile), "summary.txt"), append = TRUE)
  # Always restore console output, even if printing fails.
  tryCatch({
    print(outfile)
    print(summary(decideTestsDGE(lrt, p.value = 0.05, adjust.method = "BH")))
  }, finally = sink(NULL))

  #print(cpm(y)[detags,])
  #print(summary(decideTestsDGE(lrt, p=0.05, adjust="none")))
  invisible(NULL)
}
121+
122+
# Test a single design coefficient; results are written to
# analysis/<bname>/<ename>/<coefficient name>.csv
meta_run <- function(coef) {
  coef_label <- colnames(design)[coef]
  out_csv <- file.path("analysis", bname, ename, paste0(coef_label, ".csv"))
  run_analysis(out_csv, coef = coef)
}
123+
124+
# Test the last three coefficients of the design, last column first
invisible(lapply(ncol(design) - 0:2, meta_run))

# Echo the accumulated summary to the console
system(paste("cat",file.path("analysis",bname,ename,"summary.txt")))

# Pairwise test
# run_analysis(file.path("analysis",bname,paste(ename,".csv",sep="")))

## MDS and BCV diagnostic plots, one PNG each
png(file.path("analysis", bname, ename, "edger-mds.png"))
p <- plotMDS(y)
dev.off()

png(file.path("analysis", bname, ename, "edger-bcv.png"))
p <- plotBCV(y)
dev.off()

0 commit comments

Comments
 (0)