forked from CandiceChuDVM/RNA-Seq
candicechu committed Mar 24, 2016
1 parent 61b1757 · commit 542db46

Showing 2 changed files with 149 additions and 147 deletions.
@@ -1,7 +1,7 @@
-"sample","type"
-"DMSO1","control"
-"DMSO2","control"
-"DMSO3","control"
-"TCDD1","treatment"
-"TCDD2","treatment"
-"TCDD3","treatment"
+"sample","type","treatment"
+"DMSO1","control","DMSO"
+"DMSO2","control","DMSO"
+"DMSO3","control","DMSO"
+"TCDD1","treatment","TCDD"
+"TCDD2","treatment","TCDD"
+"TCDD3","treatment","TCDD"
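For context, a minimal sketch (not part of the commit) of how the updated key, with its new "treatment" column, maps onto the grouping factor edgeR expects. The path keys/candice-key.csv is the one hard-coded in the script below; the simulated counts matrix is purely illustrative. The second changed file, the edgeR script itself, follows.

library(edgeR)

# Read the key exactly as the script does: "sample" becomes the row names,
# leaving the "type" and new "treatment" columns.
key <- read.csv("keys/candice-key.csv", header = TRUE, row.names = 1)

# The new column gives one level per condition (DMSO vs. TCDD), which the
# revised script uses as the edgeR group factor.
groups <- factor(key$treatment)

# Illustrative counts only -- a real run loads analysis/<list>/<list>-count.T.csv.
counts <- matrix(rpois(6 * 100, lambda = 50), nrow = 100,
                 dimnames = list(paste0("gene", 1:100), rownames(key)))
y <- DGEList(counts = counts, group = groups)
y$samples   # each sample tagged with its DMSO/TCDD group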
@@ -1,140 +1,142 @@
-library(edgeR)
-
-# From: http://stackoverflow.com/questions/1815606/rscript-determine-path-of-the-executing-script
-# So we can copy this script to where the analysis output is saved
-thisFile <- function() {
-  cmdArgs <- commandArgs(trailingOnly = FALSE)
-  needle <- "--file="
-  match <- grep(needle, cmdArgs)
-  if (length(match) > 0) {
-    # Rscript
-    return(normalizePath(sub(needle, "", cmdArgs[match])))
-  } else {
-    # 'source'd via R console
-    return(normalizePath(sys.frames()[[1]]$ofile))
-  }
-}
-
-#args = c("lists/organoid","keys/organoid.csv")
-args = c("lists/candice_list","keys/candice-key.csv")
-
-# Get counts file from analysis/fname/fname.T.csv
-bname = basename(args[1])
-fname = paste(bname,"-count.T.csv",sep='')
-dat = read.csv(file.path("analysis",bname,fname), header = TRUE, row.names=1)
-
-# Filter low count reads
-keep = rowSums(cpm(dat) > 3) >= 3
-counts = dat[keep,]
-
-## Read in key file
-key = read.csv(args[2], header=TRUE, row.names=1)
-
-#############################################
-## Create model comparison matrix with sample key
-#############################################
-# Lampe Biopsy main
-sel = grepl("MT-.*", rownames(counts)) + grepl("ERCC-.*", rownames(counts)) + grepl("mt-.*", rownames(counts))
-# We have to filter out 6123 because its not full rank
-# > summary(factor(sapply(names(counts), function(x) substr(x,2,5))))
-# 6102 6103 6105 6106 6108 6110 6121 6122 6123 6127
-#    8    8    8    8    8    8    8    8    6    8
-counts = counts[!sel,]
-# counts = counts[!sel,!grepl("6123", names(counts))]
-# key = key[!grepl("6123", rownames(key)),]
-ename = "edger-pair-treatment"
-factors = key[order(rownames(key)), c(sample,type)]
-factors$type = factor(factors$type)
-# factors$treatment = relevel(factors$treatment, "placebo")
-design = model.matrix(~sample+type, data=factors)
-groups = factors$type
-#groups = factor(paste(key$tissue,key$location,sep='.'))
-#############################################
-system(paste("mkdir -p ",file.path("analysis",bname,ename)))
-file.copy(thisFile(), file.path("analysis", bname, ename, "edger_script.R"))
-
-
-counts = counts[,order(names(counts))]
-
-########################
-# run Pairwise analysis ...
-########################
-# y = DGEList(counts=counts, group=factors)
-# y = calcNormFactors(y)
-# y = estimateCommonDisp(y)
-# y = estimateTagwiseDisp(y)
-
-########################
-# or run GLM analysis
-########################
-y = DGEList(counts=counts)
-y = calcNormFactors(y)
-y = estimateGLMCommonDisp(y, design)
-y = estimateGLMTrendedDisp(y, design)
-y = estimateGLMTagwiseDisp(y, design)
-fit = glmFit(y, design)
-
-## get normalized counts for each group for outputting to summary spreadsheet
-scaled.counts = data.frame(mapply(`*`, counts, y$samples$lib.size *
-  y$samples$norm.factors/mean(y$samples$lib.size)))
-rownames(scaled.counts) = rownames(counts)
-dfs = split.data.frame(t(scaled.counts), groups)
-dfss = sapply(dfs, colMeans)
-
-#group_names = levels(groups)
-#group_names_means = sapply(group_names, function(x) paste("mean_",x,sep=""), USE.NAMES=FALSE)
-#colnames(dfss) = group_names_means
-
-#### Write results
-run_analysis = function(outfile, contrast=NULL, coef=NULL) {
-  # Pairwise test
-  # lrt = exactTest(y)
-  # GLM Test
-  lrt = glmLRT(fit, contrast=contrast, coef=coef)
-
-  ot1 = topTags(lrt,n=nrow(counts),sort.by="PValue")$table
-  #if (is.null(contrast)) {
-    #sel = which(as.logical(contrast))
-    #ot1 = merge(ot1, dfss[,sel], by=0)
-  #} else {
-    #ot1 = merge(ot1, dfss, by=0)
-  #}
-  ot1 = merge(ot1, dfss, by=0)
-  ot1 = ot1[order(ot1$FDR),] # Sort by ascending FDR
-  write.csv(ot1,outfile,row.names=FALSE)
-
-  if (!is.null(lrt$table$logFC)){
-    detags = rownames(ot1)[ot1$FDR < 0.05]
-    png(paste(outfile,".png",sep=""))
-    plotSmear(lrt, de.tags=detags)
-    abline(h=c(-2,2),col="blue")
-    dev.off()
-  }
-  sink(file=file.path(dirname(outfile), "summary.txt"), append=TRUE)
-  print(outfile)
-  print(summary(decideTestsDGE(lrt, p=0.05, adjust="BH")))
-  sink(NULL)
-
-  #print(cpm(y)[detags,])
-  #print(summary(decideTestsDGE(lrt, p=0.05, adjust="none")))
-}
-
-meta_run = function(coef) {run_analysis(file.path("analysis",bname,ename,paste(colnames(design)[coef],".csv",sep="")),coef=coef)}
-
-meta_run(dim(design)[2])
-meta_run(dim(design)[2]-1)
-meta_run(dim(design)[2]-2)
-
-system(paste("cat",file.path("analysis",bname,ename,"summary.txt")))
-
-# Pairwise test
-# run_analysis(file.path("analysis",bname,paste(ename,".csv",sep="")))
-
-## output MA, MDS, etc.., plots
-png(file.path("analysis",bname,ename,"edger-mds.png"))
-p = plotMDS(y)
-dev.off()
-
-png(file.path("analysis",bname,ename,"edger-bcv.png"))
-p = plotBCV(y)
-dev.off()
+library(edgeR)
+
+# From: http://stackoverflow.com/questions/1815606/rscript-determine-path-of-the-executing-script
+# So we can copy this script to where the analysis output is saved
+thisFile <- function() {
+  cmdArgs <- commandArgs(trailingOnly = FALSE)
+  needle <- "--file="
+  match <- grep(needle, cmdArgs)
+  if (length(match) > 0) {
+    # Rscript
+    return(normalizePath(sub(needle, "", cmdArgs[match])))
+  } else {
+    # 'source'd via R console
+    return(normalizePath(sys.frames()[[1]]$ofile))
+  }
+}
+
+#args = c("lists/organoid","keys/organoid.csv")
+#args = c("ahr-list","ahr-key")
+args = c("lists/candice_list","keys/candice-key.csv")
+
+# Get counts file from analysis/fname/fname.T.csv
+bname = basename(args[1])
+fname = paste(bname,"-count.T.csv",sep='')
+dat = read.csv(file.path("analysis",bname,fname), header = TRUE, row.names=1)
+
+# Filter low count reads
+keep = rowSums(cpm(dat) > 3) >= 3
+counts = dat[keep,]
+
+## Read in key file
+key = read.csv(args[2], header=TRUE, row.names=1)
+
+#############################################
+## Create model comparison matrix with sample key
+#############################################
+# Lampe Biopsy main
+sel = grepl("MT-.*", rownames(counts)) + grepl("ERCC-.*", rownames(counts)) + grepl("mt-.*", rownames(counts))
+# We have to filter out 6123 because its not full rank
+# > summary(factor(sapply(names(counts), function(x) substr(x,2,5))))
+# 6102 6103 6105 6106 6108 6110 6121 6122 6123 6127
+#    8    8    8    8    8    8    8    8    6    8
+counts = counts[!sel,]
+# counts = counts[!sel,!grepl("6123", names(counts))]
+# key = key[!grepl("6123", rownames(key)),]
+ename = "edger-pair-treatment2"
+factors = key[order(rownames(key)), c("type", "treatment")]
+# factors$idnum = factor(factors$idnum)
+# factors$treatment = relevel(factors$treatment, "placebo")
+design = model.matrix(~type+treatment, data=factors)
+groups = factors$treatment
+#groups = factor(paste(key$type,key$treatment,sep='.'))
+#############################################
+system(paste("mkdir -p ",file.path("analysis",bname,ename)))
+file.copy(thisFile(), file.path("analysis", bname, ename, "edger_script.R"))
+
+
+counts = counts[,order(names(counts))]
+
+########################
+# run Pairwise analysis ...
+########################
+y = DGEList(counts=counts, group=groups)
+y = calcNormFactors(y)
+y = estimateCommonDisp(y)
+y = estimateTagwiseDisp(y)
+
+########################
+# or run GLM analysis
+########################
+#y = DGEList(counts=counts)
+#y = calcNormFactors(y)
+#y = estimateGLMCommonDisp(y, design)
+#y = estimateGLMTrendedDisp(y, design)
+#y = estimateGLMTagwiseDisp(y, design)
+#fit = glmFit(y, design)
+
+## get normalized counts for each group for outputting to summary spreadsheet
+scaled.counts = data.frame(mapply(`*`, counts, y$samples$lib.size *
+  y$samples$norm.factors/mean(y$samples$lib.size)))
+rownames(scaled.counts) = rownames(counts)
+dfs = split.data.frame(t(scaled.counts), groups)
+dfss = sapply(dfs, colMeans)
+
+#group_names = levels(groups)
+#group_names_means = sapply(group_names, function(x) paste("mean_",x,sep=""), USE.NAMES=FALSE)
+#colnames(dfss) = group_names_means
+
+#### Write results
+run_analysis = function(outfile, contrast=NULL, coef=NULL) {
+  # Pairwise test
+  lrt = exactTest(y)
+  # GLM Test
+  # lrt = glmLRT(fit, contrast=contrast, coef=coef)
+
+  ot1 = topTags(lrt,n=nrow(counts),sort.by="PValue")$table
+  #if (is.null(contrast)) {
+    #sel = which(as.logical(contrast))
+    #ot1 = merge(ot1, dfss[,sel], by=0)
+  #} else {
+    #ot1 = merge(ot1, dfss, by=0)
+  #}
+  ot1 = merge(ot1, dfss, by=0)
+  ot1 = ot1[order(ot1$FDR),] # Sort by ascending FDR
+  write.csv(ot1,outfile,row.names=FALSE)
+
+  if (!is.null(lrt$table$logFC)){
+    detags = rownames(ot1)[ot1$FDR < 0.05]
+    png(paste(outfile,".png",sep=""))
+    plotSmear(lrt, de.tags=detags)
+    abline(h=c(-2,2),col="blue")
+    dev.off()
+  }
+  sink(file=file.path(dirname(outfile), "summary.txt"), append=TRUE)
+  print(outfile)
+  print(summary(decideTestsDGE(lrt, p=0.05, adjust="BH")))
+  sink(NULL)
+
+  #print(cpm(y)[detags,])
+  #print(summary(decideTestsDGE(lrt, p=0.05, adjust="none")))
+}
+
+meta_run = function(coef) {run_analysis(file.path("analysis",bname,ename,paste(colnames(design)[coef],".csv",sep="")),coef=coef)}
+
+meta_run(dim(design)[2])
+#meta_run(dim(design)[2]-1)
+#meta_run(dim(design)[2]-2)
+
+system(paste("cat",file.path("analysis",bname,ename,"summary.txt")))
+
+# Pairwise test
+# run_analysis(file.path("analysis",bname,paste(ename,".csv",sep="")))
+
+## output MA, MDS, etc.., plots
+png(file.path("analysis",bname,ename,"edger-mds.png"))
+p = plotMDS(y)
+dev.off()
+
+png(file.path("analysis",bname,ename,"edger-bcv.png"))
+p = plotBCV(y)
+dev.off()
+
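The revised script above switches from the GLM fit to edgeR's classic pairwise route, so as a companion, here is a self-contained sketch of that exact-test workflow on a DMSO-vs-TCDD comparison. The simulated counts, seed, and gene names are assumptions made only so the example runs on its own; just the sequence of edgeR calls mirrors the script.

library(edgeR)

set.seed(1)
groups <- factor(c("DMSO", "DMSO", "DMSO", "TCDD", "TCDD", "TCDD"))
# Simulated negative-binomial counts standing in for the real count table.
counts <- matrix(rnbinom(6 * 1000, mu = 100, size = 10), nrow = 1000,
                 dimnames = list(paste0("gene", 1:1000),
                                 c("DMSO1", "DMSO2", "DMSO3", "TCDD1", "TCDD2", "TCDD3")))

y <- DGEList(counts = counts, group = groups)
y <- calcNormFactors(y)        # TMM normalization factors
y <- estimateCommonDisp(y)     # common dispersion across all genes
y <- estimateTagwiseDisp(y)    # per-gene (tagwise) dispersion
et <- exactTest(y)             # DMSO vs. TCDD exact test

top <- topTags(et, n = nrow(counts), sort.by = "PValue")$table
head(top)                      # logFC, logCPM, PValue, FDR per gene
summary(decideTestsDGE(et, adjust.method = "BH", p.value = 0.05))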