|
| 1 | +library(edgeR) |
| 2 | + |
| 3 | +# From: http://stackoverflow.com/questions/1815606/rscript-determine-path-of-the-executing-script |
| 4 | +# So we can copy this script to where the analysis output is saved |
# Return the normalized path of the script that is currently executing.
#
# Under Rscript the interpreter is started with a "--file=<path>" argument,
# which is read from commandArgs(). When the file is source()'d instead, the
# path is taken from the `ofile` variable of the first call frame.
#
# Returns: a length-one character vector holding the normalized path.
# Errors:  if no "--file=" argument is present and no `ofile` is found.
thisFile <- function() {
  cmdArgs <- commandArgs(trailingOnly = FALSE)
  # Anchored so that only an argument that *starts* with --file= matches,
  # not one that merely contains that text somewhere.
  needle <- "^--file="
  match <- grep(needle, cmdArgs)
  if (length(match) > 0) {
    # Rscript: use the first match only, so a script argument that also
    # starts with --file= cannot turn the result into a multi-element vector.
    return(normalizePath(sub(needle, "", cmdArgs[match[1]])))
  }

  # 'source'd via R console
  frames <- sys.frames()
  ofile <- if (length(frames) > 0) frames[[1]]$ofile else NULL
  if (is.null(ofile)) {
    stop(
      "thisFile(): cannot determine the script path; ",
      "run the script with Rscript or source() it",
      call. = FALSE
    )
  }
  normalizePath(ofile)
}
| 17 | + |
# Inputs: args[1] is a list path (only its basename is used, to locate the
# analysis directory); args[2] is the sample key CSV.
# args <- c("lists/organoid", "keys/organoid.csv")
args <- c("lists/candice_list", "keys/candice-key.csv")

# The count table is read from analysis/<bname>/<bname>-count.T.csv
bname <- basename(args[1])
fname <- paste0(bname, "-count.T.csv")
counts_path <- file.path("analysis", bname, fname)
dat <- read.csv(counts_path, header = TRUE, row.names = 1)

# Filter low count reads: keep rows with CPM above 3 in at least 3 columns
above_cutoff <- cpm(dat) > 3
keep <- rowSums(above_cutoff) >= 3
counts <- dat[keep, ]

## Read in key file (its first column becomes the row names)
key <- read.csv(args[2], header = TRUE, row.names = 1)
| 32 | + |
#############################################
## Create model comparison matrix with sample key
#############################################
# Lampe Biopsy main
# Flag rows whose name matches any of the MT-, ERCC- or mt- patterns
# (presumably mitochondrial genes and ERCC spike-ins -- confirm). The masks
# are combined with a logical OR so `sel` is a proper logical vector.
sel <- grepl("MT-.*", rownames(counts)) |
  grepl("ERCC-.*", rownames(counts)) |
  grepl("mt-.*", rownames(counts))
# We have to filter out 6123 because it's not full rank
# (that filter is currently disabled; see the commented-out lines below)
# > summary(factor(sapply(names(counts), function(x) substr(x,2,5))))
# 6102 6103 6105 6106 6108 6110 6121 6122 6123 6127
# 8 8 8 8 8 8 8 8 6 8
counts <- counts[!sel, ]
# counts = counts[!sel,!grepl("6123", names(counts))]
# key = key[!grepl("6123", rownames(key)),]
ename <- "edger-pair-treatment"
# Columns must be selected by quoted name: the bare symbols `sample` and
# `type` would refer to the base function sample() and an undefined object.
factors <- key[order(rownames(key)), c("sample", "type")]
factors$type <- factor(factors$type)
# NOTE(review): `sample` is not converted to a factor here; if the key stores
# it as a number, model.matrix() will treat it as a continuous covariate --
# confirm that matches the intended design.
# factors$treatment = relevel(factors$treatment, "placebo")
design <- model.matrix(~sample+type, data = factors)
groups <- factors$type
#groups = factor(paste(key$tissue,key$location,sep='.'))
#############################################
# Create the output directory from within R (no shell needed, safe for paths
# containing spaces; no warning if the directory already exists).
out_dir <- file.path("analysis", bname, ename)
dir.create(out_dir, recursive = TRUE, showWarnings = FALSE)
# Keep a copy of this script next to the results; overwrite so that a re-run
# records the script that actually produced the current output.
file.copy(thisFile(), file.path(out_dir, "edger_script.R"), overwrite = TRUE)
| 55 | + |
| 56 | + |
# Put the sample columns into name order.
# NOTE(review): presumably this is meant to line the columns up with the rows
# of `factors`, which were ordered by row name -- confirm the names agree.
counts <- counts[, order(names(counts))]

########################
# run Pairwise analysis ...
########################
# y = DGEList(counts=counts, group=factors)
# y = calcNormFactors(y)
# y = estimateCommonDisp(y)
# y = estimateTagwiseDisp(y)

########################
# or run GLM analysis
########################
# Build the DGEList, compute normalization factors, then estimate the common,
# trended and tagwise dispersions against `design` before fitting the GLM.
y <- DGEList(counts = counts)
y <- calcNormFactors(y)
y <- estimateGLMCommonDisp(y, design)
y <- estimateGLMTrendedDisp(y, design)
y <- estimateGLMTagwiseDisp(y, design)
fit <- glmFit(y, design)

## get normalized counts for each group for outputting to summary spreadsheet
# Per-sample multiplier: lib.size * norm.factors / mean(lib.size).
# NOTE(review): each column is *multiplied* by this value; normalization
# would usually divide by it -- confirm this is intended.
lib_sizes <- y$samples$lib.size
sample_scale <- lib_sizes * y$samples$norm.factors / mean(lib_sizes)
scaled.counts <- data.frame(mapply(`*`, counts, sample_scale))
rownames(scaled.counts) <- rownames(counts)
# Split the samples (rows after transposing) by group and average each group.
dfs <- split.data.frame(t(scaled.counts), groups)
dfss <- sapply(dfs, colMeans)

#group_names = levels(groups)
#group_names_means = sapply(group_names, function(x) paste("mean_",x,sep=""), USE.NAMES=FALSE)
#colnames(dfss) = group_names_means
| 87 | + |
| 88 | +#### Write results |
# Run a GLM likelihood-ratio test on the global `fit` and write the results.
#
# Args:
#   outfile:  path of the CSV to write. "<outfile>.png" receives the smear
#             plot and "summary.txt" in the same directory is appended to.
#   contrast: optional contrast passed to glmLRT().
#   coef:     optional coefficient passed to glmLRT(). When both `contrast`
#             and `coef` are NULL the last column of the design is tested,
#             matching glmLRT()'s own default.
#
# Uses the globals `fit`, `counts` and `dfss`.
# Returns NULL invisibly; called for its side effects (CSV, PNG, summary).
run_analysis <- function(outfile, contrast=NULL, coef=NULL) {
  if (is.null(contrast) && is.null(coef)) {
    coef <- ncol(fit$design)
  }

  # Pairwise test
  # lrt = exactTest(y)
  # GLM Test
  lrt <- glmLRT(fit, contrast = contrast, coef = coef)

  ot1 <- topTags(lrt, n = nrow(counts), sort.by = "PValue")$table
  #if (is.null(contrast)) {
  #sel = which(as.logical(contrast))
  #ot1 = merge(ot1, dfss[,sel], by=0)
  #} else {
  #ot1 = merge(ot1, dfss, by=0)
  #}
  # by = 0 joins on row names; merge() moves them into a "Row.names" column
  # and renumbers the rows of the result.
  ot1 <- merge(ot1, dfss, by = 0)
  ot1 <- ot1[order(ot1$FDR), ] # Sort by ascending FDR
  write.csv(ot1, outfile, row.names = FALSE)

  if (!is.null(lrt$table$logFC)) {
    # Take the tag names from Row.names: after merge() the row names of ot1
    # are just row numbers and would not identify any tag in `lrt`.
    detags <- as.character(ot1$Row.names[ot1$FDR < 0.05])
    png(paste0(outfile, ".png"))
    # Close the device even if plotting fails.
    tryCatch({
      plotSmear(lrt, de.tags = detags)
      abline(h = c(-2, 2), col = "blue")
    }, finally = dev.off())
  }

  sink(file = file.path(dirname(outfile), "summary.txt"), append = TRUE)
  # Always restore console output, even if summarising fails.
  tryCatch({
    print(outfile)
    print(summary(decideTestsDGE(lrt, p.value = 0.05, adjust.method = "BH")))
  }, finally = sink(NULL))

  #print(cpm(y)[detags,])
  #print(summary(decideTestsDGE(lrt, p=0.05, adjust="none")))
  invisible(NULL)
}
| 121 | + |
# Test one design coefficient, given by column index, and write the result to
# analysis/<bname>/<ename>/<coefficient name>.csv via run_analysis().
meta_run <- function(coef) {
  coef_name <- colnames(design)[coef]
  outfile <- file.path("analysis", bname, ename, paste0(coef_name, ".csv"))
  run_analysis(outfile, coef = coef)
}
| 123 | + |
# Test the last three coefficients of the design matrix, one output file each.
n_coef <- ncol(design)
meta_run(n_coef)
meta_run(n_coef - 1)
meta_run(n_coef - 2)

# Echo the accumulated summary. Read it from within R instead of shelling out
# to `cat`, so it works without a Unix shell and with paths containing spaces.
writeLines(readLines(file.path("analysis", bname, ename, "summary.txt")))
| 129 | + |
| 130 | +# Pairwise test |
| 131 | +# run_analysis(file.path("analysis",bname,paste(ename,".csv",sep=""))) |
| 132 | + |
## output MA, MDS, etc.., plots
# Each plot is drawn into its own PNG in the analysis output directory.
plot_dir <- file.path("analysis", bname, ename)

# MDS plot of `y`
png(file.path(plot_dir, "edger-mds.png"))
p <- plotMDS(y)
dev.off()

# BCV plot of `y`
png(file.path(plot_dir, "edger-bcv.png"))
p <- plotBCV(y)
dev.off()
0 commit comments