From ab412f1fbd50453030556e571baf2f3b739226da Mon Sep 17 00:00:00 2001
From: candicechu <candy9087@gmail.com>
Date: Thu, 14 Apr 2016 17:18:35 -0500
Subject: [PATCH] Added files via upload

---
 biomart_refseq2ensembl.R | 35 +++++++++++++++++++++++++++++++++++
 1 file changed, 35 insertions(+)
 create mode 100644 biomart_refseq2ensembl.R

diff --git a/biomart_refseq2ensembl.R b/biomart_refseq2ensembl.R
new file mode 100644
index 0000000..c86ad89
--- /dev/null
+++ b/biomart_refseq2ensembl.R
@@ -0,0 +1,35 @@
+# If you are using a UCSC genome and annotation with the Tuxedo suite tools,
+# you might need to convert your RefSeq IDs into Ensembl Gene IDs first and
+# then feed them into the Panther Classification System for gene ontology analysis.
+
+# This R script provides a simple solution for this ID conversion.
+
+# Install biomaRt via BiocManager (it replaced the biocLite.R installer); skip anything already installed
+if (!requireNamespace("BiocManager", quietly = TRUE)) install.packages("BiocManager")
+if (!requireNamespace("biomaRt", quietly = TRUE)) BiocManager::install("biomaRt")
+
+# Load biomaRt
+library("biomaRt")
+
+# Connect to the Ensembl BioMart database
+ensembl <- useMart("ensembl")
+
+# Select the canine gene dataset (NOTE(review): confirm the current name with listDatasets(ensembl))
+ensembl <- useDataset("cfamiliaris_gene_ensembl", mart = ensembl)
+
+# Import your input refseq.list into R.
+# The refseq.list can be made by editing your Cuffdiff output gene_exp.diff.
+# Use the following command in Linux to get the ideal refseq.list:
+# $less gene_exp.diff | grep yes | grep rapid | grep control | cut -f3 | grep -v "-" > refseq.list.txt
+# If you wish to have numeric values for Panther, change cut -f3 into cut -f3,x
+# (x is the number of the column that you want to add to refseq.list)
+
+# Read the RefSeq IDs as character strings; only the first column is used as the query
+mydata <- read.table("refseq.list.txt", stringsAsFactors = FALSE)
+# Convert RefSeq mRNA IDs to Ensembl Gene IDs; pass a plain vector, not the whole data frame
+results <- getBM(attributes = c("refseq_mrna", "ensembl_gene_id"), filters = "refseq_mrna",
+                 values = unique(mydata[[1]]), mart = ensembl)
+# Export your Ensembl Gene ID list (second column of the result) for Panther
+write.table(results[, 2], file = "mydata.txt", row.names = FALSE, col.names = FALSE, quote = FALSE)
+
+# Check mydata.txt in your working directory. Now you have a proper ID list for Panther!
\ No newline at end of file