Skip to content
Open
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
86 changes: 58 additions & 28 deletions R/trackplot.R
Original file line number Diff line number Diff line change
Expand Up @@ -123,14 +123,15 @@ read_coldata = function(bws = NULL, sample_names = NULL, build = "hg38", input_t
#' @param build Reference genome build. Default hg38
#' @param padding Extend locus on both sides by this many bps.
#' @param ideoTblName Table name for ideogram. Default `cytoBand`
#' @param use_conda Default NULL. If set to TRUE, uses conda to install required packages.
#' @import data.table
#' @examples
#' bigWigs = system.file("extdata", "bw", package = "trackplot") |> list.files(pattern = "\\.bw$", full.names = TRUE)
#' cd = read_coldata(bws = bigWigs, build = "hg19")
#' oct4_loci = "chr6:31125776-31144789"
#' t = track_extract(colData = cd, loci = oct4_loci, build = "hg19")
#' @export
track_extract = function(colData = NULL, loci = NULL, gene = NULL, binsize = 10, nthreads = 1, query_ucsc = TRUE, gtf = NULL, build = "hg38", padding = 0, ideoTblName = "cytoBand"){
track_extract = function(colData = NULL, loci = NULL, gene = NULL, binsize = 10, nthreads = 1, query_ucsc = TRUE, gtf = NULL, build = "hg38", padding = 0, ideoTblName = "cytoBand",use_conda = NULL){
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Again, use_conda shouldn't be the environment name instead of T/F.


if(is.null(colData)){
stop("Missing colData. Use read_coldata() to generate one.")
Expand All @@ -145,7 +146,7 @@ track_extract = function(colData = NULL, loci = NULL, gene = NULL, binsize = 10,

.check_windows()
if(input_bw){
.check_bwtool()
.check_bwtool(use_conda = use_conda)
}

.check_dt()
Expand Down Expand Up @@ -808,13 +809,14 @@ track_plot = function(summary_list = NULL,
#' @param ucsc_assembly If `bed` file not provided, setting `ucsc_assembly` to TRUE will fetch transcripts from UCSC genome browser.
#' @param pc_genes Use only protein coding genes when `ucsc_assembly` is used. Default TRUE
#' @param nthreads Default 4
#' @param use_conda Default NULL. If you have conda installed, you can set this to envs to use conda's `bwtool`
#' @seealso \code{\link{profile_summarize}} \code{\link{profile_plot}} \code{\link{profile_heatmap}}
#' @export

profile_extract = function(colData = NULL, bed = NULL, ucsc_assembly = TRUE, startFrom = "start", binSize = 50,
up = 2500, down = 2500, pc_genes = TRUE, nthreads = 4){
up = 2500, down = 2500, pc_genes = TRUE, nthreads = 4,use_conda = NULL){
.check_windows()
.check_bwtool(warn = FALSE)
.check_bwtool(warn = FALSE,use_conda = use_conda)
.check_dt()

if(is.null(colData)){
Expand Down Expand Up @@ -844,8 +846,8 @@ profile_extract = function(colData = NULL, bed = NULL, ucsc_assembly = TRUE, sta
}

message("Extracting signals..")
mats = parallel::mclapply(bigWigs, function(x){
.bwt_mats(bw = x, binSize = binSize, bed = bed, size = paste0(up, ":", down), startFrom = startFrom, op_dir = op_dir)
mats = parallel::mclapply(bigWigs, function(x,use_conda = use_conda){
.bwt_mats(bw = x, binSize = binSize, bed = bed, size = paste0(up, ":", down), startFrom = startFrom, op_dir = op_dir,use_conda = use_conda )
}, mc.cores = nthreads)

mats = as.character(unlist(x = mats))
Expand Down Expand Up @@ -1128,16 +1130,17 @@ profile_heatmap = function(mat_list, sortBy = "mean", col_pal = "Blues", revpal
#' @param up extend upstream by this many bps from `startFrom`. Default 2500
#' @param down extend downstream by this many bps from `startFrom`. Default 2500
#' @param nthreads Default 4
#' @param use_conda Default NULL. If not NULL, will use conda to install required packages.
#' @export
extract_summary = function(colData, bed = NULL, ucsc_assembly = TRUE, startFrom = "start", binSize = 50,
up = 2500, down = 2500, pc_genes = TRUE, nthreads = 4){
up = 2500, down = 2500, pc_genes = TRUE, nthreads = 4,use_conda = NULL){

if(is.null(colData)){
stop("Missing colData. Use read_coldata() to generate one.")
}

.check_windows()
.check_bwtool()
.check_bwtool(use_conda = use_conda)
.check_dt()

bigWigs = colData$bw_files
Expand Down Expand Up @@ -1168,10 +1171,15 @@ extract_summary = function(colData, bed = NULL, ucsc_assembly = TRUE, startFrom
}

message("Extracting summaries..")
summaries = parallel::mclapply(seq_along(bigWigs), FUN = function(idx){
summaries = parallel::mclapply(seq_along(bigWigs), FUN = function(idx,use_conda = use_conda){
bw = bigWigs[idx]
bn = custom_names[idx]
cmd = paste("bwtool summary -with-sum -keep-bed -header", bed, bw, paste0(op_dir, bn, ".summary"))
if (!is.null(use_conda)){
cmd_conda = paste("conda run -n", use_conda)
} else{
cmd_conda = ""
}
cmd = paste(cmd_conda,"bwtool summary -with-sum -keep-bed -header", bed, bw, paste0(op_dir, bn, ".summary"))
system(command = cmd, intern = TRUE)
paste0(op_dir, bn, ".summary")
}, mc.cores = nthreads)
Expand Down Expand Up @@ -1701,7 +1709,7 @@ summarize_homer_annots = function(anno, sample_names = NULL, legend_font_size =
lo_h_ord[lord]
}

.gen_windows = function(chr = NA, start, end, window_size = 50, op_dir = getwd()){
.gen_windows = function(chr = NA, start, end, window_size = 50, op_dir = getwd(),use_conda = NULL){
#chr = "chr19"; start = 15348301; end = 15391262; window_size = 50; op_dir = getwd()
message(paste0("Generating windows ", "[", window_size, " bp window size]"))

Expand Down Expand Up @@ -1741,7 +1749,12 @@ summarize_homer_annots = function(anno, sample_names = NULL, legend_font_size =
summaries = parallel::mclapply(bigWigs, FUN = function(bw){
bn = gsub(pattern = "\\.bw$|\\.bigWig$", replacement = "", x = basename(bw))
message(paste0(" Processing ", bn, " .."))
cmd = paste("bwtool summary -with-sum -keep-bed -header", bedSimple, bw, paste0(op_dir, bn, ".summary"))
if (!is.null(use_conda)){
cmd_conda = paste("conda run -n", use_conda)
} else{
cmd_conda = ""
}
cmd = paste(cmd_conda,"bwtool summary -with-sum -keep-bed -header", bedSimple, bw, paste0(op_dir, bn, ".summary"))
system(command = cmd, intern = TRUE)
paste0(op_dir, bn, ".summary")
}, mc.cores = nthreads)
Expand Down Expand Up @@ -1821,7 +1834,7 @@ summarize_homer_annots = function(anno, sample_names = NULL, legend_font_size =
}

cmd = paste0(
"mysql --user genome --host genome-mysql.soe.ucsc.edu -NAD ",
"mariadb --user genome --host genome-mysql.soe.ucsc.edu -A -P 3306 -NAD ",
refBuild,
" -e 'select chrom, chromStart, chromEnd, name, gieStain from ", tblName, " WHERE chrom =\"",
chr,
Expand Down Expand Up @@ -1897,7 +1910,7 @@ summarize_homer_annots = function(anno, sample_names = NULL, legend_font_size =

.check_mysql()

cmd = paste0("mysql --user genome --host genome-mysql.soe.ucsc.edu -NAD ", refBuild, " -e 'select chrom, chromStart, chromEnd, name from ", tbl, " WHERE chrom =\"", tar_chr, "\"'")
cmd = paste0("mariadb --user genome --host genome-mysql.soe.ucsc.edu -A -P 3306 -NAD ", refBuild, " -e 'select chrom, chromStart, chromEnd, name from ", tbl, " WHERE chrom =\"", tar_chr, "\"'")
message(paste0("Extracting chromHMM from UCSC:\n", " chromosome: ", tar_chr, "\n", " build: ", refBuild, "\n query: ", cmd))
#system(command = cmd)
ucsc = data.table::fread(cmd = cmd)
Expand All @@ -1921,7 +1934,7 @@ summarize_homer_annots = function(anno, sample_names = NULL, legend_font_size =
.check_mysql()
op_file = tempfile(pattern = "ucsc", fileext = ".tsv")

cmd = paste0("mysql --user genome --host genome-mysql.soe.ucsc.edu -NAD ", refBuild, " -e 'select chrom, txStart, txEnd, strand, name, name2, exonStarts, exonEnds from refGene WHERE name2 =\"", genesymbol, "\"'")
cmd = paste0("mariadb --user genome --host genome-mysql.soe.ucsc.edu -A -P 3306 -NAD ", refBuild, " -e 'select chrom, txStart, txEnd, strand, name, name2, exonStarts, exonEnds from refGene WHERE name2 =\"", genesymbol, "\"'")
message(paste0("Extracting gene models from UCSC:\n", " Gene: ", genesymbol, "\n", " build: ", refBuild, "\n query: ", cmd))

ucsc = data.table::fread(cmd = cmd, sep = "\t")
Expand All @@ -1946,7 +1959,7 @@ summarize_homer_annots = function(anno, sample_names = NULL, legend_font_size =
tar_chr = chr
}

cmd = paste0("mysql --user genome --host genome-mysql.soe.ucsc.edu -NAD ", refBuild, " -e 'select chrom, txStart, txEnd, strand, name, name2, exonStarts, exonEnds from refGene WHERE chrom =\"", tar_chr, "\"'")
cmd = paste0("mariadb --user genome --host genome-mysql.soe.ucsc.edu -A -P 3306 -NAD ", refBuild, " -e 'select chrom, txStart, txEnd, strand, name, name2, exonStarts, exonEnds from refGene WHERE chrom =\"", tar_chr, "\"'")
message(paste0("Extracting gene models from UCSC:\n", " chromosome: ", tar_chr, "\n", " build: ", refBuild, "\n query: ", cmd))
#system(command = cmd)
ucsc = data.table::fread(cmd = cmd)
Expand Down Expand Up @@ -2159,7 +2172,7 @@ summarize_homer_annots = function(anno, sample_names = NULL, legend_font_size =

temp_op_bed = tempfile(pattern = "profileplot_ucsc", tmpdir = op_dir, fileext = ".bed")

cmd = paste0("mysql --user genome --host genome-mysql.soe.ucsc.edu -NAD ", refBuild, " -e 'select chrom, txStart, txEnd, strand, name, name2 from refGene'")
cmd = paste0("mariadb --user genome --host genome-mysql.soe.ucsc.edu -A -P 3306 -NAD ", refBuild, " -e 'select chrom, txStart, txEnd, strand, name, name2 from refGene'")
message(paste0("Extracting gene models from UCSC:\n", " build: ", refBuild, "\n query: ", cmd))
#system(command = cmd)
ucsc = data.table::fread(cmd = cmd)
Expand Down Expand Up @@ -2264,20 +2277,24 @@ summarize_homer_annots = function(anno, sample_names = NULL, legend_font_size =
return(temp_op_bed)
}

.bwt_mats = function(bw, binSize, bed, size, startFrom, op_dir){
.bwt_mats = function(bw, binSize, bed, size, startFrom, op_dir,use_conda = NULL){

bn = gsub(pattern = "\\.bw$|\\.bigWig$", replacement = "",
x = basename(bw), ignore.case = TRUE)
message(paste0("Processing ", bn, ".."))

bw = gsub(pattern = " ", replacement = "\\ ", x = bw, fixed = TRUE) #Change spaces with \ for unix style paths

if (!is.null(use_conda)){
cmd_conda = paste("conda run -n", use_conda)
} else{
cmd_conda = ""
}
if(startFrom == "start"){
cmd = paste0("bwtool matrix -starts -tiled-averages=", binSize, " ", size, " " , bed , " ", bw, " ", paste0(op_dir, "/", bn, ".matrix"))
cmd = paste0( cmd_conda,"bwtool matrix -starts -tiled-averages=", binSize, " ", size, " " , bed , " ", bw, " ", paste0(op_dir, "/", bn, ".matrix"))
}else if(startFrom == "end"){
cmd = paste0("bwtool matrix -ends -tiled-averages=", binSize, " ", size, " " , bed , " ", bw, " ", paste0(op_dir, "/", bn, ".matrix"))
cmd = paste0( cmd_conda,"bwtool matrix -ends -tiled-averages=", binSize, " ", size, " " , bed , " ", bw, " ", paste0(op_dir, "/", bn, ".matrix"))
}else{
cmd = paste0("bwtool matrix -tiled-averages=", binSize, " ", size, " " , bed , " ", bw, " ", paste0(op_dir, "/", bn, ".matrix"))
cmd = paste0( cmd_conda,"bwtool matrix -tiled-averages=", binSize, " ", size, " " , bed , " ", bw, " ", paste0(op_dir, "/", bn, ".matrix"))
}
print(cmd)

Expand Down Expand Up @@ -2364,12 +2381,16 @@ summarize_homer_annots = function(anno, sample_names = NULL, legend_font_size =
mat
}

.extract_summary = function(bw, binSize, bed, op_dir){
.extract_summary = function(bw, binSize, bed, op_dir,use_conda = NULL){
bn = gsub(pattern = "\\.bw$|\\.bigWig$", replacement = "",
x = basename(bw), ignore.case = TRUE)
message(paste0(" Processing ", bn, ".."))

cmd = paste("bwtool summary -with-sum -keep-bed -header", bedSimple, bw, paste0(op_dir, bn, ".summary"))
if (!is.null(use_conda)){
cmd_conda = paste0("conda run -n ", use_conda)
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hi,

what is the use_conda argument in this context? I guess it should be the environment name, but the main argument seems to be TRUE or FALSE.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hi!
Indeed, the use_conda argument should be designed to accept the name of the conda environment. It is not appropriate to simply name it use_conda, as this could potentially mislead users. I suggest renaming it to use_conda_env to avoid confusion and to clearly communicate the purpose of the argument.
I believe that bwtool is much easier to install via conda. I think this improvement would make this package more robust.

}else {
cmd_conda = ""
}
cmd = paste(cmd_conda,"bwtool summary -with-sum -keep-bed -header", bedSimple, bw, paste0(op_dir, bn, ".summary"))
paste0(op_dir, "/", bn, ".summary")
}

Expand All @@ -2382,8 +2403,17 @@ summarize_homer_annots = function(anno, sample_names = NULL, legend_font_size =
data.table::data.table(chr, start, end)
}

.check_bwtool = function(warn = FALSE){
check = as.character(Sys.which(names = 'bwtool'))[1]
.check_bwtool = function(warn = FALSE, use_conda = NULL){
if (!is.null(use_conda)){
check <- try(system(paste0("conda run -n ", use_conda, " bwtool -h")))
if (class(check) == "try-error"){
check = as.character(Sys.which(names = 'bwtool'))[1]
}else{
check <- "conda mod"
}
} else{
check = as.character(Sys.which(names = 'bwtool'))[1]
}
if(check != ""){
if(warn){
message("Checking for bwtool installation")
Expand All @@ -2397,7 +2427,7 @@ summarize_homer_annots = function(anno, sample_names = NULL, legend_font_size =
}

.check_mysql = function(warn = FALSE){
check = as.character(Sys.which(names = 'mysql'))[1]
check = as.character(Sys.which(names = 'mariadb'))[1]
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Has mysql been deprecated?

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I am using MariaDB on Arch Linux, and the mysql package is being deprecated. It is expected to be removed in a future release, and it's suggested to use mariadb instead. By the way, MariaDB is a fork of MySQL , which is originated from the same source but has better performents and easy to install.

Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I see. Thank you. Just FYI, there is a high possibility that the code base will be changed to use the RMySQL or the RMariaDB R packages instead of command line SQL queries.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Aye, by the way, I've retained the original mysql command in a separate commit.

Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yup. I will review them soon.

if(check != ""){
if(warn){
message("Checking for mysql installation")
Expand Down