Skip to content

Commit 1361e67

Browse files
committed
changes for version 1.3.3
1 parent 882cacc commit 1361e67

File tree

5 files changed

+41
-41
lines changed

5 files changed

+41
-41
lines changed

README.md

Lines changed: 9 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -49,22 +49,16 @@ Install python modules (we strongly recommend installation via conda):
4949
conda install -c bioconda pysam=0.15.2 star=2.6.1b star-fusion=1.5.0 bowtie2=2.3.4.3 bx-python=0.8.2 crossmap=0.2.7
5050
```
5151

52-
53-
- R (>= 3.5.1)
52+
- R (>= 3.6.0)
5453
- R packages:
55-
- optparse
56-
- tidyverse
57-
- randomForest
58-
- Biostrings
59-
- GenomicRanges
60-
- BSgenome
61-
- bindrcpp
54+
- optparse (1.6.4)
55+
- tidyverse (1.3.0)
56+
- randomForest (4.6-14)
6257

6358
Install packages within R by
6459

6560
```
66-
install.packages(c("optparse", "tidyverse", "randomForest", "Biostrings","BiocManager","BSgenome","optparse"))
67-
BiocManager::install("GenomicRanges") #bioconductor package
61+
install.packages(c("optparse", "tidyverse", "randomForest"))
6862
```
6963

7064
## Usage
@@ -86,10 +80,10 @@ processing.py \
8680

8781
Before executing the example command
8882

89-
- [ ] rename `build_env.sh.smaple` into `build_env.sh` and configure content.
90-
- [ ] rename `config.py.smaple` into `config.py` and configure content.
91-
- [ ] rename `blacklist.txt.sample` into `blacklist.txt`.
83+
- rename `build_env.sh.sample` into `build_env.sh` and configure content.
84+
- rename `config.py.sample` into `config.py` and configure content.
85+
- rename `blacklist.txt.sample` into `blacklist.txt`.
9286

9387
```
9488
python processing.py -i test_case/SRR1659960_05pc_* -o test_easyfuse_1.3.1/
95-
```
89+
```

config.py.sample

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@ import os
99
# 3) Which reference data shall be used (ref_trans_version & ref_genome_build)
1010
# 4) To whom shall slurm mails be sent to (receiver)
1111

12-
version = "1.3.2"
12+
__version__ = "1.3.3"
1313

1414
pipeline_name = "EasyFuse"
1515

@@ -162,4 +162,4 @@ other_files = {
162162
"soapfuse_cfg": "/path/to/soapfuse_config/config_h<release>.txt",
163163
"soapfuse_cfg_mm10": "/path/to/soapfuse_config/config_m<release>.txt",
164164
"easyfuse_model": os.path.join(module_dir, "data", "model", "Fusion_modeling_FFPE_deploy_v01.model_full_data.EasyFuse_model.rds")
165-
}
165+
}

misc/queueing.py

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,8 @@ def get_jobs_by_name(name, system="slurm"):
2323
return get_jobs_by_name_slurm(name)
2424
elif system == "pbs":
2525
return get_jobs_by_name_pbs(name)
26+
else:
27+
return []
2628

2729
def get_jobs_by_name_pbs(name):
2830
jobs = []
@@ -70,12 +72,13 @@ def submit(job_name, cmd, cores, mem_usage, output_results_folder, dependencies,
7072
elif sched == "pbs":
7173
_submit_pbs(job_name, cmd, cores, mem_usage, output_results_folder, dependencies, module_file)
7274
else:
73-
_submit_nonqueue(cmd, module_file)
75+
_submit_nonqueue(job_name, cmd, module_file)
7476

75-
def _submit_nonqueue(cmd, module_file=""):
77+
def _submit_nonqueue(job_name, cmd, module_file=""):
7678
# if module_file:
7779
# cmd = " && ".join(["source " + module_file, " ".join(cmd)]).split(" ")
78-
# print(cmd)
80+
print("Running {}".format(job_name))
81+
print("CMD: {}".format(cmd))
7982
p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=False)
8083
(stdoutdata, stderrdata) = p.communicate()
8184
print(stdoutdata)

processing.py

Lines changed: 24 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -77,7 +77,7 @@ def run(self, tool_num_cutoff):
7777
# urla - note: would be happy to get the dependencies with a stacked LC, but is atm to complicated for me ^^
7878
dependency = []
7979
for sample in sample_list:
80-
dependency.extend(Queueing.get_jobs_by_name("Fetchdata-{}".format(sample)))
80+
dependency.extend(Queueing.get_jobs_by_name("Fetchdata-{}".format(sample), cfg.queueing_system))
8181
modelling_string = ""
8282
if cfg.other_files["easyfuse_model"]:
8383
modelling_string = " --model_predictions"
@@ -109,6 +109,7 @@ def execute_pipeline(self, fq1, fq2, sample_id, ref_genome, ref_trans, tool_num_
109109
# kallisto_index_path = indices["kallisto"]
110110
# pizzly_cache_path = "{}.pizzlyCache.txt".format(genes_gtf_path)
111111
starfusion_index_path = indices["starfusion"]
112+
fusioncatcher_index_path = indices["fusioncatcher"]
112113
infusion_cfg_path = other_files["infusion_cfg"]
113114
# starchip_param_path = other_files["starchip_param"]
114115

@@ -133,6 +134,9 @@ def execute_pipeline(self, fq1, fq2, sample_id, ref_genome, ref_trans, tool_num_
133134
infusion_path = os.path.join(fusion_path, "infusion")
134135
soapfuse_path = os.path.join(fusion_path, "soapfuse")
135136
fetchdata_path = os.path.join(self.working_dir, "Sample_{}".format(sample_id), "fetchdata")
137+
fastqc_1 = os.path.join(qc_path, sample_id + "_R1_fastqc", "fastqc_data.txt")
138+
fastqc_2 = os.path.join(qc_path, sample_id + "_R2_fastqc", "fastqc_data.txt")
139+
136140

137141
for folder in [
138142
output_results_path,
@@ -163,9 +167,9 @@ def execute_pipeline(self, fq1, fq2, sample_id, ref_genome, ref_trans, tool_num_
163167
# Define cmd strings for each program
164168
# urla: mapsplice requires gunzip'd read files and process substitutions don't seem to work in slurm scripts...
165169
# process substitution do somehow not work from this script - c/p the command line to the terminal, however, works w/o issues?!
166-
cmd_fastqc = "{} --nogroup --extract -t 6 -o {} {} {}".format(cmds["fastqc"], qc_path, fq1, fq2)
167-
cmd_qc_parser = "{} -i {}/*/fastqc_data.txt -o {}".format(os.path.join(module_dir, "misc", "qc_parser.py"), qc_path, qc_table_path)
168-
cmd_skewer = "{} -q {} -i {} {} -o {}".format(os.path.join(module_dir, "tool_wrapper", "skewer_wrapper.py"), qc_table_path, fq1, fq2, skewer_path)
170+
cmd_fastqc = "{0} --nogroup --extract -t 6 -o {1} {2} {3}".format(cmds["fastqc"], qc_path, fq1, fq2)
171+
cmd_qc_parser = "{0} -i {1} {2} -o {3}".format(os.path.join(module_dir, "misc", "qc_parser.py"), fastqc_1, fastqc_2, qc_table_path)
172+
cmd_skewer = "{0} -q {1} -i {2} {3} -o {4}".format(os.path.join(module_dir, "tool_wrapper", "skewer_wrapper.py"), qc_table_path, fq1, fq2, skewer_path)
169173

170174
fq0 = ""
171175
if "QC" in tools:
@@ -192,12 +196,12 @@ def execute_pipeline(self, fq1, fq2, sample_id, ref_genome, ref_trans, tool_num_
192196
cmd_star = "{0} --genomeDir {1} --outFileNamePrefix waiting_for_output_string --runThreadN waiting_for_cpu_number --runMode alignReads --readFilesIn {2} {3} --readFilesCommand zcat --chimSegmentMin 10 --chimJunctionOverhangMin 10 --alignSJDBoverhangMin 10 --alignMatesGapMax {4} --alignIntronMax {4} --chimSegmentReadGapMax 3 --alignSJstitchMismatchNmax 5 -1 5 5 --seedSearchStartLmax 20 --winAnchorMultimapNmax 50 --outSAMtype BAM SortedByCoordinate --chimOutType Junctions SeparateSAMold --chimOutJunctionFormat 1".format(cmds["star"], star_index_path, fq1, fq2, cfg.max_dist_proper_pair)
193197
# (3) Mapslice
194198
# urla: the "keep" parameter requires gunzip >= 1.6
195-
cmd_extr_fastq1 = "gunzip {0} --keep".format(fq1)
196-
cmd_extr_fastq2 = "gunzip {0} --keep".format(fq2)
199+
cmd_extr_fastq1 = "gunzip --keep {0}".format(fq1)
200+
cmd_extr_fastq2 = "gunzip --keep {0}".format(fq2)
197201
# Added python interpreter to circumvent external hardcoded shell script
198202
cmd_mapsplice = "python {0} --chromosome-dir {1} -x {2} -1 {3} -2 {4} --threads waiting_for_cpu_number --output {5} --qual-scale phred33 --bam --seglen 20 --min-map-len 40 --gene-gtf {6} --fusion".format(cmds["mapsplice"], genome_chrs_path, bowtie_index_path, fq1[:-3], fq2[:-3], mapsplice_path, genes_gtf_path)
199203
# (4) Fusiocatcher
200-
cmd_fusioncatcher = "{0} --input {1} --output {2} -p waiting_for_cpu_number".format(cmds["fusioncatcher"], ",".join([fq1, fq2]), fusioncatcher_path)
204+
cmd_fusioncatcher = "{0} --input {1} --data {2} --output {3} -p waiting_for_cpu_number".format(cmds["fusioncatcher"], ",".join([fq1, fq2]), fusioncatcher_index_path, fusioncatcher_path)
201205
# star-fusion and star-chip can be run upon a previous star run (this MUST NOT be the star_filter run, but the star_expression run)
202206
# (5)
203207
cmd_starfusion = "{0} --chimeric_junction {1} --genome_lib_dir {2} --CPU waiting_for_cpu_number --output_dir {3}".format(cmds["starfusion"], "{}_Chimeric.out.junction".format(os.path.join(star_path, sample_id)), starfusion_index_path, starfusion_path)
@@ -317,36 +321,37 @@ def execute_pipeline(self, fq1, fq2, sample_id, ref_genome, ref_trans, tool_num_
317321
exe_cmds[i] = exe_cmds[i].replace("waiting_for_output_string", exe_path[i]).replace("waiting_for_cpu_number", str(cpu))
318322
cmd = " && ".join([exe_cmds[i], cmd_samples + tool])
319323
# Managing slurm dependencies
324+
que_sys = cfg.queueing_system
320325
if tool == "Pizzly":
321-
dependency = Queueing.get_jobs_by_name("Kallisto-{0}".format(sample_id))
326+
dependency = Queueing.get_jobs_by_name("Kallisto-{0}".format(sample_id), que_sys)
322327
elif tool == "Starfusion" or tool == "Starchip":
323-
dependency = Queueing.get_jobs_by_name("Star-{0}".format(sample_id))
328+
dependency = Queueing.get_jobs_by_name("Star-{0}".format(sample_id), que_sys)
324329
elif tool == "Fetchdata":
325-
dependency = Queueing.get_jobs_by_name(sample_id)
330+
dependency = Queueing.get_jobs_by_name(sample_id, que_sys)
326331
elif tool == "Assembly":
327-
dependency = Queueing.get_jobs_by_name("Fetchdata-{0}".format(sample_id))
332+
dependency = Queueing.get_jobs_by_name("Fetchdata-{0}".format(sample_id), que_sys)
328333
elif tool == "ReadFilter":
329-
dependency = Queueing.get_jobs_by_name("QC-{0}".format(sample_id))
330-
# else:
331-
dependency.extend(Queueing.get_jobs_by_name("Readfilter-{0}".format(sample_id)))
332-
dependency.extend(Queueing.get_jobs_by_name("QC-{0}".format(sample_id)))
334+
dependency = Queueing.get_jobs_by_name("QC-{0}".format(sample_id), que_sys)
335+
dependency.extend(Queueing.get_jobs_by_name("Readfilter-{0}".format(sample_id), que_sys))
336+
dependency.extend(Queueing.get_jobs_by_name("QC-{0}".format(sample_id), que_sys))
333337
self.logger.debug("Submitting slurm job: CMD - {0}; PATH - {1}; DEPS - {2}".format(cmd, exe_path[i], dependency))
334338
self.submit_job(uid, cmd, cpu, mem, exe_path[i], dependency, "")
335339
else:
336340
self.logger.info("Skipping {0} as it is not selected for execution (Selected are: {1})".format(tool, tools))
337341

338342
def submit_job(self, uid, cmd, cores, mem_usage, output_results_folder, dependencies, mail):
339343
"""Submit job to slurm scheduling"""
340-
already_running = Queueing.get_jobs_by_name(uid)
344+
que_sys = cfg.queueing_system
345+
already_running = Queueing.get_jobs_by_name(uid, que_sys)
341346
if not already_running:
342347
# urla: for compatibility reasons (and to be independent of shell commands), concatenated commands are splitted again,
343348
# dependencies within the splitted groups updated and everything submitted sequentially to the queueing system
344349
module_file = os.path.join(cfg.module_dir, "build_env.sh")
345-
que_sys = cfg.queueing_system
350+
346351
for i, cmd_split in enumerate(cmd.split(" && ")):
347352
if not que_sys in ["slurm", "pbs"]:
348353
cmd_split = cmd_split.split(" ")
349-
dependencies.extend(Queueing.get_jobs_by_name("{0}_CMD{1}".format(uid, i - 1)))
354+
dependencies.extend(Queueing.get_jobs_by_name("{0}_CMD{1}".format(uid, i - 1), que_sys))
350355
Queueing.submit("{0}_CMD{1}".format(uid, i), cmd_split, cores, mem_usage, output_results_folder, dependencies, cfg.partition, cfg.user, cfg.time_limit, mail, module_file, que_sys)
351356
time.sleep(0.5)
352357
else:
@@ -365,7 +370,7 @@ def main():
365370

366371
# if version is request, print it and exit
367372
if args.version:
368-
print(cfg.version)
373+
print(cfg.__version__)
369374
sys.exit(0)
370375

371376
script_call = "python {} -i {} -o {}".format(os.path.realpath(__file__), " ".join([os.path.abspath(x) for x in args.input_paths]), os.path.abspath(args.output_folder))

summarize_data.py

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -11,8 +11,6 @@
1111
import time
1212
import argparse
1313

14-
import pandas as pd
15-
import seaborn as sns
1614
from join_data import DataJoining
1715
from misc.samples import SamplesDB
1816
import misc.io_methods as IOMethods

0 commit comments

Comments
 (0)