@@ -77,7 +77,7 @@ def run(self, tool_num_cutoff):
77
77
# urla - note: would be happy to get the dependencies with a stacked LC, but is atm to complicated for me ^^
78
78
dependency = []
79
79
for sample in sample_list :
80
- dependency .extend (Queueing .get_jobs_by_name ("Fetchdata-{}" .format (sample )))
80
+ dependency .extend (Queueing .get_jobs_by_name ("Fetchdata-{}" .format (sample ), cfg . queueing_system ))
81
81
modelling_string = ""
82
82
if cfg .other_files ["easyfuse_model" ]:
83
83
modelling_string = " --model_predictions"
@@ -109,6 +109,7 @@ def execute_pipeline(self, fq1, fq2, sample_id, ref_genome, ref_trans, tool_num_
109
109
# kallisto_index_path = indices["kallisto"]
110
110
# pizzly_cache_path = "{}.pizzlyCache.txt".format(genes_gtf_path)
111
111
starfusion_index_path = indices ["starfusion" ]
112
+ fusioncatcher_index_path = indices ["fusioncatcher" ]
112
113
infusion_cfg_path = other_files ["infusion_cfg" ]
113
114
# starchip_param_path = other_files["starchip_param"]
114
115
@@ -133,6 +134,9 @@ def execute_pipeline(self, fq1, fq2, sample_id, ref_genome, ref_trans, tool_num_
133
134
infusion_path = os .path .join (fusion_path , "infusion" )
134
135
soapfuse_path = os .path .join (fusion_path , "soapfuse" )
135
136
fetchdata_path = os .path .join (self .working_dir , "Sample_{}" .format (sample_id ), "fetchdata" )
137
+ fastqc_1 = os .path .join (qc_path , sample_id + "_R1_fastqc" , "fastqc_data.txt" )
138
+ fastqc_2 = os .path .join (qc_path , sample_id + "_R2_fastqc" , "fastqc_data.txt" )
139
+
136
140
137
141
for folder in [
138
142
output_results_path ,
@@ -163,9 +167,9 @@ def execute_pipeline(self, fq1, fq2, sample_id, ref_genome, ref_trans, tool_num_
163
167
# Define cmd strings for each program
164
168
# urla: mapsplice requires gunzip'd read files and process substitutions don't seem to work in slurm scripts...
165
169
# process substitution do somehow not work from this script - c/p the command line to the terminal, however, works w/o issues?!
166
- cmd_fastqc = "{} --nogroup --extract -t 6 -o {} {} {}" .format (cmds ["fastqc" ], qc_path , fq1 , fq2 )
167
- cmd_qc_parser = "{} -i {}/*/fastqc_data.txt -o {}" .format (os .path .join (module_dir , "misc" , "qc_parser.py" ), qc_path , qc_table_path )
168
- cmd_skewer = "{} -q {} -i {} {} -o {}" .format (os .path .join (module_dir , "tool_wrapper" , "skewer_wrapper.py" ), qc_table_path , fq1 , fq2 , skewer_path )
170
+ cmd_fastqc = "{0 } --nogroup --extract -t 6 -o {1 } {2 } {3 }" .format (cmds ["fastqc" ], qc_path , fq1 , fq2 )
171
+ cmd_qc_parser = "{0 } -i {1} {2} -o {3 }" .format (os .path .join (module_dir , "misc" , "qc_parser.py" ), fastqc_1 , fastqc_2 , qc_table_path )
172
+ cmd_skewer = "{0 } -q {1 } -i {2 } {3 } -o {4 }" .format (os .path .join (module_dir , "tool_wrapper" , "skewer_wrapper.py" ), qc_table_path , fq1 , fq2 , skewer_path )
169
173
170
174
fq0 = ""
171
175
if "QC" in tools :
@@ -192,12 +196,12 @@ def execute_pipeline(self, fq1, fq2, sample_id, ref_genome, ref_trans, tool_num_
192
196
cmd_star = "{0} --genomeDir {1} --outFileNamePrefix waiting_for_output_string --runThreadN waiting_for_cpu_number --runMode alignReads --readFilesIn {2} {3} --readFilesCommand zcat --chimSegmentMin 10 --chimJunctionOverhangMin 10 --alignSJDBoverhangMin 10 --alignMatesGapMax {4} --alignIntronMax {4} --chimSegmentReadGapMax 3 --alignSJstitchMismatchNmax 5 -1 5 5 --seedSearchStartLmax 20 --winAnchorMultimapNmax 50 --outSAMtype BAM SortedByCoordinate --chimOutType Junctions SeparateSAMold --chimOutJunctionFormat 1" .format (cmds ["star" ], star_index_path , fq1 , fq2 , cfg .max_dist_proper_pair )
193
197
# (3) Mapslice
194
198
# urla: the "keep" parameter requires gunzip >= 1.6
195
- cmd_extr_fastq1 = "gunzip {0} --keep" .format (fq1 )
196
- cmd_extr_fastq2 = "gunzip {0} --keep" .format (fq2 )
199
+ cmd_extr_fastq1 = "gunzip --keep {0} " .format (fq1 )
200
+ cmd_extr_fastq2 = "gunzip --keep {0} " .format (fq2 )
197
201
# Added python interpreter to circumvent external hardcoded shell script
198
202
cmd_mapsplice = "python {0} --chromosome-dir {1} -x {2} -1 {3} -2 {4} --threads waiting_for_cpu_number --output {5} --qual-scale phred33 --bam --seglen 20 --min-map-len 40 --gene-gtf {6} --fusion" .format (cmds ["mapsplice" ], genome_chrs_path , bowtie_index_path , fq1 [:- 3 ], fq2 [:- 3 ], mapsplice_path , genes_gtf_path )
199
203
# (4) Fusiocatcher
200
- cmd_fusioncatcher = "{0} --input {1} --output {2} -p waiting_for_cpu_number" .format (cmds ["fusioncatcher" ], "," .join ([fq1 , fq2 ]), fusioncatcher_path )
204
+ cmd_fusioncatcher = "{0} --input {1} --data {2} --output {3} - p waiting_for_cpu_number" .format (cmds ["fusioncatcher" ], "," .join ([fq1 , fq2 ]), fusioncatcher_index_path , fusioncatcher_path )
201
205
# star-fusion and star-chip can be run upon a previous star run (this MUST NOT be the star_filter run, but the star_expression run)
202
206
# (5)
203
207
cmd_starfusion = "{0} --chimeric_junction {1} --genome_lib_dir {2} --CPU waiting_for_cpu_number --output_dir {3}" .format (cmds ["starfusion" ], "{}_Chimeric.out.junction" .format (os .path .join (star_path , sample_id )), starfusion_index_path , starfusion_path )
@@ -317,36 +321,37 @@ def execute_pipeline(self, fq1, fq2, sample_id, ref_genome, ref_trans, tool_num_
317
321
exe_cmds [i ] = exe_cmds [i ].replace ("waiting_for_output_string" , exe_path [i ]).replace ("waiting_for_cpu_number" , str (cpu ))
318
322
cmd = " && " .join ([exe_cmds [i ], cmd_samples + tool ])
319
323
# Managing slurm dependencies
324
+ que_sys = cfg .queueing_system
320
325
if tool == "Pizzly" :
321
- dependency = Queueing .get_jobs_by_name ("Kallisto-{0}" .format (sample_id ))
326
+ dependency = Queueing .get_jobs_by_name ("Kallisto-{0}" .format (sample_id ), que_sys )
322
327
elif tool == "Starfusion" or tool == "Starchip" :
323
- dependency = Queueing .get_jobs_by_name ("Star-{0}" .format (sample_id ))
328
+ dependency = Queueing .get_jobs_by_name ("Star-{0}" .format (sample_id ), que_sys )
324
329
elif tool == "Fetchdata" :
325
- dependency = Queueing .get_jobs_by_name (sample_id )
330
+ dependency = Queueing .get_jobs_by_name (sample_id , que_sys )
326
331
elif tool == "Assembly" :
327
- dependency = Queueing .get_jobs_by_name ("Fetchdata-{0}" .format (sample_id ))
332
+ dependency = Queueing .get_jobs_by_name ("Fetchdata-{0}" .format (sample_id ), que_sys )
328
333
elif tool == "ReadFilter" :
329
- dependency = Queueing .get_jobs_by_name ("QC-{0}" .format (sample_id ))
330
- # else:
331
- dependency .extend (Queueing .get_jobs_by_name ("Readfilter-{0}" .format (sample_id )))
332
- dependency .extend (Queueing .get_jobs_by_name ("QC-{0}" .format (sample_id )))
334
+ dependency = Queueing .get_jobs_by_name ("QC-{0}" .format (sample_id ), que_sys )
335
+ dependency .extend (Queueing .get_jobs_by_name ("Readfilter-{0}" .format (sample_id ), que_sys ))
336
+ dependency .extend (Queueing .get_jobs_by_name ("QC-{0}" .format (sample_id ), que_sys ))
333
337
self .logger .debug ("Submitting slurm job: CMD - {0}; PATH - {1}; DEPS - {2}" .format (cmd , exe_path [i ], dependency ))
334
338
self .submit_job (uid , cmd , cpu , mem , exe_path [i ], dependency , "" )
335
339
else :
336
340
self .logger .info ("Skipping {0} as it is not selected for execution (Selected are: {1})" .format (tool , tools ))
337
341
338
342
def submit_job (self , uid , cmd , cores , mem_usage , output_results_folder , dependencies , mail ):
339
343
"""Submit job to slurm scheduling"""
340
- already_running = Queueing .get_jobs_by_name (uid )
344
+ que_sys = cfg .queueing_system
345
+ already_running = Queueing .get_jobs_by_name (uid , que_sys )
341
346
if not already_running :
342
347
# urla: for compatibility reasons (and to be independent of shell commands), concatenated commands are splitted again,
343
348
# dependencies within the splitted groups updated and everything submitted sequentially to the queueing system
344
349
module_file = os .path .join (cfg .module_dir , "build_env.sh" )
345
- que_sys = cfg . queueing_system
350
+
346
351
for i , cmd_split in enumerate (cmd .split (" && " )):
347
352
if not que_sys in ["slurm" , "pbs" ]:
348
353
cmd_split = cmd_split .split (" " )
349
- dependencies .extend (Queueing .get_jobs_by_name ("{0}_CMD{1}" .format (uid , i - 1 )))
354
+ dependencies .extend (Queueing .get_jobs_by_name ("{0}_CMD{1}" .format (uid , i - 1 ), que_sys ))
350
355
Queueing .submit ("{0}_CMD{1}" .format (uid , i ), cmd_split , cores , mem_usage , output_results_folder , dependencies , cfg .partition , cfg .user , cfg .time_limit , mail , module_file , que_sys )
351
356
time .sleep (0.5 )
352
357
else :
@@ -365,7 +370,7 @@ def main():
365
370
366
371
# if version is request, print it and exit
367
372
if args .version :
368
- print (cfg .version )
373
+ print (cfg .__version__ )
369
374
sys .exit (0 )
370
375
371
376
script_call = "python {} -i {} -o {}" .format (os .path .realpath (__file__ ), " " .join ([os .path .abspath (x ) for x in args .input_paths ]), os .path .abspath (args .output_folder ))
0 commit comments