Skip to content

Commit 1274bc9

Browse files
authored
Merge pull request #25 from microbiomedata/21-stats-file
21 Added in stats file for outputs
2 parents 8d7df73 + 34e83d1 commit 1274bc9

File tree

6 files changed

+109
-98
lines changed

6 files changed

+109
-98
lines changed

docs/index.rst

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
Metagenome Assembly Workflow (v1.0.2)
2-
========================================
2+
=====================================
33

44
.. image:: workflow_assembly.png
55
:scale: 60%

docs/workflow_assembly.png

-129 KB
Loading

input.json

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
{
2-
"jgi_metaAssembly.input_files":["https://data.microbiomedata.org/data/test_data/11809.7.220839.TCCTGAG-ACTGCAT.fastq.gz"],
2+
"jgi_metaAssembly.input_files":["/global/cfs/cdirs/m3408/www/test_data/SRR13128014.pacbio.subsample.ccs.fastq.gz"],
33
"jgi_metaAssembly.proj":"nmdc:503125_160870",
44
"jgi_metaAssembly.memory": "105G",
55
"jgi_metaAssembly.threads": "16",
6-
"jgi_metaAssembly.shortRead": true
6+
"jgi_metaAssembly.shortRead": false
77
}

jgi_assembly.wdl

Lines changed: 72 additions & 67 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@ import "shortReads_assembly.wdl" as srma
33
import "make_interleaved_WDL/make_interleaved_reads.wdl" as int
44
import "https://code.jgi.doe.gov/BFoster/jgi_meta_wdl/-/raw/bc7c4371ea0fa83355bada341ec353b9feb3eff2/metagenome_improved/metaflye.wdl" as lrma
55

6-
workflow jgi_metaAssembly{
6+
workflow jgi_metaAssembly {
77
input {
88
Boolean shortRead
99
String proj
@@ -20,62 +20,60 @@ workflow jgi_metaAssembly{
2020
String minimap2_container = "staphb/minimap2:2.25"
2121
String minimap2_parameters = "-a -x map-hifi -t 32"
2222
String samtools_container = "staphb/samtools:1.18"
23-
String bbtools_container = "microbiomedata/bbtools:38.96"
23+
String bbtools_container = "microbiomedata/bbtools:39.03"
2424
String spades_container="staphb/spades:4.0.0"
2525
}
2626

2727

2828
if (shortRead) {
29-
if (length(input_files) > 1){
30-
call int.make_interleaved_reads{
31-
input:
32-
input_files = input_files,
33-
container = bbtools_container
34-
29+
if (length(input_files) > 1) {
30+
call int.make_interleaved_reads {
31+
input:
32+
input_files = input_files,
33+
container = "microbiomedata/bbtools:38.96"
3534
}
3635
}
37-
call srma.jgi_metaASM{
36+
call srma.jgi_metaASM {
3837
input:
39-
memory = memory,
40-
threads = threads,
41-
input_file = if length(input_files) > 1 then make_interleaved_reads.interleaved_fastq else input_files[0],
42-
proj = proj,
43-
bbtools_container = bbtools_container,
44-
spades_container = spades_container
45-
38+
memory = memory,
39+
threads = threads,
40+
input_file = if length(input_files) > 1 then make_interleaved_reads.interleaved_fastq else input_files[0],
41+
proj = proj,
42+
bbtools_container = "microbiomedata/bbtools:38.96",
43+
spades_container = spades_container
4644
}
4745

4846
}
4947
if (!shortRead) {
50-
call lrma.metaflye{
48+
call lrma.metaflye {
5149
input:
52-
proj = proj,
53-
input_fastq = input_files,
54-
flye_container = flye_container,
55-
flye_parameters = flye_parameters,
56-
smrtlink_container = smrtlink_container,
57-
racon_container = racon_container,
58-
minimap2_container = minimap2_container,
59-
minimap2_parameters = minimap2_parameters,
60-
samtools_container = samtools_container,
61-
bbtools_container = bbtools_container
50+
proj = proj,
51+
input_fastq = input_files,
52+
flye_container = flye_container,
53+
flye_parameters = flye_parameters,
54+
smrtlink_container = smrtlink_container,
55+
racon_container = racon_container,
56+
minimap2_container = minimap2_container,
57+
minimap2_parameters = minimap2_parameters,
58+
samtools_container = samtools_container,
59+
bbtools_container = bbtools_container
6260
}
63-
call finish_lrasm{
61+
call finish_lrasm {
6462
input:
65-
proj = proj,
66-
prefix = prefix,
67-
container = bbtools_container,
68-
contigs = metaflye.final_contigs,
69-
bam = metaflye.final_bam,
70-
scaffolds = metaflye.final_scaffolds,
71-
agp = metaflye.final_agp,
72-
legend = metaflye.final_legend,
73-
basecov = metaflye.final_basecov,
74-
sam = metaflye.final_sam,
75-
output_file = metaflye.final_output_file,
76-
stats = metaflye.final_stats,
77-
summary_stats = metaflye.final_summary_stats,
78-
pileup_out = metaflye.final_pileup_out
63+
proj = proj,
64+
prefix = prefix,
65+
container = bbtools_container,
66+
contigs = metaflye.final_contigs,
67+
bam = metaflye.final_bam,
68+
scaffolds = metaflye.final_scaffolds,
69+
agp = metaflye.final_agp,
70+
legend = metaflye.final_legend,
71+
basecov = metaflye.final_basecov,
72+
sam = metaflye.final_sam,
73+
output_file = metaflye.final_output_file,
74+
stats = metaflye.final_stats,
75+
summary_stats = metaflye.final_summary_stats,
76+
pileup_out = metaflye.final_pileup_out
7977
}
8078
}
8179
output {
@@ -100,33 +98,34 @@ workflow jgi_metaAssembly{
10098
File? sr_bam=jgi_metaASM.bam
10199
File? sr_samgz=jgi_metaASM.samgz
102100
File? sr_covstats=jgi_metaASM.covstats
103-
File? sr_asmstats=jgi_metaASM.asmstats
104101
File? sr_asminfo=jgi_metaASM.asminfo
105102
File? sr_bbcms_fq = jgi_metaASM.bbcms_fastq
106-
103+
104+
#Both
105+
File? stats = if (shortRead) then jgi_metaASM.asmstats else finish_lrasm.asm_stats
107106
}
108107
}
109108

110109

111110
task finish_lrasm {
112111
input {
113-
File contigs
114-
File bam
115-
File scaffolds
116-
File agp
117-
File legend
118-
File basecov
119-
File sam
120-
File output_file
121-
File stats
122-
File summary_stats
123-
File pileup_out
124-
String container
125-
String proj
126-
String prefix
127-
String orig_prefix="scaffold"
128-
String sed="s/~{orig_prefix}_/~{proj}_/g"
129-
# String start
112+
File contigs
113+
File bam
114+
File scaffolds
115+
File agp
116+
File legend
117+
File basecov
118+
File sam
119+
File output_file
120+
File stats
121+
File summary_stats
122+
File pileup_out
123+
String container
124+
String proj
125+
String prefix
126+
String orig_prefix="scaffold"
127+
String sed="s/~{orig_prefix}_/~{proj}_/g"
128+
# String start
130129
}
131130
command<<<
132131
@@ -144,12 +143,17 @@ task finish_lrasm {
144143
cat ~{basecov} | sed ~{sed} > ~{prefix}_contigs.sorted.bam.pileup.basecov
145144
cat ~{pileup_out} | sed ~{sed} > ~{prefix}_contigs.sorted.bam.pileup.out
146145
147-
## Bam file
148-
samtools view -h ~{bam} | sed ~{sed} | \
149-
samtools view -hb -o ~{prefix}_pairedMapped_sorted.bam
150-
## Sam.gz file
151-
samtools view -h ~{sam} | sed ~{sed} | \
152-
gzip -c - > ~{prefix}_pairedMapped.sam.gz
146+
## Bam file
147+
samtools view -h ~{bam} | sed ~{sed} | \
148+
samtools view -hb -o ~{prefix}_pairedMapped_sorted.bam
149+
## Sam.gz file
150+
samtools view -h ~{sam} | sed ~{sed} | \
151+
gzip -c - > ~{prefix}_pairedMapped.sam.gz
152+
153+
# stats file
154+
bbstats.sh format=8 in=~{scaffolds} out=stats.json
155+
sed -i 's/l_gt50k/l_gt50K/g' stats.json
156+
# Drop the "filename" key from the bbstats JSON. Write to a temporary file and
# rename it afterwards: redirecting straight back onto stats.json would truncate
# the file before it is read, leaving an empty stats.json.
jq 'del(.filename)' stats.json > stats.json.tmp && mv stats.json.tmp stats.json
153157
154158
>>>
155159
output {
@@ -164,6 +168,7 @@ task finish_lrasm {
164168
File final_stats = "~{prefix}_contigs.sam.stats"
165169
File final_summary_stats = "~{prefix}_summary.stats"
166170
File final_pileup_out = "~{prefix}_contigs.sorted.bam.pileup.out"
171+
File asm_stats = "stats.json"
167172
}
168173

169174
runtime {

0 commit comments

Comments
 (0)