Skip to content

Commit

Permalink
Merge pull request #25 from microbiomedata/21-stats-file
Browse files Browse the repository at this point in the history
21 Added in stats file for outputs
  • Loading branch information
vlilanl authored Nov 14, 2024
2 parents 8d7df73 + 34e83d1 commit 1274bc9
Show file tree
Hide file tree
Showing 6 changed files with 109 additions and 98 deletions.
2 changes: 1 addition & 1 deletion docs/index.rst
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
Metagenome Assembly Workflow (v1.0.2)
========================================
=====================================

.. image:: workflow_assembly.png
:scale: 60%
Expand Down
Binary file modified docs/workflow_assembly.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
4 changes: 2 additions & 2 deletions input.json
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
{
"jgi_metaAssembly.input_files":["https://data.microbiomedata.org/data/test_data/11809.7.220839.TCCTGAG-ACTGCAT.fastq.gz"],
"jgi_metaAssembly.input_files":["/global/cfs/cdirs/m3408/www/test_data/SRR13128014.pacbio.subsample.ccs.fastq.gz"],
"jgi_metaAssembly.proj":"nmdc:503125_160870",
"jgi_metaAssembly.memory": "105G",
"jgi_metaAssembly.threads": "16",
"jgi_metaAssembly.shortRead": true
"jgi_metaAssembly.shortRead": false
}
139 changes: 72 additions & 67 deletions jgi_assembly.wdl
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ import "shortReads_assembly.wdl" as srma
import "make_interleaved_WDL/make_interleaved_reads.wdl" as int
import "https://code.jgi.doe.gov/BFoster/jgi_meta_wdl/-/raw/bc7c4371ea0fa83355bada341ec353b9feb3eff2/metagenome_improved/metaflye.wdl" as lrma

workflow jgi_metaAssembly{
workflow jgi_metaAssembly {
input {
Boolean shortRead
String proj
Expand All @@ -20,62 +20,60 @@ workflow jgi_metaAssembly{
String minimap2_container = "staphb/minimap2:2.25"
String minimap2_parameters = "-a -x map-hifi -t 32"
String samtools_container = "staphb/samtools:1.18"
String bbtools_container = "microbiomedata/bbtools:38.96"
String bbtools_container = "microbiomedata/bbtools:39.03"
String spades_container="staphb/spades:4.0.0"
}


if (shortRead) {
if (length(input_files) > 1){
call int.make_interleaved_reads{
input:
input_files = input_files,
container = bbtools_container

if (length(input_files) > 1) {
call int.make_interleaved_reads {
input:
input_files = input_files,
container = "microbiomedata/bbtools:38.96"
}
}
call srma.jgi_metaASM{
call srma.jgi_metaASM {
input:
memory = memory,
threads = threads,
input_file = if length(input_files) > 1 then make_interleaved_reads.interleaved_fastq else input_files[0],
proj = proj,
bbtools_container = bbtools_container,
spades_container = spades_container

memory = memory,
threads = threads,
input_file = if length(input_files) > 1 then make_interleaved_reads.interleaved_fastq else input_files[0],
proj = proj,
bbtools_container = "microbiomedata/bbtools:38.96",
spades_container = spades_container
}

}
if (!shortRead) {
call lrma.metaflye{
call lrma.metaflye {
input:
proj = proj,
input_fastq = input_files,
flye_container = flye_container,
flye_parameters = flye_parameters,
smrtlink_container = smrtlink_container,
racon_container = racon_container,
minimap2_container = minimap2_container,
minimap2_parameters = minimap2_parameters,
samtools_container = samtools_container,
bbtools_container = bbtools_container
proj = proj,
input_fastq = input_files,
flye_container = flye_container,
flye_parameters = flye_parameters,
smrtlink_container = smrtlink_container,
racon_container = racon_container,
minimap2_container = minimap2_container,
minimap2_parameters = minimap2_parameters,
samtools_container = samtools_container,
bbtools_container = bbtools_container
}
call finish_lrasm{
call finish_lrasm {
input:
proj = proj,
prefix = prefix,
container = bbtools_container,
contigs = metaflye.final_contigs,
bam = metaflye.final_bam,
scaffolds = metaflye.final_scaffolds,
agp = metaflye.final_agp,
legend = metaflye.final_legend,
basecov = metaflye.final_basecov,
sam = metaflye.final_sam,
output_file = metaflye.final_output_file,
stats = metaflye.final_stats,
summary_stats = metaflye.final_summary_stats,
pileup_out = metaflye.final_pileup_out
proj = proj,
prefix = prefix,
container = bbtools_container,
contigs = metaflye.final_contigs,
bam = metaflye.final_bam,
scaffolds = metaflye.final_scaffolds,
agp = metaflye.final_agp,
legend = metaflye.final_legend,
basecov = metaflye.final_basecov,
sam = metaflye.final_sam,
output_file = metaflye.final_output_file,
stats = metaflye.final_stats,
summary_stats = metaflye.final_summary_stats,
pileup_out = metaflye.final_pileup_out
}
}
output {
Expand All @@ -100,33 +98,34 @@ workflow jgi_metaAssembly{
File? sr_bam=jgi_metaASM.bam
File? sr_samgz=jgi_metaASM.samgz
File? sr_covstats=jgi_metaASM.covstats
File? sr_asmstats=jgi_metaASM.asmstats
File? sr_asminfo=jgi_metaASM.asminfo
File? sr_bbcms_fq = jgi_metaASM.bbcms_fastq
#Both
File? stats = if (shortRead) then jgi_metaASM.asmstats else finish_lrasm.asm_stats
}
}
task finish_lrasm {
input {
File contigs
File bam
File scaffolds
File agp
File legend
File basecov
File sam
File output_file
File stats
File summary_stats
File pileup_out
String container
String proj
String prefix
String orig_prefix="scaffold"
String sed="s/~{orig_prefix}_/~{proj}_/g"
# String start
File contigs
File bam
File scaffolds
File agp
File legend
File basecov
File sam
File output_file
File stats
File summary_stats
File pileup_out
String container
String proj
String prefix
String orig_prefix="scaffold"
String sed="s/~{orig_prefix}_/~{proj}_/g"
# String start
}
command<<<
Expand All @@ -144,12 +143,17 @@ task finish_lrasm {
cat ~{basecov} | sed ~{sed} > ~{prefix}_contigs.sorted.bam.pileup.basecov
cat ~{pileup_out} | sed ~{sed} > ~{prefix}_contigs.sorted.bam.pileup.out
## Bam file
samtools view -h ~{bam} | sed ~{sed} | \
samtools view -hb -o ~{prefix}_pairedMapped_sorted.bam
## Sam.gz file
samtools view -h ~{sam} | sed ~{sed} | \
gzip -c - > ~{prefix}_pairedMapped.sam.gz
## Bam file
samtools view -h ~{bam} | sed ~{sed} | \
samtools view -hb -o ~{prefix}_pairedMapped_sorted.bam
## Sam.gz file
samtools view -h ~{sam} | sed ~{sed} | \
gzip -c - > ~{prefix}_pairedMapped.sam.gz
# stats file
# Generate assembly statistics for the scaffolds as JSON (bbstats format=8).
bbstats.sh format=8 in=~{scaffolds} out=stats.raw.json
# Rename the l_gt50k key to l_gt50K and drop the "filename" field.
# The result is written to a different file than the one being read:
# redirecting a pipeline's output onto its own input file lets the shell
# truncate that file before it has been read, leaving stats.json empty.
sed 's/l_gt50k/l_gt50K/g' stats.raw.json | jq 'del(.filename)' > stats.json
>>>
output {
Expand All @@ -164,6 +168,7 @@ task finish_lrasm {
File final_stats = "~{prefix}_contigs.sam.stats"
File final_summary_stats = "~{prefix}_summary.stats"
File final_pileup_out = "~{prefix}_contigs.sorted.bam.pileup.out"
File asm_stats = "stats.json"
}

runtime {
Expand Down
Loading

0 comments on commit 1274bc9

Please sign in to comment.