Skip to content

Commit 73da03c

Browse files
authored
Fastq decontaminate deacon (#9302)
* initial template * first version * review update
1 parent b25dc59 commit 73da03c

File tree

4 files changed

+369
-0
lines changed

4 files changed

+369
-0
lines changed
Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,43 @@
1+
include { DEACON_INDEX } from '../../../modules/nf-core/deacon/index/main'
2+
include { DEACON_FILTER } from '../../../modules/nf-core/deacon/filter/main'
3+
4+
// Subworkflow: builds a Deacon index from the FASTA of every input tuple (DEACON_INDEX)
// and then filters the reads of the same tuple against that index (DEACON_FILTER).
workflow FASTQ_DECONTAMINATE_DEACON {
5+
6+
take:
7+
// `reads` may be a single path or a list of paths; the checks in `main:` accept both for single-end data
ch_fasta_reads // channel: [ val(meta), path(fasta), [ path(reads) ] ]
8+
9+
main:
10+
11+
// Collects the versions.yml files emitted by both processes
ch_versions = Channel.empty()
12+
13+
// Drop the reads so that only [ meta, fasta ] is passed to the indexing step
ch_fasta = ch_fasta_reads
14+
.map { meta, fasta, reads -> [ meta, fasta ] }
15+
// Validate that the number of read files agrees with meta.single_end.
// Both branches emit the same [ meta, reads ] tuple unchanged; the map only aborts on a mismatch.
16+
ch_reads = ch_fasta_reads
17+
.map { meta, fasta, reads ->
18+
if (meta.single_end) {
19+
// A bare (non-List) `reads` value is accepted here; only a List whose size is not 1 is rejected
if (reads instanceof List && reads.size() != 1) {
20+
error("Error: Check your meta.single_end value. Single-end reads should contain one file only.")
21+
}
22+
return [ meta, reads ]
23+
} else {
24+
// Paired-end input must be a List holding exactly two files
if (!(reads instanceof List) || reads.size() != 2) {
25+
error("Error: Check your meta.single_end value. Paired-end reads should contain two files; a forward and a reverse.")
26+
}
27+
return [ meta, reads ]
28+
}
29+
}
30+
31+
DEACON_INDEX ( ch_fasta )
32+
// .first() keeps a single versions file from the process instead of one per task
ch_versions = ch_versions.mix(DEACON_INDEX.out.versions.first())
33+
34+
// join() matches index and reads on the first tuple element (meta), giving [ meta, index, reads ]
DEACON_FILTER(DEACON_INDEX.out.index.join(ch_reads))
35+
ch_versions = ch_versions.mix(DEACON_FILTER.out.versions.first())
36+
37+
emit:
38+
index = DEACON_INDEX.out.index // channel: [ val(meta), [ index ] ]
39+
fastq_filtered = DEACON_FILTER.out.fastq_filtered // channel: [ val(meta), [ fastq ] ]
40+
// Exposes the `log` output of DEACON_FILTER under the name `summary`
summary = DEACON_FILTER.out.log // channel: [ val(meta), [ log ] ]
41+
42+
versions = ch_versions // channel: [ versions.yml ]
43+
}
Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,53 @@
1+
# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/subworkflows/yaml-schema.json
2+
name: "fastq_decontaminate_deacon"
3+
description: |
4+
Decontaminate FastQ files by filtering reads that match a reference genome using Deacon
5+
keywords:
6+
- filter
7+
- index
8+
- fasta
9+
- fastq
10+
- genome
11+
- reference
12+
- minimizer
13+
- decontamination
14+
components:
15+
- deacon/index
16+
- deacon/filter
17+
input:
18+
- ch_fasta_reads:
19+
type: file
20+
description: |
21+
Input genome fasta file and a list of FastQ files of size 1 and 2 for single-end and paired-end data, respectively.
22+
Structure: [ val(meta), path(fasta), [ path(reads) ] ]
23+
output:
24+
- index:
25+
type: file
26+
description: |
27+
Deacon minimizer index file
28+
Structure: [ val(meta), path(index) ]
29+
pattern: "*.idx"
30+
- fastq_filtered:
31+
type: file
32+
description: |
33+
List of output filtered FastQ files of size 1 and 2 for single-end and paired-end data, respectively.
34+
Structure: [ val(meta), path(${prefix}*.fq) ]
35+
pattern: "*.fq"
36+
- summary:
37+
type: file
38+
description: |
39+
JSON file containing summary of results.
40+
Structure: [ val(meta), path(${prefix}.json) ]
41+
pattern: "*.json"
42+
- versions:
43+
type: file
44+
description: |
45+
File containing software versions
46+
Structure: [ path(versions.yml) ]
47+
pattern: "versions.yml"
48+
authors:
49+
- "@Baksic-Ivan"
50+
- "@Omer0191"
51+
maintainers:
52+
- "@Baksic-Ivan"
53+
- "@Omer0191"
Lines changed: 103 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,103 @@
1+
// nf-test specification for the FASTQ_DECONTAMINATE_DEACON subworkflow.
// Covers a single-end run, a paired-end run and a single-end stub run, all on the sarscov2 test data.
nextflow_workflow {
2+
3+
name "Test Subworkflow FASTQ_DECONTAMINATE_DEACON"
4+
script "../main.nf"
5+
workflow "FASTQ_DECONTAMINATE_DEACON"
6+
7+
// Tags naming this subworkflow and the two deacon modules it includes
tag "subworkflows"
8+
tag "subworkflows_nfcore"
9+
tag "subworkflows/fastq_decontaminate_deacon"
10+
tag "deacon"
11+
tag "deacon/index"
12+
tag "deacon/filter"
13+
14+
// Single-end case: the reads element is one bare file, not a list
test("sarscov2 - fastq - single-end") {
15+
16+
when {
17+
workflow {
18+
"""
19+
input[0] = Channel.of(
20+
[
21+
[ id:'test', single_end:true ], // meta map
22+
file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true),
23+
file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true)
24+
]
25+
)
26+
"""
27+
}
28+
}
29+
30+
then {
31+
assertAll(
32+
{ assert workflow.success },
33+
// Snapshot holds the index, the filtered reads, only the file name of the summary
// (presumably because its content is not stable between runs - confirm), and the parsed versions YAML
{ assert snapshot(
34+
workflow.out.index,
35+
workflow.out.fastq_filtered,
36+
file(workflow.out.summary[0][1]).name,
37+
workflow.out.versions.collect { path(it).yaml }
38+
).match()}
39+
)
40+
}
41+
}
42+
43+
// Paired-end case: the reads element is a list of two files
test("sarscov2 - fastq - paired-end") {
44+
45+
when {
46+
workflow {
47+
"""
48+
input[0] = Channel.of(
49+
[
50+
[ id:'test', single_end:false ], // meta map
51+
file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true),
52+
[
53+
file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true),
54+
file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true)
55+
]
56+
]
57+
)
58+
"""
59+
}
60+
}
61+
62+
then {
63+
assertAll(
64+
{ assert workflow.success },
65+
// Same snapshot contents as the single-end test: index, filtered reads, summary file name, versions
{ assert snapshot(
66+
workflow.out.index,
67+
workflow.out.fastq_filtered,
68+
file(workflow.out.summary[0][1]).name,
69+
workflow.out.versions.collect { path(it).yaml }
70+
).match()}
71+
)
72+
}
73+
}
74+
75+
// Stub case: same single-end input as the first test, run with the -stub option
test("sarscov2 - fastq - single-end - stub") {
76+
77+
options "-stub"
78+
79+
when {
80+
workflow {
81+
"""
82+
input[0] = Channel.of(
83+
[
84+
[ id:'test', single_end:true ], // meta map
85+
file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true),
86+
file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true)
87+
]
88+
)
89+
"""
90+
}
91+
}
92+
93+
then {
94+
assertAll(
95+
{ assert workflow.success },
96+
// Here the complete workflow.out is snapshotted, together with the parsed versions YAML
{ assert snapshot(
97+
workflow.out,
98+
workflow.out.versions.collect { path(it).yaml }
99+
).match()}
100+
)
101+
}
102+
}
103+
}
Lines changed: 170 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,170 @@
1+
{
2+
"sarscov2 - fastq - single-end - stub": {
3+
"content": [
4+
{
5+
"0": [
6+
[
7+
{
8+
"id": "test",
9+
"single_end": true
10+
},
11+
"genome.idx:md5,d41d8cd98f00b204e9800998ecf8427e"
12+
]
13+
],
14+
"1": [
15+
[
16+
{
17+
"id": "test",
18+
"single_end": true
19+
},
20+
"test.fq:md5,d41d8cd98f00b204e9800998ecf8427e"
21+
]
22+
],
23+
"2": [
24+
[
25+
{
26+
"id": "test",
27+
"single_end": true
28+
},
29+
"test.json:md5,d41d8cd98f00b204e9800998ecf8427e"
30+
]
31+
],
32+
"3": [
33+
"versions.yml:md5,721349aff7673fd153a9e391c7eab88b",
34+
"versions.yml:md5,f1355ede970f7e54ac452275cfab13d8"
35+
],
36+
"fastq_filtered": [
37+
[
38+
{
39+
"id": "test",
40+
"single_end": true
41+
},
42+
"test.fq:md5,d41d8cd98f00b204e9800998ecf8427e"
43+
]
44+
],
45+
"index": [
46+
[
47+
{
48+
"id": "test",
49+
"single_end": true
50+
},
51+
"genome.idx:md5,d41d8cd98f00b204e9800998ecf8427e"
52+
]
53+
],
54+
"summary": [
55+
[
56+
{
57+
"id": "test",
58+
"single_end": true
59+
},
60+
"test.json:md5,d41d8cd98f00b204e9800998ecf8427e"
61+
]
62+
],
63+
"versions": [
64+
"versions.yml:md5,721349aff7673fd153a9e391c7eab88b",
65+
"versions.yml:md5,f1355ede970f7e54ac452275cfab13d8"
66+
]
67+
},
68+
[
69+
{
70+
"FASTQ_DECONTAMINATE_DEACON:DEACON_FILTER": {
71+
"deacon": "0.5.0"
72+
}
73+
},
74+
{
75+
"FASTQ_DECONTAMINATE_DEACON:DEACON_INDEX": {
76+
"deacon": "0.5.0"
77+
}
78+
}
79+
]
80+
],
81+
"meta": {
82+
"nf-test": "0.9.3",
83+
"nextflow": "25.10.0"
84+
},
85+
"timestamp": "2025-10-29T16:23:17.92702932"
86+
},
87+
"sarscov2 - fastq - paired-end": {
88+
"content": [
89+
[
90+
[
91+
{
92+
"id": "test",
93+
"single_end": false
94+
},
95+
"test.idx:md5,7437b25e6659ebfd5cf9be7b325f90c3"
96+
]
97+
],
98+
[
99+
[
100+
{
101+
"id": "test",
102+
"single_end": false
103+
},
104+
[
105+
"test_1.fq:md5,bf1aa22249e7e2b462c6be1a53ac55e9",
106+
"test_2.fq:md5,d3106ad7395c78a212e6214177d5c054"
107+
]
108+
]
109+
],
110+
"test.json",
111+
[
112+
{
113+
"FASTQ_DECONTAMINATE_DEACON:DEACON_FILTER": {
114+
"deacon": "0.5.0"
115+
}
116+
},
117+
{
118+
"FASTQ_DECONTAMINATE_DEACON:DEACON_INDEX": {
119+
"deacon": "0.5.0"
120+
}
121+
}
122+
]
123+
],
124+
"meta": {
125+
"nf-test": "0.9.3",
126+
"nextflow": "25.10.0"
127+
},
128+
"timestamp": "2025-10-29T16:23:11.422525467"
129+
},
130+
"sarscov2 - fastq - single-end": {
131+
"content": [
132+
[
133+
[
134+
{
135+
"id": "test",
136+
"single_end": true
137+
},
138+
"test.idx:md5,7437b25e6659ebfd5cf9be7b325f90c3"
139+
]
140+
],
141+
[
142+
[
143+
{
144+
"id": "test",
145+
"single_end": true
146+
},
147+
"test.fq:md5,2250e99ec61fca0e28e2dcb5124d1a16"
148+
]
149+
],
150+
"test.json",
151+
[
152+
{
153+
"FASTQ_DECONTAMINATE_DEACON:DEACON_FILTER": {
154+
"deacon": "0.5.0"
155+
}
156+
},
157+
{
158+
"FASTQ_DECONTAMINATE_DEACON:DEACON_INDEX": {
159+
"deacon": "0.5.0"
160+
}
161+
}
162+
]
163+
],
164+
"meta": {
165+
"nf-test": "0.9.3",
166+
"nextflow": "25.10.0"
167+
},
168+
"timestamp": "2025-10-29T16:23:02.404065827"
169+
}
170+
}

0 commit comments

Comments
 (0)