diff --git a/modules.json b/modules.json index 9f5802013..4281ec2e2 100644 --- a/modules.json +++ b/modules.json @@ -46,6 +46,26 @@ "git_sha": "5c460c5a4736974abde2843294f35307ee2b0e5e", "installed_by": ["modules"] }, + "catpack/bins": { + "branch": "master", + "git_sha": "81880787133db07d9b4c1febd152c090eb8325dc", + "installed_by": ["modules"] + }, + "catpack/contigs": { + "branch": "master", + "git_sha": "81880787133db07d9b4c1febd152c090eb8325dc", + "installed_by": ["modules"] + }, + "catpack/prepare": { + "branch": "master", + "git_sha": "81880787133db07d9b4c1febd152c090eb8325dc", + "installed_by": ["modules"] + }, + "catpack/summarise": { + "branch": "master", + "git_sha": "81880787133db07d9b4c1febd152c090eb8325dc", + "installed_by": ["modules"] + }, "centrifuge/centrifuge": { "branch": "master", "git_sha": "9a07a1293d9b818d1e06d0f7b58152f74d462012", diff --git a/modules/nf-core/catpack/bins/environment.yml b/modules/nf-core/catpack/bins/environment.yml new file mode 100644 index 000000000..39264bf45 --- /dev/null +++ b/modules/nf-core/catpack/bins/environment.yml @@ -0,0 +1,7 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda +dependencies: + - bioconda::cat=6.0.1 diff --git a/modules/nf-core/catpack/bins/main.nf b/modules/nf-core/catpack/bins/main.nf new file mode 100644 index 000000000..7f0625578 --- /dev/null +++ b/modules/nf-core/catpack/bins/main.nf @@ -0,0 +1,66 @@ +process CATPACK_BINS { /* Runs `CAT_pack bins` on the FASTA files staged under bins/, using a database directory and a taxonomy directory; proteins and diamond_table are optional pre-computed inputs. */ + tag "${meta.id}" + label 'process_medium' + + conda "${moduleDir}/environment.yml" + container "${workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container + ? 
'https://depot.galaxyproject.org/singularity/cat:6.0.1--hdfd78af_1' + : 'biocontainers/cat:6.0.1--hdfd78af_1'}" + + input: + tuple val(meta), path(bins, stageAs: 'bins/*') + tuple val(meta2), path(database) + tuple val(meta3), path(taxonomy) + tuple val(meta4), path(proteins) + tuple val(meta5), path(diamond_table) + + output: + tuple val(meta), path("*.ORF2LCA.txt"), emit: orf2lca + tuple val(meta), path("*.bin2classification.txt"), emit: bin2classification + tuple val(meta), path("*.log"), emit: log + tuple val(meta), path("*.diamond"), optional: true, emit: diamond + tuple val(meta), path("*.predicted_proteins.faa"), optional: true, emit: faa + tuple val(meta), path("*.gff"), optional: true, emit: gff + path "versions.yml", emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def premade_proteins = proteins ? "-p ${proteins}" : '' /* flag added only when a proteins file is supplied; '' otherwise */ + def premade_table = diamond_table ? 
"--diamond_alignment ${diamond_table}" : '' /* was "-d", which this command already uses for the database folder; the long flag matches CATPACK_CONTIGS */ + """ + CAT_pack bins \\ + -n ${task.cpus} \\ + -b bins/ \\ + -d ${database} \\ + -t ${taxonomy} \\ + ${premade_proteins} \\ + ${premade_table} \\ + -o ${prefix} \\ + ${args} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + catpack: \$(CAT_pack --version | sed 's/CAT_pack pack v//g;s/ .*//g') + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${prefix}.ORF2LCA.txt + touch ${prefix}.bin2classification.txt + touch ${prefix}.log + touch ${prefix}.diamond + touch ${prefix}.predicted_proteins.faa + touch ${prefix}.predicted_proteins.gff + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + catpack: \$(CAT_pack --version | sed 's/CAT_pack pack v//g;s/ .*//g') + END_VERSIONS + """ +} diff --git a/modules/nf-core/catpack/bins/meta.yml b/modules/nf-core/catpack/bins/meta.yml new file mode 100644 index 000000000..94503740c --- /dev/null +++ b/modules/nf-core/catpack/bins/meta.yml @@ -0,0 +1,167 @@ +# 
yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json +name: "catpack_bins" +description: Taxonomic classification of long DNA sequences and metagenome assembled genomes (e.g. MAGs / bins). +keywords: + - taxonomic classification + - classification + - long reads + - mags + - assembly +tools: + - "catpack": + description: "CAT/BAT: tool for taxonomic classification of contigs and metagenome-assembled + genomes (MAGs)" + homepage: "https://github.com/MGXlab/CAT_pack" + documentation: "https://github.com/MGXlab/CAT_pack" + tool_dev_url: "https://github.com/MGXlab/CAT_pack" + doi: "10.1186/s13059-019-1817-x" + licence: ["MIT"] + identifier: "" + +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'sample1', single_end:false ]` + + - bins: + type: file + description: One or more nucleotide FASTA file containing binned long DNA sequences. + pattern: "*.{fasta,fna,fa,fas}" + ontologies: + - edam: "http://edamontology.org/format_1929" + + - - meta2: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'sample1', single_end:false ]` + + - database: + type: directory + description: Directory containing CAT_pack database files (e.g. output from + CAT_pack prepare) + pattern: "*/" + ontologies: + - edam: "http://edamontology.org/data_1049" + + - - meta3: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'sample1', single_end:false ]` + + - taxonomy: + type: directory + description: Directory containing CAT_pack taxonomy files (e.g. output from + CAT_pack prepare) + pattern: "*/" + ontologies: + - edam: "http://edamontology.org/data_1049" + + - - meta4: + type: map + description: | + Groovy Map containing sample information + e.g. 
`[ id:'sample1', single_end:false ]` + + - proteins: + type: file + description: Optional pre-made FASTA file of predicted proteins + pattern: "*.{fasta,faa,fa,fas}" + ontologies: + - edam: "http://edamontology.org/format_1929" + + - - meta5: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'sample1', single_end:false ]` + + - diamond_table: + type: file + description: Optional pre-made DIAMOND alignment table + pattern: "*" + ontologies: + - edam: "http://edamontology.org/format_3751" + +output: + - orf2lca: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'sample1', single_end:false ]` + - "*.ORF2LCA.txt": + type: file + description: A TSV file with per-ORF hit stats and identified lineage + pattern: "*.ORF2LCA.txt" + ontologies: + - edam: "http://edamontology.org/format_3475" + - bin2classification: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'sample1', single_end:false ]` + - "*.bin2classification.txt": + type: file + description: A TSV file with per-bin hit stats and assignment justification + information + pattern: "*.bin2classification.txt" + ontologies: + - edam: "http://edamontology.org/format_3475" + - log: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'sample1', single_end:false ]` + - "*.log": + type: file + description: Log file with run messages and basic statistics + ontologies: + - edam: "http://edamontology.org/format_2330" + - diamond: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
`[ id:'sample1', single_end:false ]` + - "*.diamond": + type: file + description: Intermediate DIAMOND TSV summary output file with alignment results + ontologies: + - edam: "http://edamontology.org/format_3475" + - faa: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
`[ id:'sample1', single_end:false ]` + - "*.predicted_proteins.faa": + type: file + description: FAA file of DIAMOND predicted proteins hits + ontologies: + - edam: "http://edamontology.org/format_3475" + - gff: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'sample1', single_end:false ]` + - "*.gff": + type: file + description: GFF file of DIAMOND predicted proteins hits + ontologies: + - edam: "http://edamontology.org/format_2305" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@jfy133" +maintainers: + - "@jfy133" diff --git a/modules/nf-core/catpack/bins/tests/main.nf.test b/modules/nf-core/catpack/bins/tests/main.nf.test new file mode 100644 index 000000000..cb8bdde2d --- /dev/null +++ b/modules/nf-core/catpack/bins/tests/main.nf.test @@ -0,0 +1,135 @@ +nextflow_process { + + name "Test Process CATPACK_BINS" + script "../main.nf" + process "CATPACK_BINS" + config './nextflow.config' + + tag "modules" + tag "modules_nfcore" + tag "catpack" + tag "catpack/bins" + tag "catpack/prepare" + tag "catpack/contigs" + + setup { + run('CATPACK_PREPARE') { + script '../../prepare/main.nf' + process { + """ + input[0] = [ [id:'test'], [ file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/proteome.fasta', checkIfExists: true) ] ] + input[1] = [ file(params.modules_testdata_base_path + 'genomics/sarscov2/metagenome/prot_nodes.dmp', checkIfExists: true) ] + input[2] = [ file(params.modules_testdata_base_path + 'genomics/sarscov2/metagenome/prot_names.dmp', checkIfExists: true) ] + input[3] = [ file(params.modules_testdata_base_path + 'genomics/sarscov2/metagenome/prot.accession2taxid.gz', checkIfExists: true) ] + """ + } + } + } + + test("sarscov2 - genome - fasta") { + + when { + process { + """ + input[0] = [ [id:'test'], [ file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) ] 
] + input[1] = CATPACK_PREPARE.out.db + input[2] = CATPACK_PREPARE.out.taxonomy + input[3] = [[:], []] + input[4] = [[:], []] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot ( + process.out.orf2lca, + process.out.bin2classification, + process.out.diamond, + process.out.faa, + process.out.gff, + process.out.versions, + path(process.out.log.get(0).get(1)).readLines().last().contains("CAT is done!") + ).match() + } + ) + } + + } + + test("sarscov2 - genome - fasta - premade proteins") { + + setup { + run('CATPACK_CONTIGS') { + script '../../contigs/main.nf' + process { + """ + input[0] = [ [id:'test'], [ file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) ] ] + input[1] = CATPACK_PREPARE.out.db + input[2] = CATPACK_PREPARE.out.taxonomy + input[3] = [[:], []] + input[4] = [[:], []] + """ + } + } + } + + when { + process { + """ + input[0] = [ [id:'test'], [ file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) ] ] + input[1] = CATPACK_PREPARE.out.db + input[2] = CATPACK_PREPARE.out.taxonomy + input[3] = CATPACK_CONTIGS.out.faa + input[4] = [[:], []] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot ( + process.out.orf2lca, + process.out.bin2classification, + process.out.diamond, + process.out.gff, + process.out.versions, + path(process.out.log.get(0).get(1)).readLines().last().contains("CAT is done!") + ).match() + } + ) + } + + } + + test("sarscov2 - genome - fasta - stub") { + + options "-stub" + + when { + process { + """ + input[0] = [ [id:'test'], [ file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) ] ] + input[1] = CATPACK_PREPARE.out.db + input[2] = CATPACK_PREPARE.out.taxonomy + input[3] = [[:], []] + input[4] = [[:], []] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + process.out + ).match() 
+ } + ) + } + + } + +} diff --git a/modules/nf-core/catpack/bins/tests/main.nf.test.snap b/modules/nf-core/catpack/bins/tests/main.nf.test.snap new file mode 100644 index 000000000..e3c80d702 --- /dev/null +++ b/modules/nf-core/catpack/bins/tests/main.nf.test.snap @@ -0,0 +1,208 @@ +{ + "sarscov2 - genome - fasta": { + "content": [ + [ + [ + { + "id": "test" + }, + "test.ORF2LCA.txt:md5,27e3ca35dc7b977653b5bbf18076fc26" + ] + ], + [ + [ + { + "id": "test" + }, + "test.bin2classification.txt:md5,8cb8c364c1dea229f68e8c7a1747205d" + ] + ], + [ + [ + { + "id": "test" + }, + "test.concatenated.alignment.diamond:md5,9e2f9c188b183c18dd9572395a48a066" + ] + ], + [ + [ + { + "id": "test" + }, + "test.concatenated.predicted_proteins.faa:md5,1f8550f87d044d117422ca02827e4d18" + ] + ], + [ + [ + { + "id": "test" + }, + "test.concatenated.predicted_proteins.gff:md5,cb63331a0282175669107585cf4a66c1" + ] + ], + [ + "versions.yml:md5,ad0d24635fc75542a35bc9d98bc3e6f4" + ], + false + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "24.10.4" + }, + "timestamp": "2025-01-23T08:19:03.528409783" + }, + "sarscov2 - genome - fasta - premade proteins": { + "content": [ + [ + [ + { + "id": "test" + }, + "test.ORF2LCA.txt:md5,27e3ca35dc7b977653b5bbf18076fc26" + ] + ], + [ + [ + { + "id": "test" + }, + "test.bin2classification.txt:md5,8cb8c364c1dea229f68e8c7a1747205d" + ] + ], + [ + [ + { + "id": "test" + }, + "test.concatenated.alignment.diamond:md5,9e2f9c188b183c18dd9572395a48a066" + ] + ], + [ + + ], + [ + "versions.yml:md5,ad0d24635fc75542a35bc9d98bc3e6f4" + ], + false + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "24.10.4" + }, + "timestamp": "2025-01-23T06:02:16.99538445" + }, + "sarscov2 - genome - fasta - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test.ORF2LCA.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + [ + { + "id": "test" + }, + "test.bin2classification.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + [ + { + "id": 
"test" + }, + "test.log:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "3": [ + [ + { + "id": "test" + }, + "test.diamond:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "4": [ + [ + { + "id": "test" + }, + "test.predicted_proteins.faa:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "5": [ + [ + { + "id": "test" + }, + "test.predicted_proteins.gff:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "6": [ + "versions.yml:md5,ad0d24635fc75542a35bc9d98bc3e6f4" + ], + "bin2classification": [ + [ + { + "id": "test" + }, + "test.bin2classification.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "diamond": [ + [ + { + "id": "test" + }, + "test.diamond:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "faa": [ + [ + { + "id": "test" + }, + "test.predicted_proteins.faa:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "gff": [ + [ + { + "id": "test" + }, + "test.predicted_proteins.gff:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "log": [ + [ + { + "id": "test" + }, + "test.log:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "orf2lca": [ + [ + { + "id": "test" + }, + "test.ORF2LCA.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,ad0d24635fc75542a35bc9d98bc3e6f4" + ] + } + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "24.10.4" + }, + "timestamp": "2025-01-23T05:48:35.765674003" + } +} \ No newline at end of file diff --git a/modules/nf-core/catpack/bins/tests/nextflow.config b/modules/nf-core/catpack/bins/tests/nextflow.config new file mode 100644 index 000000000..c1ac48567 --- /dev/null +++ b/modules/nf-core/catpack/bins/tests/nextflow.config @@ -0,0 +1,13 @@ +process { + withName: CATPACK_PREPARE { + ext.args = "--common_prefix database" + } + + withName: CATPACK_BINS { + ext.args = "--bin_suffix .fasta" + } + + withName: CATPACK_ADDNAMES { + ext.args = "--only_official" + } +} diff --git a/modules/nf-core/catpack/contigs/environment.yml b/modules/nf-core/catpack/contigs/environment.yml new file mode 100644 index 
000000000..39264bf45 --- /dev/null +++ b/modules/nf-core/catpack/contigs/environment.yml @@ -0,0 +1,7 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda +dependencies: + - bioconda::cat=6.0.1 diff --git a/modules/nf-core/catpack/contigs/main.nf b/modules/nf-core/catpack/contigs/main.nf new file mode 100644 index 000000000..b64da2885 --- /dev/null +++ b/modules/nf-core/catpack/contigs/main.nf @@ -0,0 +1,66 @@ +process CATPACK_CONTIGS { /* Runs `CAT_pack contigs` on a contigs FASTA, using a database directory and a taxonomy directory; proteins and diamond_table are optional pre-computed inputs. */ + tag "${meta.id}" + label 'process_medium' + + conda "${moduleDir}/environment.yml" + container "${workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container + ? 'https://depot.galaxyproject.org/singularity/cat:6.0.1--hdfd78af_1' + : 'biocontainers/cat:6.0.1--hdfd78af_1'}" + + input: + tuple val(meta), path(contigs) + tuple val(meta2), path(database) + tuple val(meta3), path(taxonomy) + tuple val(meta4), path(proteins) + tuple val(meta5), path(diamond_table) + + output: + tuple val(meta), path("*.ORF2LCA.txt"), emit: orf2lca + tuple val(meta), path("*.contig2classification.txt"), emit: contig2classification + tuple val(meta), path("*.log"), emit: log + tuple val(meta), path("*.diamond"), optional: true, emit: diamond + tuple val(meta), path("*.predicted_proteins.faa"), optional: true, emit: faa + tuple val(meta), path("*.gff"), optional: true, emit: gff + path "versions.yml", emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def premade_proteins = proteins ? "--proteins_fasta ${proteins}" : '' /* flag added only when a proteins file is supplied; '' otherwise */ + def premade_table = diamond_table ? 
"--diamond_alignment ${diamond_table}" : '' /* flag added only when a DIAMOND alignment table is supplied; '' otherwise */ + """ + CAT_pack contigs \\ + --nproc ${task.cpus} \\ + --contigs_fasta ${contigs} \\ + --database_folder ${database} \\ + --taxonomy_folder ${taxonomy} \\ + --out_prefix ${prefix} \\ + ${premade_proteins} \\ + ${premade_table} \\ + ${args} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + catpack: \$(CAT_pack --version | sed 's/CAT_pack pack v//g;s/ .*//g') + END_VERSIONS + """ + + stub: /* touches one placeholder file per declared output pattern, then writes versions.yml */ + def prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${prefix}.ORF2LCA.txt + touch ${prefix}.contig2classification.txt + touch ${prefix}.log + touch ${prefix}.diamond + touch ${prefix}.predicted_proteins.faa + touch ${prefix}.predicted_proteins.gff + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + catpack: \$(CAT_pack --version | sed 's/CAT_pack pack v//g;s/ .*//g') + END_VERSIONS + """ +} diff --git a/modules/nf-core/catpack/contigs/meta.yml b/modules/nf-core/catpack/contigs/meta.yml new file mode 100644 index 000000000..092805d52 --- /dev/null +++ b/modules/nf-core/catpack/contigs/meta.yml @@ -0,0 +1,167 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json +name: "catpack_contigs" +description: Taxonomic classification of long DNA sequences and metagenome assembled genomes (e.g. contigs, MAGs / bins). 
+keywords: + - taxonomic classification + - classification + - long reads + - mags + - assembly +tools: + - "catpack": + description: "CAT/BAT: tool for taxonomic classification of contigs and metagenome-assembled + genomes (MAGs)" + homepage: "https://github.com/MGXlab/CAT_pack" + documentation: "https://github.com/MGXlab/CAT_pack" + tool_dev_url: "https://github.com/MGXlab/CAT_pack" + doi: "10.1186/s13059-019-1817-x" + licence: ["MIT"] + identifier: "" + +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
`[ id:'sample1', single_end:false ]` + + - contigs: + type: file + description: A nucleotide FASTA file containing long DNA sequences such as contigs. + pattern: "*.{fasta,fna,fa,fas}" + ontologies: + - edam: "http://edamontology.org/format_1929" + + - - meta2: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'sample1', single_end:false ]` + + - database: + type: directory + description: Directory containing CAT_pack database files (e.g. output from + CAT_pack prepare) + pattern: "*/" + ontologies: + - edam: "http://edamontology.org/data_1049" + + - - meta3: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'sample1', single_end:false ]` + + - taxonomy: + type: directory + description: Directory containing CAT_pack taxonomy files (e.g. output from + CAT_pack prepare) + pattern: "*/" + ontologies: + - edam: "http://edamontology.org/data_1049" + + - - meta4: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'sample1', single_end:false ]` + + - proteins: + type: file + description: Optional pre-made FASTA file of predicted proteins + pattern: "*.{fasta,faa,fa,fas}" + ontologies: + - edam: "http://edamontology.org/format_1929" + + - - meta5: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'sample1', single_end:false ]` + + - diamond_table: + type: file + description: Optional pre-made DIAMOND alignment table + pattern: "*" + ontologies: + - edam: "http://edamontology.org/format_3751" + +output: + - orf2lca: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
`[ id:'sample1', single_end:false ]` + - "*.ORF2LCA.txt": + type: file + description: A TSV file with per-ORF hit stats and identified lineage + pattern: "*.ORF2LCA.txt" + ontologies: + - edam: "http://edamontology.org/format_3475" + - contig2classification: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'sample1', single_end:false ]` + - "*.contig2classification.txt": + type: file + description: A TSV file with per-contig hit stats and assignment justification + information + pattern: "*.contig2classification.txt" + ontologies: + - edam: "http://edamontology.org/format_3475" + - log: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'sample1', single_end:false ]` + - "*.log": + type: file + description: Log file with run messages and basic statistics + ontologies: + - edam: "http://edamontology.org/format_2330" + - diamond: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'sample1', single_end:false ]` + - "*.diamond": + type: file + description: Intermediate DIAMOND TSV summary output file with alignment results + ontologies: + - edam: "http://edamontology.org/format_3475" + - faa: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'sample1', single_end:false ]` + - "*.predicted_proteins.faa": + type: file + description: FAA file of DIAMOND predicted proteins hits + ontologies: + - edam: "http://edamontology.org/format_3475" + - gff: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
`[ id:'sample1', single_end:false ]` + - "*.gff": + type: file + description: GFF file of DIAMOND predicted proteins hits + ontologies: + - edam: "http://edamontology.org/format_2305" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@jfy133" +maintainers: + - "@jfy133" diff --git a/modules/nf-core/catpack/contigs/tests/main.nf.test b/modules/nf-core/catpack/contigs/tests/main.nf.test new file mode 100644 index 000000000..d3ea8884b --- /dev/null +++ b/modules/nf-core/catpack/contigs/tests/main.nf.test @@ -0,0 +1,88 @@ +nextflow_process { + + name "Test Process CATPACK_CONTIGS" + script "../main.nf" + process "CATPACK_CONTIGS" + config './nextflow.config' + + tag "modules" + tag "modules_nfcore" + tag "catpack" + tag "catpack/contigs" + tag "catpack/prepare" + + setup { + run('CATPACK_PREPARE') { + script '../../prepare/main.nf' + process { + """ + input[0] = [ [id:'test'], [ file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/proteome.fasta', checkIfExists: true) ] ] + input[1] = [ file(params.modules_testdata_base_path + 'genomics/sarscov2/metagenome/prot_nodes.dmp', checkIfExists: true) ] + input[2] = [ file(params.modules_testdata_base_path + 'genomics/sarscov2/metagenome/prot_names.dmp', checkIfExists: true) ] + input[3] = [ file(params.modules_testdata_base_path + 'genomics/sarscov2/metagenome/prot.accession2taxid.gz', checkIfExists: true) ] + """ + } + } + } + + test("sarscov2 - genome - fasta") { + + when { + process { + """ + input[0] = [ [id:'test'], [ file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) ] ] + input[1] = CATPACK_PREPARE.out.db + input[2] = CATPACK_PREPARE.out.taxonomy + input[3] = [[:], []] + input[4] = [[:], []] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot ( + process.out.orf2lca, + process.out.contig2classification, + process.out.diamond, + 
process.out.faa, + process.out.gff, + process.out.versions, + path(process.out.log.get(0).get(1)).readLines().last().contains("CAT is done!") + ).match() + } + ) + } + + } + + test("sarscov2 - genome - fasta - stub") { + + options "-stub" + + when { + process { + """ + input[0] = [ [id:'test'], [ file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) ] ] + input[1] = CATPACK_PREPARE.out.db + input[2] = CATPACK_PREPARE.out.taxonomy + input[3] = [[:], []] + input[4] = [[:], []] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + process.out + ).match() + } + ) + } + + } + +} diff --git a/modules/nf-core/catpack/contigs/tests/main.nf.test.snap b/modules/nf-core/catpack/contigs/tests/main.nf.test.snap new file mode 100644 index 000000000..d6f593033 --- /dev/null +++ b/modules/nf-core/catpack/contigs/tests/main.nf.test.snap @@ -0,0 +1,168 @@ +{ + "sarscov2 - genome - fasta": { + "content": [ + [ + [ + { + "id": "test" + }, + "test.ORF2LCA.txt:md5,a623b47f20751db12ce18c9ca7ac2536" + ] + ], + [ + [ + { + "id": "test" + }, + "test.contig2classification.txt:md5,3c3c79045bf6ae8b1292ae9afa2ec4af" + ] + ], + [ + [ + { + "id": "test" + }, + "test.alignment.diamond:md5,9e2f9c188b183c18dd9572395a48a066" + ] + ], + [ + [ + { + "id": "test" + }, + "test.predicted_proteins.faa:md5,1f8550f87d044d117422ca02827e4d18" + ] + ], + [ + [ + { + "id": "test" + }, + "test.predicted_proteins.gff:md5,4fc7a311726723ce5b17cede1dd1059c" + ] + ], + [ + "versions.yml:md5,05d7ad5d0b02dc1a17b59b95420e961f" + ], + true + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.10.2" + }, + "timestamp": "2025-01-19T08:41:42.691801894" + }, + "sarscov2 - genome - fasta - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test.ORF2LCA.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + [ + { + "id": "test" + }, + "test.contig2classification.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + 
"2": [ + [ + { + "id": "test" + }, + "test.log:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "3": [ + [ + { + "id": "test" + }, + "test.diamond:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "4": [ + [ + { + "id": "test" + }, + "test.predicted_proteins.faa:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "5": [ + [ + { + "id": "test" + }, + "test.predicted_proteins.gff:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "6": [ + "versions.yml:md5,05d7ad5d0b02dc1a17b59b95420e961f" + ], + "contig2classification": [ + [ + { + "id": "test" + }, + "test.contig2classification.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "diamond": [ + [ + { + "id": "test" + }, + "test.diamond:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "faa": [ + [ + { + "id": "test" + }, + "test.predicted_proteins.faa:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "gff": [ + [ + { + "id": "test" + }, + "test.predicted_proteins.gff:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "log": [ + [ + { + "id": "test" + }, + "test.log:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "orf2lca": [ + [ + { + "id": "test" + }, + "test.ORF2LCA.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,05d7ad5d0b02dc1a17b59b95420e961f" + ] + } + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.10.2" + }, + "timestamp": "2025-01-20T12:52:15.350648714" + } +} \ No newline at end of file diff --git a/modules/nf-core/catpack/contigs/tests/nextflow.config b/modules/nf-core/catpack/contigs/tests/nextflow.config new file mode 100644 index 000000000..07f33523c --- /dev/null +++ b/modules/nf-core/catpack/contigs/tests/nextflow.config @@ -0,0 +1,5 @@ +process { + withName: CATPACK_PREPARE { + ext.args = "--common_prefix database" + } +} diff --git a/modules/nf-core/catpack/prepare/environment.yml b/modules/nf-core/catpack/prepare/environment.yml new file mode 100644 index 000000000..39264bf45 --- /dev/null +++ b/modules/nf-core/catpack/prepare/environment.yml @@ -0,0 +1,7 @@ 
+--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda +dependencies: + - bioconda::cat=6.0.1 diff --git a/modules/nf-core/catpack/prepare/main.nf b/modules/nf-core/catpack/prepare/main.nf new file mode 100644 index 000000000..c6b612b67 --- /dev/null +++ b/modules/nf-core/catpack/prepare/main.nf @@ -0,0 +1,60 @@ +process CATPACK_PREPARE { /* Runs `CAT_pack prepare` on a FASTA plus names, nodes and acc2tax files, writing into ${prefix}/ and emitting its db/ and tax/ subdirectories. */ + tag "${meta.id}" + label 'process_low' + + conda "${moduleDir}/environment.yml" + container "${workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container + ? 'https://depot.galaxyproject.org/singularity/cat:6.0.1--hdfd78af_1' + : 'biocontainers/cat:6.0.1--hdfd78af_1'}" + + input: + tuple val(meta), path(db_fasta) + path names /* NOTE(review): the nf-tests in this patch pass prot_nodes.dmp as input[1] (names) and prot_names.dmp as input[2] (nodes) - confirm the intended order */ + path nodes + path acc2tax + + output: + tuple val(meta), path("${prefix}/db/"), emit: db + tuple val(meta), path("${prefix}/tax/"), emit: taxonomy + path "versions.yml", emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + prefix = task.ext.prefix ?: "${meta.id}" /* assigned without `def`; the output paths above interpolate ${prefix} */ + """ + CAT_pack prepare \\ + -n ${task.cpus} \\ + --db_fasta ${db_fasta} \\ + --names ${names} \\ + --nodes ${nodes} \\ + --acc2tax ${acc2tax} \\ + --db_dir ${prefix}/ \\ + ${args} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + catpack: \$(CAT_pack --version | sed 's/CAT_pack pack v//g;s/ .*//g') + END_VERSIONS + """ + + stub: /* creates placeholder db/ and tax/ directories with empty files, then writes versions.yml */ + prefix = task.ext.prefix ?: 
"${meta.id}" + """ + touch database.log + mkdir -p ${prefix}/db + touch ${prefix}/db/database.dmnd + touch ${prefix}/db/database.fastaid2LCAtaxid + touch ${prefix}/db/database.taxids_with_multiple_offspring + mkdir -p ${prefix}/tax + touch ${prefix}/tax/nodes.dmp + touch ${prefix}/tax/names.dmp + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + catpack: \$(CAT_pack --version | sed 's/CAT_pack pack v//g;s/ .*//g') + END_VERSIONS + """ +} diff --git 
a/modules/nf-core/catpack/prepare/meta.yml b/modules/nf-core/catpack/prepare/meta.yml new file mode 100644 index 000000000..591a485ee --- /dev/null +++ b/modules/nf-core/catpack/prepare/meta.yml @@ -0,0 +1,86 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json +name: "catpack_prepare" +description: Creates a CAT_pack database based on input FASTAs +keywords: + - catpack + - cat + - prepare + - database + - profiling + - build +tools: + - "catpack": + description: "CAT/BAT: tool for taxonomic classification of contigs and metagenome-assembled genomes (MAGs)" + homepage: "https://github.com/MGXlab/CAT_pack" + documentation: "https://github.com/MGXlab/CAT_pack" + tool_dev_url: "https://github.com/MGXlab/CAT_pack" + doi: "10.1186/s13059-019-1817-x" + licence: ["MIT"] + +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'sample1', single_end:false ]` + - db_fasta: + type: file + description: A FASTA file containing all sequences to be included in the database + pattern: "*.{fasta,fa,fna}" + ontologies: + - edam: "http://edamontology.org/format_1929" + - - names: + type: file + description: An NCBI taxonomy-style names text file + pattern: "*" + ontologies: + - edam: "http://edamontology.org/format_1964" + - - nodes: + type: file + description: An NCBI taxonomy-style nodes text file + pattern: "*" + ontologies: + - edam: "http://edamontology.org/format_1964" + - - acc2tax: + type: file + description: An NCBI-style accession2taxid file mapping sequence accessions to taxonomy IDs + pattern: "*" + ontologies: + - edam: "http://edamontology.org/format_1964" + +output: + - db: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
`[ id:'sample1', single_end:false ]` + - "${prefix}/db/": + type: directory + description: Directory containing CAT database files + pattern: "${prefix}/db/" # mirrors the db output path declared in main.nf + ontologies: + - edam: "http://edamontology.org/data_1049" + - taxonomy: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'sample1', single_end:false ]` + - "${prefix}/tax/": + type: directory + description: Directory containing CAT prepared taxonomy database files + pattern: "${prefix}/tax/" # mirrors the taxonomy output path declared in main.nf + ontologies: + - edam: "http://edamontology.org/data_1049" + - versions: + - "versions.yml": + type: file + description: File containing software versions + pattern: "versions.yml" + +authors: + - "@jfy133" +maintainers: + - "@jfy133" diff --git a/modules/nf-core/catpack/prepare/tests/main.nf.test b/modules/nf-core/catpack/prepare/tests/main.nf.test new file mode 100644 index 000000000..4e96ad9eb --- /dev/null +++ b/modules/nf-core/catpack/prepare/tests/main.nf.test @@ -0,0 +1,68 @@ +nextflow_process { // nf-test suite for the CATPACK_PREPARE process in ../main.nf + + name "Test Process CATPACK_PREPARE" + script "../main.nf" + process "CATPACK_PREPARE" + config './nextflow.config' + + tag "modules" + tag "modules_nfcore" + tag "catpack" + tag "catpack/prepare" + + test("metagenome -sarscov2 - fasta") { + + when { // NOTE(review): ../main.nf declares input[1] as `names` and input[2] as `nodes`, but input[1] receives prot_nodes.dmp and input[2] prot_names.dmp below - confirm this ordering is intended + process { + """ + input[0] = [ [id:'test'], [ file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/proteome.fasta', checkIfExists: true) ] ] + input[1] = [ file(params.modules_testdata_base_path + 'genomics/sarscov2/metagenome/prot_nodes.dmp', checkIfExists: true) ] + input[2] = [ file(params.modules_testdata_base_path + 'genomics/sarscov2/metagenome/prot_names.dmp', checkIfExists: true) ] + input[3] = [ file(params.modules_testdata_base_path + 'genomics/sarscov2/metagenome/prot.accession2taxid.gz', checkIfExists: true) ] + """ + } + } + + then { + def stablefiles = [] + 
file(process.out.db.get(0).get(1)).eachFileRecurse{ file -> if (!file.isDirectory() && !["database.dmnd", "database.log", "database.fastaid2LCAtaxid", "database.taxids_with_multiple_offspring"].find {file.toString().endsWith(it)}) {stablefiles.add(file)} } // non-directory files under the db output whose path does not end in a listed name are collected as file objects + def unstablefiles = [] + file(process.out.db.get(0).get(1)).eachFileRecurse{ file -> if (["database.dmnd", "database.log", "database.fastaid2LCAtaxid", "database.taxids_with_multiple_offspring"].find {file.toString().endsWith(it)}) {unstablefiles.add(file.getName().toString())} } // files whose path ends in a listed name are recorded by file name only + assertAll( + { assert process.success }, + { assert snapshot( + stablefiles.sort(), + unstablefiles.sort(), + process.out.taxonomy, + process.out.versions + ).match() } + ) + } + + } + + test("metagenome -sarscov2 - fasta - stub") { + + options "-stub" // run the process's stub: section instead of script: + + when { + process { + """ + input[0] = [ [id:'test'], [ file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/proteome.fasta', checkIfExists: true) ] ] + input[1] = [ file(params.modules_testdata_base_path + 'genomics/sarscov2/metagenome/prot_nodes.dmp', checkIfExists: true) ] + input[2] = [ file(params.modules_testdata_base_path + 'genomics/sarscov2/metagenome/prot_names.dmp', checkIfExists: true) ] + input[3] = [ file(params.modules_testdata_base_path + 'genomics/sarscov2/metagenome/prot.accession2taxid.gz', checkIfExists: true) ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + +} \ No newline at end of file diff --git a/modules/nf-core/catpack/prepare/tests/main.nf.test.snap b/modules/nf-core/catpack/prepare/tests/main.nf.test.snap new file mode 100644 index 000000000..0a8975ffe --- /dev/null +++ b/modules/nf-core/catpack/prepare/tests/main.nf.test.snap @@ -0,0 +1,96 @@ +{ + "metagenome -sarscov2 - fasta": { + "content": [ + [ + + ], + [ + "database.dmnd", + "database.fastaid2LCAtaxid", + "database.taxids_with_multiple_offspring" + ], + [ + [ + { + "id": "test" + }, + [ + 
"names.dmp:md5,c471c27a4ce85ae74d2c63633c9ce1e3", + "nodes.dmp:md5,130f9132095562e09c732679c562f5e9" + ] + ] + ], + [ + "versions.yml:md5,d851e296d4025a8060b6283ad3b63937" + ] + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "24.10.4" + }, + "timestamp": "2025-01-25T16:19:17.838393903" + }, + "metagenome -sarscov2 - fasta - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + [ + "database.dmnd:md5,d41d8cd98f00b204e9800998ecf8427e", + "database.fastaid2LCAtaxid:md5,d41d8cd98f00b204e9800998ecf8427e", + "database.taxids_with_multiple_offspring:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "1": [ + [ + { + "id": "test" + }, + [ + "names.dmp:md5,d41d8cd98f00b204e9800998ecf8427e", + "nodes.dmp:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "2": [ + "versions.yml:md5,d851e296d4025a8060b6283ad3b63937" + ], + "db": [ + [ + { + "id": "test" + }, + [ + "database.dmnd:md5,d41d8cd98f00b204e9800998ecf8427e", + "database.fastaid2LCAtaxid:md5,d41d8cd98f00b204e9800998ecf8427e", + "database.taxids_with_multiple_offspring:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "taxonomy": [ + [ + { + "id": "test" + }, + [ + "names.dmp:md5,d41d8cd98f00b204e9800998ecf8427e", + "nodes.dmp:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "versions": [ + "versions.yml:md5,d851e296d4025a8060b6283ad3b63937" + ] + } + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.10.2" + }, + "timestamp": "2025-01-19T07:53:12.595910196" + } +} \ No newline at end of file diff --git a/modules/nf-core/catpack/prepare/tests/nextflow.config b/modules/nf-core/catpack/prepare/tests/nextflow.config new file mode 100644 index 000000000..07f33523c --- /dev/null +++ b/modules/nf-core/catpack/prepare/tests/nextflow.config @@ -0,0 +1,5 @@ +process { + withName: CATPACK_PREPARE { + ext.args = "--common_prefix database" // reaches `CAT_pack prepare` through ${args} in main.nf; the test assertions and snapshot look for database.* file names + } +} diff --git a/modules/nf-core/catpack/summarise/environment.yml b/modules/nf-core/catpack/summarise/environment.yml new file mode 100644 index 
000000000..39264bf45 --- /dev/null +++ b/modules/nf-core/catpack/summarise/environment.yml @@ -0,0 +1,7 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda +dependencies: + - bioconda::cat=6.0.1 # same 6.0.1 release as the cat:6.0.1 container images referenced in main.nf diff --git a/modules/nf-core/catpack/summarise/main.nf b/modules/nf-core/catpack/summarise/main.nf new file mode 100644 index 000000000..b2cea90a9 --- /dev/null +++ b/modules/nf-core/catpack/summarise/main.nf @@ -0,0 +1,62 @@ +process CATPACK_SUMMARISE { // Runs `CAT_pack summarise` on a classification table (optionally with a contigs FASTA) and writes ${prefix}.txt + tag "${meta.id}" + label 'process_single' + + conda "${moduleDir}/environment.yml" + container "${workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container + ? 'https://depot.galaxyproject.org/singularity/cat:6.0.1--hdfd78af_1' + : 'biocontainers/cat:6.0.1--hdfd78af_1'}" + + input: + tuple val(meta), path(classification) + tuple val(meta2), path(contigs) + + output: + tuple val(meta), path("*.txt"), emit: txt + path "versions.yml", emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + if ("${classification}" == "${prefix}.txt") { // abort when the output file name would equal the staged input file name + error("Input and output names are the same, set prefix in module configuration to disambiguate!") + } + def insert_contigs = contigs ? "-c ${contigs}" : '' // -c is only added when `contigs` is truthy (an empty list is falsy in Groovy) + """ + CAT_pack summarise \\ + ${args} \\ + -i ${classification} \\ + ${insert_contigs} \\ + -o ${prefix}.txt + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + catpack: \$(CAT_pack --version | sed 's/CAT_pack pack v//g;s/ .*//g') + END_VERSIONS + """ + + stub: // echoes the command that script: would build, then touches an empty ${prefix}.txt + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + if ("${classification}" == "${prefix}.txt") { // same name-collision guard as in script: + error("Input and output names are the same, set prefix in module configuration to disambiguate!") + } + def insert_contigs = contigs ? 
"-c ${contigs}" : '' + """ + echo "CAT_pack summarise \\ + ${args} \\ + -i ${classification} \\ + ${insert_contigs} \\ + -o ${prefix}.txt" + + touch ${prefix}.txt + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + catpack: \$(CAT_pack --version | sed 's/CAT_pack pack v//g;s/ .*//g') + END_VERSIONS + """ +} diff --git a/modules/nf-core/catpack/summarise/meta.yml b/modules/nf-core/catpack/summarise/meta.yml new file mode 100644 index 000000000..eb7f6cb1c --- /dev/null +++ b/modules/nf-core/catpack/summarise/meta.yml @@ -0,0 +1,72 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json +name: "catpack_summarise" +description: Summarises results from CAT/BAT/RAT classification steps +keywords: + - taxonomic classification + - classification + - long reads + - mags + - assembly +tools: + - "catpack": + description: "CAT/BAT: tool for taxonomic classification of contigs and metagenome-assembled + genomes (MAGs)" + homepage: "https://github.com/MGXlab/CAT_pack" + documentation: "https://github.com/MGXlab/CAT_pack" + tool_dev_url: "https://github.com/MGXlab/CAT_pack" + doi: "10.1186/s13059-019-1817-x" + licence: ["MIT"] + identifier: "" + +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
`[ id:'sample1', single_end:false ]` + + - contigs: + type: file + description: Optional nucleotide FASTA file containing long DNA sequences such as contigs that were classified (only if classification table is from CAT_pack contigs) + pattern: "*.{fasta,fna,fa,fas}" + ontologies: + - edam: "http://edamontology.org/format_1929" + +output: + - txt: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'sample1', single_end:false ]` + - "*.txt": + type: file + description: | + Summary statistics table of CAT/BAT/RAT results + pattern: "*.txt" + ontologies: + - edam: "http://edamontology.org/format_3475" + + - versions: + - "versions.yml": + type: file + description: File containing software versions + pattern: "versions.yml" + +authors: + - "@jfy133" +maintainers: + - "@jfy133" diff --git a/modules/nf-core/catpack/summarise/tests/main.nf.test b/modules/nf-core/catpack/summarise/tests/main.nf.test new file mode 100644 index 000000000..ac87e1f64 --- /dev/null +++ b/modules/nf-core/catpack/summarise/tests/main.nf.test @@ -0,0 +1,158 @@ +nextflow_process { // nf-test suite for the CATPACK_SUMMARISE process in ../main.nf + + name "Test Process CATPACK_SUMMARISE" + script "../main.nf" + process "CATPACK_SUMMARISE" + config './nextflow.config' + + tag "modules" + tag "modules_nfcore" + tag "catpack" + tag "catpack/summarise" + tag "catpack/prepare" + tag "catpack/contigs" + tag "catpack/bins" + tag "catpack/addnames" + + setup { // shared setup: provides CATPACK_PREPARE.out.db / .taxonomy to the per-test setup blocks below. NOTE(review): input[1] receives prot_nodes.dmp and input[2] prot_names.dmp - confirm this matches the input order declared in ../../prepare/main.nf + run('CATPACK_PREPARE') { + script '../../prepare/main.nf' + process { + """ + input[0] = [ [id:'test'], [ file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/proteome.fasta', checkIfExists: true) ] ] + input[1] = [ file(params.modules_testdata_base_path + 'genomics/sarscov2/metagenome/prot_nodes.dmp', checkIfExists: true) ] + input[2] = [ file(params.modules_testdata_base_path + 'genomics/sarscov2/metagenome/prot_names.dmp', checkIfExists: true) ] + input[3] = [ file(params.modules_testdata_base_path + 
'genomics/sarscov2/metagenome/prot.accession2taxid.gz', checkIfExists: true) ] + """ + } + } + } + + test("sarscov2 - contigs - with names") { // classification comes from CATPACK_CONTIGS via CATPACK_ADDNAMES; the genome FASTA is passed as contigs + + setup { + run('CATPACK_CONTIGS') { + script '../../contigs/main.nf' + process { + """ + input[0] = [ [id:'test'], [ file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) ] ] + input[1] = CATPACK_PREPARE.out.db + input[2] = CATPACK_PREPARE.out.taxonomy + input[3] = [[:], []] + input[4] = [[:], []] + """ + } + } + + run('CATPACK_ADDNAMES') { + script '../../addnames/main.nf' + process { + """ + input[0] = CATPACK_CONTIGS.out.contig2classification + input[1] = CATPACK_PREPARE.out.taxonomy + """ + } + } + } + + when { + process { + """ + input[0] = CATPACK_ADDNAMES.out.txt + input[1] = [ [id:'test'], [ file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) ] ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + + test("sarscov2 - bins - raw") { // classification comes from CATPACK_BINS via CATPACK_ADDNAMES; no contigs FASTA is passed (input[1] = [[:],[]]) + + setup { + run('CATPACK_BINS') { + script '../../bins/main.nf' + process { + """ + input[0] = [ [id:'test'], [ file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) ] ] + input[1] = CATPACK_PREPARE.out.db + input[2] = CATPACK_PREPARE.out.taxonomy + input[3] = [[:], []] + input[4] = [[:], []] + """ + } + } + + run('CATPACK_ADDNAMES') { + script '../../addnames/main.nf' + process { + """ + input[0] = CATPACK_BINS.out.bin2classification + input[1] = CATPACK_PREPARE.out.taxonomy + """ + } + } + } + + when { + process { + """ + input[0] = CATPACK_ADDNAMES.out.txt + input[1] = [[:],[]] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + + test("sarscov2 - contigs - stub") { // stub run: the CATPACK_CONTIGS output is passed straight to the process, without CATPACK_ADDNAMES + + options "-stub" + + setup { + run('CATPACK_CONTIGS') { + script '../../contigs/main.nf' + process { + """ + input[0] = [ 
[id:'test'], [ file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) ] ] + input[1] = CATPACK_PREPARE.out.db + input[2] = CATPACK_PREPARE.out.taxonomy + input[3] = [[:], []] + input[4] = [[:], []] + """ + } + } + } + + when { + process { + """ + input[0] = CATPACK_CONTIGS.out.contig2classification + input[1] = [ [id:'test'], [ file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) ] ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + +} diff --git a/modules/nf-core/catpack/summarise/tests/main.nf.test.snap b/modules/nf-core/catpack/summarise/tests/main.nf.test.snap new file mode 100644 index 000000000..72bd16c4b --- /dev/null +++ b/modules/nf-core/catpack/summarise/tests/main.nf.test.snap @@ -0,0 +1,101 @@ +{ + "sarscov2 - contigs - with names": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test_summary.txt:md5,f4ae1e889f762d6123e4b8d9e57aa4cc" + ] + ], + "1": [ + "versions.yml:md5,6f0ab36312198982a6de7194fef81c72" + ], + "txt": [ + [ + { + "id": "test" + }, + "test_summary.txt:md5,f4ae1e889f762d6123e4b8d9e57aa4cc" + ] + ], + "versions": [ + "versions.yml:md5,6f0ab36312198982a6de7194fef81c72" + ] + } + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "24.10.4" + }, + "timestamp": "2025-01-24T06:27:47.176876392" + }, + "sarscov2 - contigs - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test_summary.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + "versions.yml:md5,6f0ab36312198982a6de7194fef81c72" + ], + "txt": [ + [ + { + "id": "test" + }, + "test_summary.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,6f0ab36312198982a6de7194fef81c72" + ] + } + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "24.10.4" + }, + "timestamp": "2025-01-24T06:28:08.710621691" + }, + "sarscov2 - bins - raw": { + "content": [ 
+ { + "0": [ + [ + { + "id": "test" + }, + "test_summary.txt:md5,2fc7ecc57cfe0c3362fba27e9693d266" + ] + ], + "1": [ + "versions.yml:md5,6f0ab36312198982a6de7194fef81c72" + ], + "txt": [ + [ + { + "id": "test" + }, + "test_summary.txt:md5,2fc7ecc57cfe0c3362fba27e9693d266" + ] + ], + "versions": [ + "versions.yml:md5,6f0ab36312198982a6de7194fef81c72" + ] + } + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "24.10.4" + }, + "timestamp": "2025-01-24T06:27:58.559081161" + } +} diff --git a/modules/nf-core/catpack/summarise/tests/nextflow.config b/modules/nf-core/catpack/summarise/tests/nextflow.config new file mode 100644 index 000000000..0302d9cf4 --- /dev/null +++ b/modules/nf-core/catpack/summarise/tests/nextflow.config @@ -0,0 +1,17 @@ +process { + withName: CATPACK_PREPARE { + ext.args = "--common_prefix database" + } + + withName: CATPACK_BINS { + ext.args = "--bin_suffix .fasta" // the bins test stages genome.fasta as its only bin + } + + withName: CATPACK_ADDNAMES { + ext.args = "--only_official" + } + + withName: CATPACK_SUMMARISE { + ext.prefix = { "${meta.id}_summary" } // yields test_summary.txt in the snapshot; main.nf errors when "${classification}" equals "${prefix}.txt" + } +}