Merge branch 'patch-2.5.1' of github.com:nf-core/eager into patch-2.5.1

jfy133 · jfy133 · commit 23326a4996e9 · 2024-02-16T10:22:26.000+01:00
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -3,6 +3,21 @@
 The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/)
 and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.html).
 
+## [2.5.1] - YYYY-MM-DD
+
+### `Added`
+
+- [#1037](https://github.com/nf-core/eager/issues/1037) Added the `--anno_file_is_unsorted` option to deactivate the `-sorted` option of bedtools coverage, for cases where the feature file is not sorted the same way as the fasta file.
+
+### `Fixed`
+
+- [#1048](https://github.com/nf-core/eager/issues/1048) `--vcf2genome_outfile` parameter now gets prefixed by the sample_name and suffixed with `.fasta` (i.e. `<sample_name>_<vcf2genome_outfile>.fasta`). This ensures we avoid overwriting the output fasta of one sample with that of another when the option is provided. (♥ Thanks to @MeriamOs for reporting.)
+- [#1047](https://github.com/nf-core/eager/issues/1047) Fixed the row in which some statistics were reported in the General Stats table. The fix for the file name collision in 2.5.0 (see #1017) added a suffix that caused these statistics to be reported in the wrong row.
+
+### `Dependencies`
+
+### `Deprecated`
+
 ## [2.5.0] - Bopfingen - 2023-11-03
 
 ### `Added`
diff --git a/assets/multiqc_config.yaml b/assets/multiqc_config.yaml
@@ -59,6 +59,8 @@ extra_fn_clean_exts:
   - ".trimmed_stats"
   - "_libmerged"
   - "_bt2"
+  - type: "regex"
+    pattern: "_udg(half|none|full)"
 
 top_modules:
   - "fastqc":
diff --git a/main.nf b/main.nf
@@ -2063,13 +2063,14 @@ process bedtools {
   tuple samplename, libraryid, lane, seqtype, organism, strandedness, udg, path("*")
 
   script:
+  sorting_of_anno = params.anno_file_is_unsorted ? "" : '-sorted'
   """
   ## Create genome file from bam header
   samtools view -H ${bam} | grep '@SQ' | sed 's#@SQ\tSN:\\|LN:##g' > genome.txt
   
   ##  Run bedtools
-  bedtools coverage -nonamecheck -g genome.txt -sorted -a ${anno_file} -b ${bam} | pigz -p ${task.cpus - 1} > "${bam.baseName}".breadth.gz
-  bedtools coverage -nonamecheck -g genome.txt -sorted -a ${anno_file} -b ${bam} -mean | pigz -p ${task.cpus - 1} > "${bam.baseName}".depth.gz
+  bedtools coverage -nonamecheck -g genome.txt ${sorting_of_anno} -a ${anno_file} -b ${bam} | pigz -p ${task.cpus - 1} > "${bam.baseName}".breadth.gz
+  bedtools coverage -nonamecheck -g genome.txt ${sorting_of_anno} -a ${anno_file} -b ${bam} -mean | pigz -p ${task.cpus - 1} > "${bam.baseName}".depth.gz
   """
 }
 
@@ -2741,7 +2742,7 @@ process vcf2genome {
   tuple samplename, libraryid, lane, seqtype, organism, strandedness, udg, path("*.fasta.gz")
 
   script:
-  def out = !params.vcf2genome_outfile ? "${samplename}.fasta" : "${params.vcf2genome_outfile}"
+  def out = !params.vcf2genome_outfile ? "${samplename}.fasta" : "${samplename}_${params.vcf2genome_outfile}.fasta"
   def fasta_head = !params.vcf2genome_header ? "${samplename}" : "${params.vcf2genome_header}"
   """
   pigz -d -f -p ${task.cpus} ${vcf}
diff --git a/nextflow.config b/nextflow.config
@@ -143,8 +143,9 @@ params {
   rescale_seqlength = 12
 
   //Bedtools settings
-  run_bedtools_coverage = false 
+  run_bedtools_coverage = false
   anno_file = null
+  anno_file_is_unsorted = false
 
   //bamUtils trimbam settings
   run_trim_bam = false 
diff --git a/nextflow_schema.json b/nextflow_schema.json
@@ -981,6 +981,12 @@
                     "description": "Path to GFF or BED file containing positions of features in reference file (--fasta). Path should be enclosed in quotes.",
                     "fa_icon": "fas fa-file-signature",
                     "help_text": "Specify the path to a GFF/BED containing the feature coordinates (or any acceptable input for [`bedtools coverage`](https://bedtools.readthedocs.io/en/latest/content/tools/coverage.html)). Must be in quotes.\n"
+                },
+                "anno_file_is_unsorted": {
+                    "type": "boolean",
+                    "fa_icon": "fas fa-random",
+                    "description": "Specify if the annotation file is not sorted in the same way as the reference fasta file.",
+                    "help_text": "In cases where the annotation file is NOT sorted the same way as the reference fasta, this option should be specified. This will significantly increase the memory usage of bedtools!\n\n> Modifies bedtools parameter: `-sorted`"
                 }
             },
             "fa_icon": "fas fa-scroll",
@@ -1330,9 +1336,9 @@
                 },
                 "vcf2genome_outfile": {
                     "type": "string",
-                    "description": "Specify name of the output FASTA file containing the consensus sequence. Do not include `.vcf` in the file name.",
+                    "description": "Specify the name of the output FASTA file containing the consensus sequence.",
                     "fa_icon": "fas fa-file-alt",
-                    "help_text": "The name of your requested output FASTA file. Do not include `.fasta` suffix.\n"
+                    "help_text": "The output FASTA file will be named `<sample_name>_<vcf2genome_outfile>.fasta`.\n"
                 },
                 "vcf2genome_header": {
                     "type": "string",
@@ -1718,7 +1724,7 @@
                 "maltextract_percentidentity": {
                     "type": "number",
                     "description": "Minimum percent identity alignments are required to have to be reported. Recommended to set same as MALT parameter.",
-                    "default": 85.0,
+                    "default": 85,
                     "fa_icon": "fas fa-id-card",
                     "help_text": "Minimum percent identity alignments are required to have to be reported. Higher values allows fewer mismatches between read and reference sequence, but therefore will provide greater confidence in the hit. Lower values allow more mismatches, which can account for damage and divergence of a related strain/species to the reference. Recommended to set same as MALT parameter or higher. Default: `85`.\n\nOnly when `--metagenomic_tool malt` is also supplied.\n\n> Modifies MaltExtract parameter: `--minPI`"
                 },