nf-core
diff --git a/‎.github/workflows/ci.yml
Lines changed: 12 additions & 37 deletions b/‎.github/workflows/ci.yml
Lines changed: 12 additions & 37 deletions
diff --git a/‎CHANGELOG.md
Lines changed: 24 additions & 0 deletions b/‎CHANGELOG.md
Lines changed: 24 additions & 0 deletions
diff --git a/‎Dockerfile
Lines changed: 2 additions & 2 deletions b/‎Dockerfile
Lines changed: 2 additions & 2 deletions
diff --git a/‎README.md
Lines changed: 39 additions & 45 deletions b/‎README.md
Lines changed: 39 additions & 45 deletions
diff --git a/‎assets/multiqc_config.yaml
Lines changed: 1 addition & 2 deletions b/‎assets/multiqc_config.yaml
Lines changed: 1 addition & 2 deletions
diff --git a/‎bin/scrape_software_versions.py
Lines changed: 6 additions & 2 deletions b/‎bin/scrape_software_versions.py
Lines changed: 6 additions & 2 deletions
diff --git a/‎conf/base.config
Lines changed: 1 addition & 1 deletion b/‎conf/base.config
Lines changed: 1 addition & 1 deletion
diff --git a/‎conf/test_resources.config
Lines changed: 4 additions & 0 deletions b/‎conf/test_resources.config
Lines changed: 4 additions & 0 deletions
diff --git a/‎docs/images/output/overview/eager2_metromap_complex.png
-514 KB b/‎docs/images/output/overview/eager2_metromap_complex.png
-514 KB
diff --git a/‎docs/images/output/overview/eager2_workflow.png
-192 KB b/‎docs/images/output/overview/eager2_workflow.png
-192 KB
@@ -34,13 +34,13 @@ jobs:
 
       - name: Build new docker image
         if: env.MATCHED_FILES
-        run: docker build --no-cache . -t nfcore/eager:2.2.2
+        run: docker build --no-cache . -t nfcore/eager:2.3
 
       - name: Pull docker image
         if: ${{ !env.MATCHED_FILES }}
         run: |
           docker pull nfcore/eager:dev
-          docker tag nfcore/eager:dev nfcore/eager:2.2.2
+          docker tag nfcore/eager:dev nfcore/eager:2.3
 
       - name: Install Nextflow
         env:
@@ -146,16 +146,16 @@ jobs:
           nextflow run ${GITHUB_WORKSPACE} -profile test_tsv,docker --run_pmdtools
       - name: GENOTYPING_UG AND MULTIVCFANALYZER Test running GATK UnifiedGenotyper and MultiVCFAnalyzer, additional VCFS
         run: |
-         nextflow run ${GITHUB_WORKSPACE} -profile test_tsv,docker --run_genotyping --genotyping_tool 'ug' --gatk_out_mode 'EMIT_ALL_SITES' --gatk_ug_genotype_model 'SNP' --run_multivcfanalyzer --additional_vcf_files 'https://raw.githubusercontent.com/nf-core/test-datasets/eager/testdata/Mammoth/vcf/JK2772_CATCAGTGAGTAGA_L008_R1_001.fastq.gz.tengrand.fq.combined.fq.mapped_rmdup.bam.unifiedgenotyper.vcf.gz' --write_allele_frequencies
+          nextflow run ${GITHUB_WORKSPACE} -profile test_tsv,docker --run_genotyping --genotyping_tool 'ug' --gatk_out_mode 'EMIT_ALL_SITES' --gatk_ug_genotype_model 'SNP' --run_multivcfanalyzer --additional_vcf_files 'https://raw.githubusercontent.com/nf-core/test-datasets/eager/testdata/Mammoth/vcf/JK2772_CATCAGTGAGTAGA_L008_R1_001.fastq.gz.tengrand.fq.combined.fq.mapped_rmdup.bam.unifiedgenotyper.vcf.gz' --write_allele_frequencies
       - name: COMPLEX LANE/LIBRARY MERGING Test running lane and library merging prior to GATK UnifiedGenotyper and running MultiVCFAnalyzer
         run: |
-         nextflow run ${GITHUB_WORKSPACE} -profile test_tsv_complex,docker --run_genotyping --genotyping_tool 'ug' --gatk_out_mode 'EMIT_ALL_SITES' --gatk_ug_genotype_model 'SNP' --run_multivcfanalyzer
+          nextflow run ${GITHUB_WORKSPACE} -profile test_tsv_complex,docker --run_genotyping --genotyping_tool 'ug' --gatk_out_mode 'EMIT_ALL_SITES' --gatk_ug_genotype_model 'SNP' --run_multivcfanalyzer
       - name: GENOTYPING_UG ON TRIMMED BAM Test
         run: |
-         nextflow run ${GITHUB_WORKSPACE} -profile test_tsv,docker --run_genotyping --run_trim_bam --genotyping_source 'trimmed' --genotyping_tool 'ug' --gatk_out_mode 'EMIT_ALL_SITES' --gatk_ug_genotype_model 'SNP'
+          nextflow run ${GITHUB_WORKSPACE} -profile test_tsv,docker --run_genotyping --run_trim_bam --genotyping_source 'trimmed' --genotyping_tool 'ug' --gatk_out_mode 'EMIT_ALL_SITES' --gatk_ug_genotype_model 'SNP'
       - name: BAM_INPUT Run the basic pipeline with the bam input profile, skip AdapterRemoval as no convertBam
         run: |
-         nextflow run ${GITHUB_WORKSPACE} -profile test_tsv_bam,docker --skip_adapterremoval
+          nextflow run ${GITHUB_WORKSPACE} -profile test_tsv_bam,docker --skip_adapterremoval
       - name: BAM_INPUT Run the basic pipeline with the bam input profile, convert to FASTQ for adapterremoval test and downstream
         run: |
           nextflow run ${GITHUB_WORKSPACE} -profile test_tsv_bam,docker --run_convertinputbam
@@ -167,6 +167,9 @@ jobs:
       - name: METAGENOMIC Run the basic pipeline but with unmapped reads going into MALT
         run: |
           nextflow run ${GITHUB_WORKSPACE} -profile test_tsv,docker --run_bam_filtering  --bam_unmapped_type 'fastq' --run_metagenomic_screening --metagenomic_tool 'malt' --database "/home/runner/work/eager/eager/databases/malt/" --malt_sam_output
+      - name: METAGENOMIC Run the basic pipeline but with low-complexity filtered reads going into MALT
+        run: |
+          nextflow run ${GITHUB_WORKSPACE} -profile test_tsv,docker --run_bam_filtering  --bam_unmapped_type 'fastq' --run_metagenomic_screening --metagenomic_tool 'malt' --database "/home/runner/work/eager/eager/databases/malt/" --metagenomic_complexity_filter
       - name: MALTEXTRACT Download resource files
         run: |
             mkdir -p databases/maltextract
@@ -186,34 +189,6 @@ jobs:
       - name: MTNUCRATIO Run basic pipeline with bam input profile, but don't convert BAM, skip everything but mtnucratio
         run: |
           nextflow run ${GITHUB_WORKSPACE} -profile test_tsv_humanbam,docker --skip_fastqc --skip_adapterremoval --skip_deduplication --skip_qualimap --skip_preseq --skip_damage_calculation --run_mtnucratio
-  
-  push_dockerhub:
-    name: Push new Docker image to Docker Hub
-    runs-on: ubuntu-latest
-    # Only run if the tests passed
-    needs: test
-    # Only run for the nf-core repo, for releases and merged PRs
-    if: ${{ github.repository == 'nf-core/eager' && (github.event_name == 'release' || github.event_name == 'push') }}
-    env:
-      DOCKERHUB_USERNAME: ${{ secrets.DOCKERHUB_USERNAME }}
-      DOCKERHUB_PASS: ${{ secrets.DOCKERHUB_PASS }}
-    steps:
-      - name: Check out pipeline code
-        uses: actions/checkout@v2
-
-      - name: Build new docker image
-        run: docker build --no-cache . -t nfcore/eager:latest
-
-      - name: Push Docker image to DockerHub (dev)
-        if: ${{ github.event_name == 'push' }}
-        run: |
-          echo "$DOCKERHUB_PASS" | docker login -u "$DOCKERHUB_USERNAME" --password-stdin
-          docker tag nfcore/eager:latest nfcore/eager:dev
-          docker push nfcore/eager:dev
-      - name: Push Docker image to DockerHub (release)
-        if: ${{ github.event_name == 'release' }}
-        run: |
-          echo "$DOCKERHUB_PASS" | docker login -u "$DOCKERHUB_USERNAME" --password-stdin
-          docker push nfcore/eager:latest
-          docker tag nfcore/eager:latest nfcore/eager:${{ github.ref }}
-          docker push nfcore/eager:${{ github.ref }}
+      - name: RESCALING Run basic pipeline but with mapDamage rescaling of BAM files. Note this will be slow
+        run: |
+          nextflow run ${GITHUB_WORKSPACE} -profile test_tsv,docker --run_mapdamage_rescaling --run_genotyping --genotyping_tool hc --genotyping_source 'rescaled'
@@ -3,6 +3,30 @@
 The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/)
 and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.html).
 
+## [2.3.0] - 2021-01-11 - "Aalen"
+
+### `Added`
+
+- [#640](https://github.com/nf-core/eager/issues/640) - Added a pre-metagenomic screening filtering of low-sequence complexity reads with `bbduk`
+- [#583](https://github.com/nf-core/eager/issues/583) - Added `mapDamage2` rescaling of BAM files to remove damage
+- Updated usage (merging files) and workflow images reflecting new functionality.
+
+### `Fixed`
+
+- Removed leftover old DockerHub push CI commands.
+- [#627](https://github.com/nf-core/eager/issues/627) - Added de Barros Damgaard citation to README
+- [#630](https://github.com/nf-core/eager/pull/630) - Better handling of Qualimap memory requirements and error strategy.
+- Fixed some incomplete schema options to ensure users supply valid input values
+- [#638](https://github.com/nf-core/eager/issues/638#issuecomment-748877567) Fixed inverted circularfilter filtering (previously filtering would happen by default, not when requested by user as originally recorded in documentation)
+- [DeDup:](https://github.com/apeltzer/DeDup/commit/07d47868f10a6830da8c9161caa3755d9da155bf) Fixed Null Pointer Bug in DeDup by updating to 0.12.8 version
+- [#650](https://github.com/nf-core/eager/pull/650) - Increased memory given to FastQC for larger files by making it multithreaded
+
+### `Dependencies`
+
+- Update: DeDup v0.12.7 to v0.12.8
+
+### `Deprecated`
+
 ## [2.2.2] - 2020-12-09
 
 ### `Added`
 
@@ -7,10 +7,10 @@ COPY environment.yml /
 RUN conda env create --quiet -f /environment.yml && conda clean -a
 
 # Add conda installation dir to PATH (instead of doing 'conda activate')
-ENV PATH /opt/conda/envs/nf-core-eager-2.2.2/bin:$PATH
+ENV PATH /opt/conda/envs/nf-core-eager-2.3/bin:$PATH
 
 # Dump the details of the installed packages to a file for posterity
-RUN conda env export --name nf-core-eager-2.2.2 > nf-core-eager-2.2.2.yml
+RUN conda env export --name nf-core-eager-2.3 > nf-core-eager-2.3.yml
 
 # Instruct R processes to use these empty files instead of clashing with a local version
 RUN touch .Rprofile
 
@@ -22,10 +22,42 @@
 The pipeline is built using [Nextflow](https://www.nextflow.io), a workflow tool to run tasks across multiple compute infrastructures in a very portable manner. It comes with docker containers making installation trivial and results highly reproducible. The pipeline pre-processes raw data from FASTQ inputs, or preprocessed BAM inputs. It can align reads and performs extensive general NGS and aDNA specific quality-control on the results. It comes with docker, singularity or conda containers making installation trivial and results highly reproducible.
 
 <p align="center">
-    <img src="docs/images/output/overview/eager2_workflow.png" alt="nf-core/eager schematic workflow" width="70%"
+    <img src="docs/images/usage/eager2_workflow.png" alt="nf-core/eager schematic workflow" width="70%">
 </p>
 
-## Pipeline steps
+## Quick Start
+
+1. Install [`nextflow`](https://nf-co.re/usage/installation) (version >= 20.04.0)
+
+2. Install any of [`Docker`](https://docs.docker.com/engine/installation/), [`Singularity`](https://www.sylabs.io/guides/3.0/user-guide/) or [`Podman`](https://podman.io/) for full pipeline reproducibility _(please only use [`Conda`](https://conda.io/miniconda.html) as a last resort; see [docs](https://nf-co.re/usage/configuration#basic-configuration-profiles))_
+
+3. Download the pipeline and test it on a minimal dataset with a single command:
+
+    ```bash
+    nextflow run nf-core/eager -profile test_tsv,<docker/singularity/podman/conda/institute>
+    ```
+
+    > Please check [nf-core/configs](https://github.com/nf-core/configs#documentation) to see if a custom config file to run nf-core pipelines already exists for your Institute. If so, you can simply use `-profile <institute>` in your command. This will enable either `docker` or `singularity` and set the appropriate execution settings for your local compute environment.
+
+4. Start running your own analysis!
+
+    ```bash
+    nextflow run nf-core/eager -profile <docker/singularity/podman/conda/institute> --input '*_R{1,2}.fastq.gz' --fasta '<your_reference>.fasta'
+    ```
+
+5. Once your run has completed successfully, clean up the intermediate files.
+
+    ```bash
+    nextflow clean -f -k
+    ```
+
+See [usage docs](https://nf-co.re/eager/docs/usage.md) for all of the available options when running the pipeline.
+
+**N.B.** You can see an overview of the run in the MultiQC report located at `./results/MultiQC/multiqc_report.html`
+
+Modifications to the default pipeline are easily made using various options as described in the documentation.
+
+## Pipeline Summary
 
 ### Default Steps
 
@@ -77,6 +109,7 @@ Additional functionality contained by the pipeline currently includes:
 
 #### Metagenomic Screening
 
+* Low-sequence complexity filtering (`BBduk`)
 * Taxonomic binner with alignment (`MALT`)
 * Taxonomic binner without alignment (`Kraken2`)
 * aDNA characteristic screening of taxonomically binned data from MALT (`MaltExtract`)
@@ -86,51 +119,9 @@ Additional functionality contained by the pipeline currently includes:
 A graphical overview of suggested routes through the pipeline depending on context can be seen below.
 
 <p align="center">
-    <img src="docs/images/output/overview/eager2_metromap_complex.png" alt="nf-core/eager metro map" width="70%"
+    <img src="docs/images/usage/eager2_metromap_complex.png" alt="nf-core/eager metro map" width="70%">
 </p>
 
-## Quick Start
-
-1. Install [`nextflow`](https://nf-co.re/usage/installation) (version >= 20.04.0)
-
-2. Install any of [`Docker`](https://docs.docker.com/engine/installation/), [`Singularity`](https://www.sylabs.io/guides/3.0/user-guide/) or [`Podman`](https://podman.io/) for full pipeline reproducibility _(please only use [`Conda`](https://conda.io/miniconda.html) as a last resort; see [docs](https://nf-co.re/usage/configuration#basic-configuration-profiles))_
-
-3. Download the pipeline and test it on a minimal dataset with a single command:
-
-    ```bash
-    nextflow run nf-core/eager -profile test,<docker/singularity/podman/conda/institute>
-    ```
-
-    > Please check [nf-core/configs](https://github.com/nf-core/configs#documentation) to see if a custom config file to run nf-core pipelines already exists for your Institute. If so, you can simply use `-profile <institute>` in your command. This will enable either `docker` or `singularity` and set the appropriate execution settings for your local compute environment.
-
-4. Start running your own analysis!
-
-    ```bash
-    nextflow run nf-core/eager -profile <docker/singularity/conda> --input '*_R{1,2}.fastq.gz' --fasta '<your_reference>.fasta'
-    ```
-
-5. Once your run has completed successfully, clean up the intermediate files.
-
-    ```bash
-    nextflow clean -f -k
-    ```
-
-See [usage docs](https://nf-co.re/eager/docs/usage.md) for all of the available options when running the pipeline.
-
-**N.B.** You can see an overview of the run in the MultiQC report located at `./results/MultiQC/multiqc_report.html`
-
-Modifications to the default pipeline are easily made using various options
-as described in the documentation.
-
-## Pipeline Summary
-
-By default, the pipeline currently performs the following:
-
-<!-- TODO nf-core: Fill in short bullet-pointed list of default steps of pipeline -->
-
-* Sequencing quality control (`FastQC`)
-* Overall pipeline run summaries (`MultiQC`)
-
 ## Documentation
 
 The nf-core/eager pipeline comes with documentation about the pipeline: [usage](https://nf-co.re/eager/usage) and [output](https://nf-co.re/eager/output).
@@ -236,6 +227,8 @@ In addition, references of tools and data used in this pipeline are as follows:
 * **Bowtie2**  Langmead, B. and Salzberg, S. L. 2012 Fast gapped-read alignment with Bowtie 2. Nature methods, 9(4), p. 357–359. doi: [10.1038/nmeth.1923](https://dx.doi.org/10.1038/nmeth.1923).
 * **sequenceTools** Stephan Schiffels (Unpublished). Download: [https://github.com/stschiff/sequenceTools](https://github.com/stschiff/sequenceTools)
 * **EigenstratDatabaseTools** Thiseas C. Lamnidis (Unpublished). Download: [https://github.com/TCLamnidis/EigenStratDatabaseTools.git](https://github.com/TCLamnidis/EigenStratDatabaseTools.git)
+* **mapDamage2** Jónsson, H., et al. 2013. mapDamage2.0: fast approximate Bayesian estimates of ancient DNA damage parameters. Bioinformatics, 29(13), 1682–1684. [https://doi.org/10.1093/bioinformatics/btt193](https://doi.org/10.1093/bioinformatics/btt193)
+* **BBduk** Brian Bushnell (Unpublished). Download: [https://sourceforge.net/projects/bbmap/](https://sourceforge.net/projects/bbmap/)
 
 ## Data References
 
@@ -244,3 +237,4 @@ This repository uses test data from the following studies:
 * Fellows Yates, J. A. et al. (2017) ‘Central European Woolly Mammoth Population Dynamics: Insights from Late Pleistocene Mitochondrial Genomes’, Scientific reports, 7(1), p. 17714. [doi: 10.1038/s41598-017-17723-1](https://doi.org/10.1038/s41598-017-17723-1).
 * Gamba, C. et al. (2014) ‘Genome flux and stasis in a five millennium transect of European prehistory’, Nature communications, 5, p. 5257. [doi: 10.1038/ncomms6257](https://doi.org/10.1038/ncomms6257).
 * Star, B. et al. (2017) ‘Ancient DNA reveals the Arctic origin of Viking Age cod from Haithabu, Germany’, Proceedings of the National Academy of Sciences of the United States of America, 114(34), pp. 9152–9157. [doi: 10.1073/pnas.1710186114](https://doi.org/10.1073/pnas.1710186114).
+* de Barros Damgaard, P. et al. (2018). '137 ancient human genomes from across the Eurasian steppes.', Nature, 557(7705), 369–374. [doi: 10.1038/s41586-018-0094-2](https://doi.org/10.1038/s41586-018-0094-2)
@@ -6,7 +6,6 @@ report_comment: >
     This report has been generated by the <a href="https://github.com/nf-core/eager" target="_blank">nf-core/eager</a>
     analysis pipeline. For information about how to interpret these results, please see the
     <a href="https://github.com/nf-core/eager" target="_blank">documentation</a>.
-
 run_modules:
     - adapterRemoval
     - bowtie2
@@ -270,4 +269,4 @@ report_section_order:
     nf-core-eager-summary:
         order: -1001
 
-export_plots: true
+export_plots: true
@@ -35,7 +35,9 @@
     'VCF2genome':['v_vcf2genome.txt', r"VCF2Genome \(v. ([0-9].[0-9]+) "],
     'endorS.py':['v_endorSpy.txt', r"endorS.py (\S+)"],
     'kraken':['v_kraken.txt', r"Kraken version (\S+)"],
-    'eigenstrat_snp_coverage':['v_eigenstrat_snp_coverage.txt',r"(\S+)"]
+    'eigenstrat_snp_coverage':['v_eigenstrat_snp_coverage.txt',r"(\S+)"],
+    'mapDamage2':['v_mapdamage.txt',r"(\S+)"],
+    'bbduk':['v_bbduk.txt',r"(\S+)"]
 }
 
 results = OrderedDict()
@@ -55,7 +57,7 @@
 results['Qualimap'] = '<span style="color:#999999;\">N/A</span>'
 results['Preseq'] = '<span style="color:#999999;\">N/A</span>'
 results['GATK HaplotypeCaller'] = '<span style="color:#999999;\">N/A</span>'
-#results['GATK UnifiedGenotyper'] = '<span style="color:#999999;\">N/A</span>'
+results['GATK UnifiedGenotyper'] = '<span style="color:#999999;\">N/A</span>'
 results['freebayes'] = '<span style="color:#999999;\">N/A</span>'
 results['sequenceTools'] = '<span style="color:#999999;\">N/A</span>'
 results['VCF2genome'] = '<span style="color:#999999;\">N/A</span>'
@@ -71,6 +73,8 @@
 results['kraken'] = '<span style="color:#999999;\">N/A</span>'
 results['maltextract'] = '<span style="color:#999999;\">N/A</span>'
 results['eigenstrat_snp_coverage'] = '<span style="color:#999999;\">N/A</span>'
+results['mapDamage2'] = '<span style="color:#999999;\">N/A</span>'
+results['bbduk'] = '<span style="color:#999999;\">N/A</span>'
 
 # Search each file using its regex
 for k, v in regexes.items():
 
@@ -74,7 +74,7 @@ process {
   }
 
   withName:qualimap{
-    errorStrategy = 'ignore'
+    errorStrategy = { task.exitStatus in [1,143,137,104,134,139] ? 'retry' : 'finish' }
   }
 
   withName:preseq {
 
@@ -51,4 +51,8 @@ process {
       time = { check_max( 10.m * task.attempt, 'time' ) }
   }
 
+  withName:'mapdamage_rescaling'{
+      time = { check_max( 20.m * task.attempt, 'time' ) }
+  }
+
 }
Original file line number	Diff line number	Diff line change
`@@ -74,7 +74,7 @@ process {`
`74`	`74`	`}`
`75`	`75`
`76`	`76`	`withName:qualimap{`
`77`		`- errorStrategy = 'ignore'`
	`77`	`+ errorStrategy = { task.exitStatus in [1,143,137,104,134,139] ? 'retry' : 'finish' }`
`78`	`78`	`}`
`79`	`79`
`80`	`80`	`withName:preseq {`
Original file line number	Diff line number	Diff line change
`@@ -51,4 +51,8 @@ process {`
`51`	`51`	`time = { check_max( 10.m * task.attempt, 'time' ) }`
`52`	`52`	`}`
`53`	`53`
	`54`	`+ withName:'mapdamage_rescaling'{`
	`55`	`+ time = { check_max( 20.m * task.attempt, 'time' ) }`
	`56`	`+ }`
	`57`	`+`
`54`	`58`	`}`