Skip to content

Commit 0bc87f3

Browse files
authored
Merge pull request #649 from nf-core/dev
PR for 2.3 release
2 parents 85e2e32 + 832dcfa commit 0bc87f3

23 files changed

+4538
-3905
lines changed

.github/workflows/ci.yml

Lines changed: 12 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -34,13 +34,13 @@ jobs:
3434
3535
- name: Build new docker image
3636
if: env.MATCHED_FILES
37-
run: docker build --no-cache . -t nfcore/eager:2.2.2
37+
run: docker build --no-cache . -t nfcore/eager:2.3
3838

3939
- name: Pull docker image
4040
if: ${{ !env.MATCHED_FILES }}
4141
run: |
4242
docker pull nfcore/eager:dev
43-
docker tag nfcore/eager:dev nfcore/eager:2.2.2
43+
docker tag nfcore/eager:dev nfcore/eager:2.3
4444
4545
- name: Install Nextflow
4646
env:
@@ -146,16 +146,16 @@ jobs:
146146
nextflow run ${GITHUB_WORKSPACE} -profile test_tsv,docker --run_pmdtools
147147
- name: GENOTYPING_UG AND MULTIVCFANALYZER Test running GATK UnifiedGenotyper and MultiVCFAnalyzer, additional VCFS
148148
run: |
149-
nextflow run ${GITHUB_WORKSPACE} -profile test_tsv,docker --run_genotyping --genotyping_tool 'ug' --gatk_out_mode 'EMIT_ALL_SITES' --gatk_ug_genotype_model 'SNP' --run_multivcfanalyzer --additional_vcf_files 'https://raw.githubusercontent.com/nf-core/test-datasets/eager/testdata/Mammoth/vcf/JK2772_CATCAGTGAGTAGA_L008_R1_001.fastq.gz.tengrand.fq.combined.fq.mapped_rmdup.bam.unifiedgenotyper.vcf.gz' --write_allele_frequencies
149+
nextflow run ${GITHUB_WORKSPACE} -profile test_tsv,docker --run_genotyping --genotyping_tool 'ug' --gatk_out_mode 'EMIT_ALL_SITES' --gatk_ug_genotype_model 'SNP' --run_multivcfanalyzer --additional_vcf_files 'https://raw.githubusercontent.com/nf-core/test-datasets/eager/testdata/Mammoth/vcf/JK2772_CATCAGTGAGTAGA_L008_R1_001.fastq.gz.tengrand.fq.combined.fq.mapped_rmdup.bam.unifiedgenotyper.vcf.gz' --write_allele_frequencies
150150
- name: COMPLEX LANE/LIBRARY MERGING Test running lane and library merging prior to GATK UnifiedGenotyper and running MultiVCFAnalyzer
151151
run: |
152-
nextflow run ${GITHUB_WORKSPACE} -profile test_tsv_complex,docker --run_genotyping --genotyping_tool 'ug' --gatk_out_mode 'EMIT_ALL_SITES' --gatk_ug_genotype_model 'SNP' --run_multivcfanalyzer
152+
nextflow run ${GITHUB_WORKSPACE} -profile test_tsv_complex,docker --run_genotyping --genotyping_tool 'ug' --gatk_out_mode 'EMIT_ALL_SITES' --gatk_ug_genotype_model 'SNP' --run_multivcfanalyzer
153153
- name: GENOTYPING_UG ON TRIMMED BAM Test
154154
run: |
155-
nextflow run ${GITHUB_WORKSPACE} -profile test_tsv,docker --run_genotyping --run_trim_bam --genotyping_source 'trimmed' --genotyping_tool 'ug' --gatk_out_mode 'EMIT_ALL_SITES' --gatk_ug_genotype_model 'SNP'
155+
nextflow run ${GITHUB_WORKSPACE} -profile test_tsv,docker --run_genotyping --run_trim_bam --genotyping_source 'trimmed' --genotyping_tool 'ug' --gatk_out_mode 'EMIT_ALL_SITES' --gatk_ug_genotype_model 'SNP'
156156
- name: BAM_INPUT Run the basic pipeline with the bam input profile, skip AdapterRemoval as no convertBam
157157
run: |
158-
nextflow run ${GITHUB_WORKSPACE} -profile test_tsv_bam,docker --skip_adapterremoval
158+
nextflow run ${GITHUB_WORKSPACE} -profile test_tsv_bam,docker --skip_adapterremoval
159159
- name: BAM_INPUT Run the basic pipeline with the bam input profile, convert to FASTQ for adapterremoval test and downstream
160160
run: |
161161
nextflow run ${GITHUB_WORKSPACE} -profile test_tsv_bam,docker --run_convertinputbam
@@ -167,6 +167,9 @@ jobs:
167167
- name: METAGENOMIC Run the basic pipeline but with unmapped reads going into MALT
168168
run: |
169169
nextflow run ${GITHUB_WORKSPACE} -profile test_tsv,docker --run_bam_filtering --bam_unmapped_type 'fastq' --run_metagenomic_screening --metagenomic_tool 'malt' --database "/home/runner/work/eager/eager/databases/malt/" --malt_sam_output
170+
- name: METAGENOMIC Run the basic pipeline but low-complexity filtered reads going into MALT
171+
run: |
172+
nextflow run ${GITHUB_WORKSPACE} -profile test_tsv,docker --run_bam_filtering --bam_unmapped_type 'fastq' --run_metagenomic_screening --metagenomic_tool 'malt' --database "/home/runner/work/eager/eager/databases/malt/" --metagenomic_complexity_filter
170173
- name: MALTEXTRACT Download resource files
171174
run: |
172175
mkdir -p databases/maltextract
@@ -186,34 +189,6 @@ jobs:
186189
- name: MTNUCRATIO Run basic pipeline with bam input profile, but don't convert BAM, skip everything but nmtnucratio
187190
run: |
188191
nextflow run ${GITHUB_WORKSPACE} -profile test_tsv_humanbam,docker --skip_fastqc --skip_adapterremoval --skip_deduplication --skip_qualimap --skip_preseq --skip_damage_calculation --run_mtnucratio
189-
190-
push_dockerhub:
191-
name: Push new Docker image to Docker Hub
192-
runs-on: ubuntu-latest
193-
# Only run if the tests passed
194-
needs: test
195-
# Only run for the nf-core repo, for releases and merged PRs
196-
if: ${{ github.repository == 'nf-core/eager' && (github.event_name == 'release' || github.event_name == 'push') }}
197-
env:
198-
DOCKERHUB_USERNAME: ${{ secrets.DOCKERHUB_USERNAME }}
199-
DOCKERHUB_PASS: ${{ secrets.DOCKERHUB_PASS }}
200-
steps:
201-
- name: Check out pipeline code
202-
uses: actions/checkout@v2
203-
204-
- name: Build new docker image
205-
run: docker build --no-cache . -t nfcore/eager:latest
206-
207-
- name: Push Docker image to DockerHub (dev)
208-
if: ${{ github.event_name == 'push' }}
209-
run: |
210-
echo "$DOCKERHUB_PASS" | docker login -u "$DOCKERHUB_USERNAME" --password-stdin
211-
docker tag nfcore/eager:latest nfcore/eager:dev
212-
docker push nfcore/eager:dev
213-
- name: Push Docker image to DockerHub (release)
214-
if: ${{ github.event_name == 'release' }}
215-
run: |
216-
echo "$DOCKERHUB_PASS" | docker login -u "$DOCKERHUB_USERNAME" --password-stdin
217-
docker push nfcore/eager:latest
218-
docker tag nfcore/eager:latest nfcore/eager:${{ github.ref }}
219-
docker push nfcore/eager:${{ github.ref }}
192+
- name: RESCALING Run basic pipeline with basic pipeline but with mapDamage rescaling of BAM files. Note this will be slow
193+
run: |
194+
nextflow run ${GITHUB_WORKSPACE} -profile test_tsv,docker --run_mapdamage_rescaling --run_genotyping --genotyping_tool hc --genotyping_source 'rescaled'

CHANGELOG.md

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,30 @@
33
The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/)
44
and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.html).
55

6+
## [2.3.0] - 2021-01-11 - "Aalen"
7+
8+
### `Added`
9+
10+
- [#640](https://github.com/nf-core/eager/issues/640) - Added a pre-metagenomic screening filtering of low-sequence complexity reads with `bbduk`
11+
- [#583](https://github.com/nf-core/eager/issues/583) - Added `mapDamage2` rescaling of BAM files to remove damage
12+
- Updated usage (merging files) and workflow images reflecting new functionality.
13+
14+
### `Fixed`
15+
16+
- Removed leftover old DockerHub push CI commands.
17+
- [#627](https://github.com/nf-core/eager/issues/627) - Added de Barros Damgaard citation to README
18+
- [#630](https://github.com/nf-core/eager/pull/630) - Better handling of Qualimap memory requirements and error strategy.
19+
- Fixed some incomplete schema options to ensure users supply valid input values
20+
- [#638](https://github.com/nf-core/eager/issues/638#issuecomment-748877567) Fixed inverted circularfilter filtering (previously filtering would happen by default, not when requested by user as originally recorded in documentation)
21+
- [DeDup:](https://github.com/apeltzer/DeDup/commit/07d47868f10a6830da8c9161caa3755d9da155bf) Fixed Null Pointer Bug in DeDup by updating to 0.12.8 version
22+
- [#650](https://github.com/nf-core/eager/pull/650) - Increased memory given to FastQC for larger files by making it multithreaded
23+
24+
### `Dependencies`
25+
26+
- Update: DeDup v0.12.7 to v0.12.8
27+
28+
### `Deprecated`
29+
630
## [2.2.2] - 2020-12-09
731

832
### `Added`

Dockerfile

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7,10 +7,10 @@ COPY environment.yml /
77
RUN conda env create --quiet -f /environment.yml && conda clean -a
88

99
# Add conda installation dir to PATH (instead of doing 'conda activate')
10-
ENV PATH /opt/conda/envs/nf-core-eager-2.2.2/bin:$PATH
10+
ENV PATH /opt/conda/envs/nf-core-eager-2.3/bin:$PATH
1111

1212
# Dump the details of the installed packages to a file for posterity
13-
RUN conda env export --name nf-core-eager-2.2.2 > nf-core-eager-2.2.2.yml
13+
RUN conda env export --name nf-core-eager-2.3 > nf-core-eager-2.3.yml
1414

1515
# Instruct R processes to use these empty files instead of clashing with a local version
1616
RUN touch .Rprofile

README.md

Lines changed: 39 additions & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -22,10 +22,42 @@
2222
The pipeline is built using [Nextflow](https://www.nextflow.io), a workflow tool to run tasks across multiple compute infrastructures in a very portable manner. The pipeline pre-processes raw data from FASTQ inputs, or preprocessed BAM inputs. It can align reads and perform extensive general NGS and aDNA-specific quality control on the results. It comes with docker, singularity or conda containers making installation trivial and results highly reproducible.
2323

2424
<p align="center">
25-
<img src="docs/images/output/overview/eager2_workflow.png" alt="nf-core/eager schematic workflow" width="70%"
25+
<img src="docs/images/usage/eager2_workflow.png" alt="nf-core/eager schematic workflow" width="70%">
2626
</p>
2727

28-
## Pipeline steps
28+
## Quick Start
29+
30+
1. Install [`nextflow`](https://nf-co.re/usage/installation) (version >= 20.04.0)
31+
32+
2. Install any of [`Docker`](https://docs.docker.com/engine/installation/), [`Singularity`](https://www.sylabs.io/guides/3.0/user-guide/) or [`Podman`](https://podman.io/) for full pipeline reproducibility _(please only use [`Conda`](https://conda.io/miniconda.html) as a last resort; see [docs](https://nf-co.re/usage/configuration#basic-configuration-profiles))_
33+
34+
3. Download the pipeline and test it on a minimal dataset with a single command:
35+
36+
```bash
37+
nextflow run nf-core/eager -profile test_tsv,<docker/singularity/podman/conda/institute>
38+
```
39+
40+
> Please check [nf-core/configs](https://github.com/nf-core/configs#documentation) to see if a custom config file to run nf-core pipelines already exists for your Institute. If so, you can simply use `-profile <institute>` in your command. This will enable either `docker` or `singularity` and set the appropriate execution settings for your local compute environment.
41+
42+
4. Start running your own analysis!
43+
44+
```bash
45+
nextflow run nf-core/eager -profile <docker/singularity/podman/conda/institute> --input '*_R{1,2}.fastq.gz' --fasta '<your_reference>.fasta'
46+
```
47+
48+
5. Once your run has completed successfully, clean up the intermediate files.
49+
50+
```bash
51+
nextflow clean -f -k
52+
```
53+
54+
See [usage docs](https://nf-co.re/eager/docs/usage.md) for all of the available options when running the pipeline.
55+
56+
**N.B.** You can see an overview of the run in the MultiQC report located at `./results/MultiQC/multiqc_report.html`
57+
58+
Modifications to the default pipeline are easily made using various options as described in the documentation.
59+
60+
## Pipeline Summary
2961

3062
### Default Steps
3163

@@ -77,6 +109,7 @@ Additional functionality contained by the pipeline currently includes:
77109

78110
#### Metagenomic Screening
79111

112+
* Low sequence complexity filtering (`BBduk`)
80113
* Taxonomic binner with alignment (`MALT`)
81114
* Taxonomic binner without alignment (`Kraken2`)
82115
* aDNA characteristic screening of taxonomically binned data from MALT (`MaltExtract`)
@@ -86,51 +119,9 @@ Additional functionality contained by the pipeline currently includes:
86119
A graphical overview of suggested routes through the pipeline depending on context can be seen below.
87120

88121
<p align="center">
89-
<img src="docs/images/output/overview/eager2_metromap_complex.png" alt="nf-core/eager metro map" width="70%"
122+
<img src="docs/images/usage/eager2_metromap_complex.png" alt="nf-core/eager metro map" width="70%">
90123
</p>
91124

92-
## Quick Start
93-
94-
1. Install [`nextflow`](https://nf-co.re/usage/installation) (version >= 20.04.0)
95-
96-
2. Install any of [`Docker`](https://docs.docker.com/engine/installation/), [`Singularity`](https://www.sylabs.io/guides/3.0/user-guide/) or [`Podman`](https://podman.io/) for full pipeline reproducibility _(please only use [`Conda`](https://conda.io/miniconda.html) as a last resort; see [docs](https://nf-co.re/usage/configuration#basic-configuration-profiles))_
97-
98-
3. Download the pipeline and test it on a minimal dataset with a single command:
99-
100-
```bash
101-
nextflow run nf-core/eager -profile test,<docker/singularity/podman/conda/institute>
102-
```
103-
104-
> Please check [nf-core/configs](https://github.com/nf-core/configs#documentation) to see if a custom config file to run nf-core pipelines already exists for your Institute. If so, you can simply use `-profile <institute>` in your command. This will enable either `docker` or `singularity` and set the appropriate execution settings for your local compute environment.
105-
106-
4. Start running your own analysis!
107-
108-
```bash
109-
nextflow run nf-core/eager -profile <docker/singularity/conda> --input '*_R{1,2}.fastq.gz' --fasta '<your_reference>.fasta'
110-
```
111-
112-
5. Once your run has completed successfully, clean up the intermediate files.
113-
114-
```bash
115-
nextflow clean -f -k
116-
```
117-
118-
See [usage docs](https://nf-co.re/eager/docs/usage.md) for all of the available options when running the pipeline.
119-
120-
**N.B.** You can see an overview of the run in the MultiQC report located at `./results/MultiQC/multiqc_report.html`
121-
122-
Modifications to the default pipeline are easily made using various options
123-
as described in the documentation.
124-
125-
## Pipeline Summary
126-
127-
By default, the pipeline currently performs the following:
128-
129-
<!-- TODO nf-core: Fill in short bullet-pointed list of default steps of pipeline -->
130-
131-
* Sequencing quality control (`FastQC`)
132-
* Overall pipeline run summaries (`MultiQC`)
133-
134125
## Documentation
135126

136127
The nf-core/eager pipeline comes with documentation about the pipeline: [usage](https://nf-co.re/eager/usage) and [output](https://nf-co.re/eager/output).
@@ -236,6 +227,8 @@ In addition, references of tools and data used in this pipeline are as follows:
236227
* **Bowtie2** Langmead, B. and Salzberg, S. L. 2012 Fast gapped-read alignment with Bowtie 2. Nature methods, 9(4), p. 357–359. doi: [10.1038/nmeth.1923](https://dx.doi.org/10.1038/nmeth.1923).
237228
* **sequenceTools** Stephan Schiffels (Unpublished). Download: [https://github.com/stschiff/sequenceTools](https://github.com/stschiff/sequenceTools)
238229
* **EigenstratDatabaseTools** Thiseas C. Lamnidis (Unpublished). Download: [https://github.com/TCLamnidis/EigenStratDatabaseTools.git](https://github.com/TCLamnidis/EigenStratDatabaseTools.git)
230+
* **mapDamage2** Jónsson, H., et al. 2013. mapDamage2.0: fast approximate Bayesian estimates of ancient DNA damage parameters. Bioinformatics, 29(13), 1682–1684. [https://doi.org/10.1093/bioinformatics/btt193](https://doi.org/10.1093/bioinformatics/btt193)
231+
* **BBduk** Brian Bushnell (Unpublished). Download: [https://sourceforge.net/projects/bbmap/](https://sourceforge.net/projects/bbmap/)
239232
240233
## Data References
241234
@@ -244,3 +237,4 @@ This repository uses test data from the following studies:
244237
* Fellows Yates, J. A. et al. (2017) ‘Central European Woolly Mammoth Population Dynamics: Insights from Late Pleistocene Mitochondrial Genomes’, Scientific reports, 7(1), p. 17714. [doi: 10.1038/s41598-017-17723-1](https://doi.org/10.1038/s41598-017-17723-1).
245238
* Gamba, C. et al. (2014) ‘Genome flux and stasis in a five millennium transect of European prehistory’, Nature communications, 5, p. 5257. [doi: 10.1038/ncomms6257](https://doi.org/10.1038/ncomms6257).
246239
* Star, B. et al. (2017) ‘Ancient DNA reveals the Arctic origin of Viking Age cod from Haithabu, Germany’, Proceedings of the National Academy of Sciences of the United States of America, 114(34), pp. 9152–9157. [doi: 10.1073/pnas.1710186114](https://doi.org/10.1073/pnas.1710186114).
240+
* de Barros Damgaard, P. et al. (2018). '137 ancient human genomes from across the Eurasian steppes.', Nature, 557(7705), 369–374. [doi: 10.1038/s41586-018-0094-2](https://doi.org/10.1038/s41586-018-0094-2)

assets/multiqc_config.yaml

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,6 @@ report_comment: >
66
This report has been generated by the <a href="https://github.com/nf-core/eager" target="_blank">nf-core/eager</a>
77
analysis pipeline. For information about how to interpret these results, please see the
88
<a href="https://github.com/nf-core/eager" target="_blank">documentation</a>.
9-
109
run_modules:
1110
- adapterRemoval
1211
- bowtie2
@@ -270,4 +269,4 @@ report_section_order:
270269
nf-core-eager-summary:
271270
order: -1001
272271

273-
export_plots: true
272+
export_plots: true

bin/scrape_software_versions.py

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,9 @@
3535
'VCF2genome':['v_vcf2genome.txt', r"VCF2Genome \(v. ([0-9].[0-9]+) "],
3636
'endorS.py':['v_endorSpy.txt', r"endorS.py (\S+)"],
3737
'kraken':['v_kraken.txt', r"Kraken version (\S+)"],
38-
'eigenstrat_snp_coverage':['v_eigenstrat_snp_coverage.txt',r"(\S+)"]
38+
'eigenstrat_snp_coverage':['v_eigenstrat_snp_coverage.txt',r"(\S+)"],
39+
'mapDamage2':['v_mapdamage.txt',r"(\S+)"],
40+
'bbduk':['v_bbduk.txt',r"(\S+)"]
3941
}
4042

4143
results = OrderedDict()
@@ -55,7 +57,7 @@
5557
results['Qualimap'] = '<span style="color:#999999;\">N/A</span>'
5658
results['Preseq'] = '<span style="color:#999999;\">N/A</span>'
5759
results['GATK HaplotypeCaller'] = '<span style="color:#999999;\">N/A</span>'
58-
#results['GATK UnifiedGenotyper'] = '<span style="color:#999999;\">N/A</span>'
60+
results['GATK UnifiedGenotyper'] = '<span style="color:#999999;\">N/A</span>'
5961
results['freebayes'] = '<span style="color:#999999;\">N/A</span>'
6062
results['sequenceTools'] = '<span style="color:#999999;\">N/A</span>'
6163
results['VCF2genome'] = '<span style="color:#999999;\">N/A</span>'
@@ -71,6 +73,8 @@
7173
results['kraken'] = '<span style="color:#999999;\">N/A</span>'
7274
results['maltextract'] = '<span style="color:#999999;\">N/A</span>'
7375
results['eigenstrat_snp_coverage'] = '<span style="color:#999999;\">N/A</span>'
76+
results['mapDamage2'] = '<span style="color:#999999;\">N/A</span>'
77+
results['bbduk'] = '<span style="color:#999999;\">N/A</span>'
7478

7579
# Search each file using its regex
7680
for k, v in regexes.items():

conf/base.config

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -74,7 +74,7 @@ process {
7474
}
7575

7676
withName:qualimap{
77-
errorStrategy = 'ignore'
77+
errorStrategy = { task.exitStatus in [1,143,137,104,134,139] ? 'retry' : 'finish' }
7878
}
7979

8080
withName:preseq {

conf/test_resources.config

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -51,4 +51,8 @@ process {
5151
time = { check_max( 10.m * task.attempt, 'time' ) }
5252
}
5353

54+
withName:'mapdamage_rescaling'{
55+
time = { check_max( 20.m * task.attempt, 'time' ) }
56+
}
57+
5458
}
Binary file not shown.
-192 KB
Binary file not shown.

0 commit comments

Comments
 (0)