Skip to content

Commit

Permalink
Merge pull request #649 from nf-core/dev
Browse files Browse the repository at this point in the history
PR for 2.3 release
  • Loading branch information
jfy133 authored Jan 11, 2021
2 parents 85e2e32 + 832dcfa commit 0bc87f3
Show file tree
Hide file tree
Showing 23 changed files with 4,538 additions and 3,905 deletions.
49 changes: 12 additions & 37 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -34,13 +34,13 @@ jobs:
- name: Build new docker image
if: env.MATCHED_FILES
run: docker build --no-cache . -t nfcore/eager:2.2.2
run: docker build --no-cache . -t nfcore/eager:2.3

- name: Pull docker image
if: ${{ !env.MATCHED_FILES }}
run: |
docker pull nfcore/eager:dev
docker tag nfcore/eager:dev nfcore/eager:2.2.2
docker tag nfcore/eager:dev nfcore/eager:2.3
- name: Install Nextflow
env:
Expand Down Expand Up @@ -146,16 +146,16 @@ jobs:
nextflow run ${GITHUB_WORKSPACE} -profile test_tsv,docker --run_pmdtools
- name: GENOTYPING_UG AND MULTIVCFANALYZER Test running GATK UnifiedGenotyper and MultiVCFAnalyzer, additional VCFS
run: |
nextflow run ${GITHUB_WORKSPACE} -profile test_tsv,docker --run_genotyping --genotyping_tool 'ug' --gatk_out_mode 'EMIT_ALL_SITES' --gatk_ug_genotype_model 'SNP' --run_multivcfanalyzer --additional_vcf_files 'https://raw.githubusercontent.com/nf-core/test-datasets/eager/testdata/Mammoth/vcf/JK2772_CATCAGTGAGTAGA_L008_R1_001.fastq.gz.tengrand.fq.combined.fq.mapped_rmdup.bam.unifiedgenotyper.vcf.gz' --write_allele_frequencies
nextflow run ${GITHUB_WORKSPACE} -profile test_tsv,docker --run_genotyping --genotyping_tool 'ug' --gatk_out_mode 'EMIT_ALL_SITES' --gatk_ug_genotype_model 'SNP' --run_multivcfanalyzer --additional_vcf_files 'https://raw.githubusercontent.com/nf-core/test-datasets/eager/testdata/Mammoth/vcf/JK2772_CATCAGTGAGTAGA_L008_R1_001.fastq.gz.tengrand.fq.combined.fq.mapped_rmdup.bam.unifiedgenotyper.vcf.gz' --write_allele_frequencies
- name: COMPLEX LANE/LIBRARY MERGING Test running lane and library merging prior to GATK UnifiedGenotyper and running MultiVCFAnalyzer
run: |
nextflow run ${GITHUB_WORKSPACE} -profile test_tsv_complex,docker --run_genotyping --genotyping_tool 'ug' --gatk_out_mode 'EMIT_ALL_SITES' --gatk_ug_genotype_model 'SNP' --run_multivcfanalyzer
nextflow run ${GITHUB_WORKSPACE} -profile test_tsv_complex,docker --run_genotyping --genotyping_tool 'ug' --gatk_out_mode 'EMIT_ALL_SITES' --gatk_ug_genotype_model 'SNP' --run_multivcfanalyzer
- name: GENOTYPING_UG ON TRIMMED BAM Test
run: |
nextflow run ${GITHUB_WORKSPACE} -profile test_tsv,docker --run_genotyping --run_trim_bam --genotyping_source 'trimmed' --genotyping_tool 'ug' --gatk_out_mode 'EMIT_ALL_SITES' --gatk_ug_genotype_model 'SNP'
nextflow run ${GITHUB_WORKSPACE} -profile test_tsv,docker --run_genotyping --run_trim_bam --genotyping_source 'trimmed' --genotyping_tool 'ug' --gatk_out_mode 'EMIT_ALL_SITES' --gatk_ug_genotype_model 'SNP'
- name: BAM_INPUT Run the basic pipeline with the bam input profile, skip AdapterRemoval as no convertBam
run: |
nextflow run ${GITHUB_WORKSPACE} -profile test_tsv_bam,docker --skip_adapterremoval
nextflow run ${GITHUB_WORKSPACE} -profile test_tsv_bam,docker --skip_adapterremoval
- name: BAM_INPUT Run the basic pipeline with the bam input profile, convert to FASTQ for adapterremoval test and downstream
run: |
nextflow run ${GITHUB_WORKSPACE} -profile test_tsv_bam,docker --run_convertinputbam
Expand All @@ -167,6 +167,9 @@ jobs:
- name: METAGENOMIC Run the basic pipeline but with unmapped reads going into MALT
run: |
nextflow run ${GITHUB_WORKSPACE} -profile test_tsv,docker --run_bam_filtering --bam_unmapped_type 'fastq' --run_metagenomic_screening --metagenomic_tool 'malt' --database "/home/runner/work/eager/eager/databases/malt/" --malt_sam_output
- name: METAGENOMIC Run the basic pipeline but low-complexity filtered reads going into MALT
run: |
nextflow run ${GITHUB_WORKSPACE} -profile test_tsv,docker --run_bam_filtering --bam_unmapped_type 'fastq' --run_metagenomic_screening --metagenomic_tool 'malt' --database "/home/runner/work/eager/eager/databases/malt/" --metagenomic_complexity_filter
- name: MALTEXTRACT Download resource files
run: |
mkdir -p databases/maltextract
Expand All @@ -186,34 +189,6 @@ jobs:
- name: MTNUCRATIO Run basic pipeline with bam input profile, but don't convert BAM, skip everything but nmtnucratio
run: |
nextflow run ${GITHUB_WORKSPACE} -profile test_tsv_humanbam,docker --skip_fastqc --skip_adapterremoval --skip_deduplication --skip_qualimap --skip_preseq --skip_damage_calculation --run_mtnucratio
push_dockerhub:
name: Push new Docker image to Docker Hub
runs-on: ubuntu-latest
# Only run if the tests passed
needs: test
# Only run for the nf-core repo, for releases and merged PRs
if: ${{ github.repository == 'nf-core/eager' && (github.event_name == 'release' || github.event_name == 'push') }}
env:
DOCKERHUB_USERNAME: ${{ secrets.DOCKERHUB_USERNAME }}
DOCKERHUB_PASS: ${{ secrets.DOCKERHUB_PASS }}
steps:
- name: Check out pipeline code
uses: actions/checkout@v2

- name: Build new docker image
run: docker build --no-cache . -t nfcore/eager:latest

- name: Push Docker image to DockerHub (dev)
if: ${{ github.event_name == 'push' }}
run: |
echo "$DOCKERHUB_PASS" | docker login -u "$DOCKERHUB_USERNAME" --password-stdin
docker tag nfcore/eager:latest nfcore/eager:dev
docker push nfcore/eager:dev
- name: Push Docker image to DockerHub (release)
if: ${{ github.event_name == 'release' }}
run: |
echo "$DOCKERHUB_PASS" | docker login -u "$DOCKERHUB_USERNAME" --password-stdin
docker push nfcore/eager:latest
docker tag nfcore/eager:latest nfcore/eager:${{ github.ref }}
docker push nfcore/eager:${{ github.ref }}
- name: RESCALING Run basic pipeline with basic pipeline but with mapDamage rescaling of BAM files. Note this will be slow
run: |
nextflow run ${GITHUB_WORKSPACE} -profile test_tsv,docker --run_mapdamage_rescaling --run_genotyping --genotyping_tool hc --genotyping_source 'rescaled'
24 changes: 24 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,30 @@
The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/)
and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.html).

## [2.3.0] - 2021-01-11 - "Aalen"

### `Added`

- [#640](https://github.com/nf-core/eager/issues/640) - Added a pre-metagenomic screening filtering of low-sequence complexity reads with `bbduk`
- [#583](https://github.com/nf-core/eager/issues/583) - Added `mapDamage2` rescaling of BAM files to remove damage
- Updated usage (merging files) and workflow images reflecting new functionality.

### `Fixed`

- Removed leftover old DockerHub push CI commands.
- [#627](https://github.com/nf-core/eager/issues/627) - Added de Barros Damgaard citation to README
- [#630](https://github.com/nf-core/eager/pull/630) - Better handling of Qualimap memory requirements and error strategy.
- Fixed some incomplete schema options to ensure users supply valid input values
- [#638](https://github.com/nf-core/eager/issues/638#issuecomment-748877567) Fixed inverted circularfilter filtering (previously filtering would happen by default, not when requested by user as originally recorded in documentation)
- [DeDup:](https://github.com/apeltzer/DeDup/commit/07d47868f10a6830da8c9161caa3755d9da155bf) Fixed Null Pointer Bug in DeDup by updating to 0.12.8 version
- [#650](https://github.com/nf-core/eager/pull/650) - Increased memory given to FastQC for larger files by making it multithreaded

### `Dependencies`

- Update: DeDup v0.12.7 to v0.12.8

### `Deprecated`

## [2.2.2] - 2020-12-09

### `Added`
Expand Down
4 changes: 2 additions & 2 deletions Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -7,10 +7,10 @@ COPY environment.yml /
RUN conda env create --quiet -f /environment.yml && conda clean -a

# Add conda installation dir to PATH (instead of doing 'conda activate')
ENV PATH /opt/conda/envs/nf-core-eager-2.2.2/bin:$PATH
ENV PATH /opt/conda/envs/nf-core-eager-2.3/bin:$PATH

# Dump the details of the installed packages to a file for posterity
RUN conda env export --name nf-core-eager-2.2.2 > nf-core-eager-2.2.2.yml
RUN conda env export --name nf-core-eager-2.3 > nf-core-eager-2.3.yml

# Instruct R processes to use these empty files instead of clashing with a local version
RUN touch .Rprofile
Expand Down
84 changes: 39 additions & 45 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -22,10 +22,42 @@
The pipeline is built using [Nextflow](https://www.nextflow.io), a workflow tool to run tasks across multiple compute infrastructures in a very portable manner. It comes with docker containers making installation trivial and results highly reproducible. The pipeline pre-processes raw data from FASTQ inputs, or preprocessed BAM inputs. It can align reads and performs extensive general NGS and aDNA specific quality-control on the results. It comes with docker, singularity or conda containers making installation trivial and results highly reproducible.

<p align="center">
<img src="docs/images/output/overview/eager2_workflow.png" alt="nf-core/eager schematic workflow" width="70%"
<img src="docs/images/usage/eager2_workflow.png" alt="nf-core/eager schematic workflow" width="70%"
</p>

## Pipeline steps
## Quick Start

1. Install [`nextflow`](https://nf-co.re/usage/installation) (version >= 20.04.0)

2. Install any of [`Docker`](https://docs.docker.com/engine/installation/), [`Singularity`](https://www.sylabs.io/guides/3.0/user-guide/) or [`Podman`](https://podman.io/) for full pipeline reproducibility _(please only use [`Conda`](https://conda.io/miniconda.html) as a last resort; see [docs](https://nf-co.re/usage/configuration#basic-configuration-profiles))_

3. Download the pipeline and test it on a minimal dataset with a single command:

```bash
nextflow run nf-core/eager -profile test_tsv,<docker/singularity/podman/conda/institute>
```

> Please check [nf-core/configs](https://github.com/nf-core/configs#documentation) to see if a custom config file to run nf-core pipelines already exists for your Institute. If so, you can simply use `-profile <institute>` in your command. This will enable either `docker` or `singularity` and set the appropriate execution settings for your local compute environment.

4. Start running your own analysis!

```bash
nextflow run nf-core/eager -profile <docker/singularity/podman/conda/institute> --input '*_R{1,2}.fastq.gz' --fasta '<your_reference>.fasta'
```

5. Once your run has completed successfully, clean up the intermediate files.

```bash
nextflow clean -f -k
```

See [usage docs](https://nf-co.re/eager/docs/usage.md) for all of the available options when running the pipeline.

**N.B.** You can see an overview of the run in the MultiQC report located at `./results/MultiQC/multiqc_report.html`

Modifications to the default pipeline are easily made using various options as described in the documentation.

## Pipeline Summary

### Default Steps

Expand Down Expand Up @@ -77,6 +109,7 @@ Additional functionality contained by the pipeline currently includes:

#### Metagenomic Screening

* Low-sequenced complexity filtering (`BBduk`)
* Taxonomic binner with alignment (`MALT`)
* Taxonomic binner without alignment (`Kraken2`)
* aDNA characteristic screening of taxonomically binned data from MALT (`MaltExtract`)
Expand All @@ -86,51 +119,9 @@ Additional functionality contained by the pipeline currently includes:
A graphical overview of suggested routes through the pipeline depending on context can be seen below.

<p align="center">
<img src="docs/images/output/overview/eager2_metromap_complex.png" alt="nf-core/eager metro map" width="70%"
<img src="docs/images/usage/eager2_metromap_complex.png" alt="nf-core/eager metro map" width="70%"
</p>

## Quick Start

1. Install [`nextflow`](https://nf-co.re/usage/installation) (version >= 20.04.0)

2. Install any of [`Docker`](https://docs.docker.com/engine/installation/), [`Singularity`](https://www.sylabs.io/guides/3.0/user-guide/) or [`Podman`](https://podman.io/) for full pipeline reproducibility _(please only use [`Conda`](https://conda.io/miniconda.html) as a last resort; see [docs](https://nf-co.re/usage/configuration#basic-configuration-profiles))_

3. Download the pipeline and test it on a minimal dataset with a single command:

```bash
nextflow run nf-core/eager -profile test,<docker/singularity/podman/conda/institute>
```

> Please check [nf-core/configs](https://github.com/nf-core/configs#documentation) to see if a custom config file to run nf-core pipelines already exists for your Institute. If so, you can simply use `-profile <institute>` in your command. This will enable either `docker` or `singularity` and set the appropriate execution settings for your local compute environment.

4. Start running your own analysis!

```bash
nextflow run nf-core/eager -profile <docker/singularity/conda> --input '*_R{1,2}.fastq.gz' --fasta '<your_reference>.fasta'
```

5. Once your run has completed successfully, clean up the intermediate files.

```bash
nextflow clean -f -k
```

See [usage docs](https://nf-co.re/eager/docs/usage.md) for all of the available options when running the pipeline.

**N.B.** You can see an overview of the run in the MultiQC report located at `./results/MultiQC/multiqc_report.html`

Modifications to the default pipeline are easily made using various options
as described in the documentation.

## Pipeline Summary

By default, the pipeline currently performs the following:

<!-- TODO nf-core: Fill in short bullet-pointed list of default steps of pipeline -->

* Sequencing quality control (`FastQC`)
* Overall pipeline run summaries (`MultiQC`)

## Documentation

The nf-core/eager pipeline comes with documentation about the pipeline: [usage](https://nf-co.re/eager/usage) and [output](https://nf-co.re/eager/output).
Expand Down Expand Up @@ -236,6 +227,8 @@ In addition, references of tools and data used in this pipeline are as follows:
* **Bowtie2** Langmead, B. and Salzberg, S. L. 2012 Fast gapped-read alignment with Bowtie 2. Nature methods, 9(4), p. 357–359. doi: [10.1038/nmeth.1923](https:/dx.doi.org/10.1038/nmeth.1923).
* **sequenceTools** Stephan Schiffels (Unpublished). Download: [https://github.com/stschiff/sequenceTools](https://github.com/stschiff/sequenceTools)
* **EigenstratDatabaseTools** Thiseas C. Lamnidis (Unpublished). Download: [https://github.com/TCLamnidis/EigenStratDatabaseTools.git](https://github.com/TCLamnidis/EigenStratDatabaseTools.git)
* **mapDamage2** Jónsson, H., et al 2013. mapDamage2.0: fast approximate Bayesian estimates of ancient DNA damage parameters. Bioinformatics , 29(13), 1682–1684. [https://doi.org/10.1093/bioinformatics/btt193](https://doi.org/10.1093/bioinformatics/btt193)
* **BBduk** Brian Bushnell (Unpublished). Download: [https://sourceforge.net/projects/bbmap/](sourceforge.net/projects/bbmap/)
## Data References
Expand All @@ -244,3 +237,4 @@ This repository uses test data from the following studies:
* Fellows Yates, J. A. et al. (2017) ‘Central European Woolly Mammoth Population Dynamics: Insights from Late Pleistocene Mitochondrial Genomes’, Scientific reports, 7(1), p. 17714. [doi: 10.1038/s41598-017-17723-1](https://doi.org/10.1038/s41598-017-17723-1).
* Gamba, C. et al. (2014) ‘Genome flux and stasis in a five millennium transect of European prehistory’, Nature communications, 5, p. 5257. [doi: 10.1038/ncomms6257](https://doi.org/10.1038/ncomms6257).
* Star, B. et al. (2017) ‘Ancient DNA reveals the Arctic origin of Viking Age cod from Haithabu, Germany’, Proceedings of the National Academy of Sciences of the United States of America, 114(34), pp. 9152–9157. [doi: 10.1073/pnas.1710186114](https://doi.org/10.1073/pnas.1710186114).
* de Barros Damgaard, P. et al. (2018). '137 ancient human genomes from across the Eurasian steppes.', Nature, 557(7705), 369–374. [doi: 10.1038/s41586-018-0094-2](https://doi.org/10.1038/s41586-018-0094-2)
3 changes: 1 addition & 2 deletions assets/multiqc_config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@ report_comment: >
This report has been generated by the <a href="https://github.com/nf-core/eager" target="_blank">nf-core/eager</a>
analysis pipeline. For information about how to interpret these results, please see the
<a href="https://github.com/nf-core/eager" target="_blank">documentation</a>.
run_modules:
- adapterRemoval
- bowtie2
Expand Down Expand Up @@ -270,4 +269,4 @@ report_section_order:
nf-core-eager-summary:
order: -1001

export_plots: true
export_plots: true
8 changes: 6 additions & 2 deletions bin/scrape_software_versions.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,9 @@
'VCF2genome':['v_vcf2genome.txt', r"VCF2Genome \(v. ([0-9].[0-9]+) "],
'endorS.py':['v_endorSpy.txt', r"endorS.py (\S+)"],
'kraken':['v_kraken.txt', r"Kraken version (\S+)"],
'eigenstrat_snp_coverage':['v_eigenstrat_snp_coverage.txt',r"(\S+)"]
'eigenstrat_snp_coverage':['v_eigenstrat_snp_coverage.txt',r"(\S+)"],
'mapDamage2':['v_mapdamage.txt',r"(\S+)"],
'bbduk':['v_bbduk.txt',r"(\S+)"]
}

results = OrderedDict()
Expand All @@ -55,7 +57,7 @@
results['Qualimap'] = '<span style="color:#999999;\">N/A</span>'
results['Preseq'] = '<span style="color:#999999;\">N/A</span>'
results['GATK HaplotypeCaller'] = '<span style="color:#999999;\">N/A</span>'
#results['GATK UnifiedGenotyper'] = '<span style="color:#999999;\">N/A</span>'
results['GATK UnifiedGenotyper'] = '<span style="color:#999999;\">N/A</span>'
results['freebayes'] = '<span style="color:#999999;\">N/A</span>'
results['sequenceTools'] = '<span style="color:#999999;\">N/A</span>'
results['VCF2genome'] = '<span style="color:#999999;\">N/A</span>'
Expand All @@ -71,6 +73,8 @@
results['kraken'] = '<span style="color:#999999;\">N/A</span>'
results['maltextract'] = '<span style="color:#999999;\">N/A</span>'
results['eigenstrat_snp_coverage'] = '<span style="color:#999999;\">N/A</span>'
results['mapDamage2'] = '<span style="color:#999999;\">N/A</span>'
results['bbduk'] = '<span style="color:#999999;\">N/A</span>'

# Search each file using its regex
for k, v in regexes.items():
Expand Down
2 changes: 1 addition & 1 deletion conf/base.config
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,7 @@ process {
}

withName:qualimap{
errorStrategy = 'ignore'
errorStrategy = { task.exitStatus in [1,143,137,104,134,139] ? 'retry' : 'finish' }
}

withName:preseq {
Expand Down
4 changes: 4 additions & 0 deletions conf/test_resources.config
Original file line number Diff line number Diff line change
Expand Up @@ -51,4 +51,8 @@ process {
time = { check_max( 10.m * task.attempt, 'time' ) }
}

withName:'mapdamage_rescaling'{
time = { check_max( 20.m * task.attempt, 'time' ) }
}

}
Binary file not shown.
Binary file removed docs/images/output/overview/eager2_workflow.png
Binary file not shown.
Loading

0 comments on commit 0bc87f3

Please sign in to comment.