diff --git a/.circleci/config.yml b/.circleci/config.yml new file mode 100644 index 0000000..db4fdbb --- /dev/null +++ b/.circleci/config.yml @@ -0,0 +1,71 @@ +version: 2.1 +jobs: + build-test-push-job: + docker: + - image: circleci/buildpack-deps:stretch + environment: + IMAGE_NAME: quay.io/wtsicgp/dockstore-cgpmap + steps: + - checkout + - setup_remote_docker + - run: + name: Build Docker image + command: | + set -e + echo "Building Docker image" + docker build -t test . + - run: + name: Validate CWL files + command: | + set -e + echo "Validating CWL files" + sudo apt-get update && sudo apt-get -y install python3-pip + pip3 install cwltool + cwltool --validate Dockstore.cwl + cwltool --validate cwls/cgpmap-bamBaiOut.cwl + cwltool --validate cwls/cgpmap-bamCsiOut.cwl + cwltool --validate cwls/cgpmap-cramOut.cwl + - run: + name: Verify inherited programs + command: | + set -e + echo 'Verifying programs inherited from cgpbigwig' + docker run --rm -t test bwjoin --version + echo 'Verifying programs inherited from pcap-core' + docker run --rm -t test bwa_mem.pl -version + docker run --rm -t test bammarkduplicates2 --version + docker run --rm -t test samtools --version + docker run --rm -t test bash -c 'bwa 2>&1 | grep Version' + - run: + name: Verify programs + command: | + set -e + echo 'Verify programs from dockstore-cgpmap' + docker run --rm -t test ds-cgpmap.pl -h + - run: + name: Push image to quay.io + command: | + set -e + BRANCH_OR_TAG="${CIRCLE_TAG:-$CIRCLE_BRANCH}" + if [ "$CIRCLE_TAG" = "$BRANCH_OR_TAG" ]; then + echo 'Pushing image' + echo "$DOCKERHUB_PASSWORD" | docker login -u "$DOCKERHUB_USER" --password-stdin + echo "$QUAYIO_PASSWORD" | docker login -u "$QUAYIO_USER" --password-stdin quay.io + CLEAN_BRANCH=$(echo $BRANCH_OR_TAG | tr / -) + docker tag test ${IMAGE_NAME}:${CLEAN_BRANCH} + docker push ${IMAGE_NAME}:${CLEAN_BRANCH} + else + echo "I am not a tag so not pushing image" + fi + +workflows: + version: 2.1 + build-test-push-workflow: + jobs: + - build-test-push-job: 
context: + - dockerhub-casmservice + - quayio-casmservice + filters: + tags: + only: /.+/ diff --git a/.travis.yml b/.travis.yml deleted file mode 100644 index e78a2b3..0000000 --- a/.travis.yml +++ /dev/null @@ -1,33 +0,0 @@ -notifications: - slack: wtsi-cgpit:ptUMR1tkNyZJYd9TpGoss8WR - email: false - -sudo: false - -services: - - docker - -install: - - virtualenv -p python3 venv - - source venv/bin/activate - - pip install html5lib cwltool - -script: - - set -e - - echo 'Validate CWL file(s)' - - cwltool --validate Dockstore.cwl - - cwltool --validate cwls/cgpmap-bamBaiOut.cwl - - cwltool --validate cwls/cgpmap-bamCsiOut.cwl - - cwltool --validate cwls/cgpmap-cramOut.cwl - - echo 'Build and check docker image' - - docker build -t dockstore-cgpmap . - - docker images | grep -c dockstore-cgpmap - - echo 'Verify program(s) from each inherited package (cgpbigwig)' - - docker run -t --rm dockstore-cgpmap bwjoin --version - - echo 'Verify program(s) from each inherited package (pcap-core)' - - docker run -t --rm dockstore-cgpmap bwa_mem.pl -version - - docker run -t --rm dockstore-cgpmap bammarkduplicates2 --version - - docker run -t --rm dockstore-cgpmap samtools --version - - docker run -t --rm dockstore-cgpmap bash -c 'bwa 2>&1 | grep Version' - - echo 'Verify program(s) this layer are found (dockstore-cgpmap)' - - docker run -t --rm dockstore-cgpmap ds-cgpmap.pl -h diff --git a/CHANGES.md b/CHANGES.md index c3b3a9b..ebcb7e9 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -1,5 +1,9 @@ # CHANGES +## 3.3.0 + +* Update PCAP-core to 5.7.0, which includes bwakit post processing + ## 3.2.0 * Update PCAP-core to 5.4.0, new ubuntu htslib/samtools 1.11. 
diff --git a/Dockerfile b/Dockerfile index c2cbf14..800824d 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,9 +1,9 @@ -FROM quay.io/wtsicgp/pcap-core:5.4.0 +FROM quay.io/wtsicgp/pcap-core:5.7.0 LABEL vendor="Cancer, Ageing and Somatic Mutation, Wellcome Trust Sanger Institute" LABEL maintainer="cgphelp@sanger.ac.uk" LABEL uk.ac.sanger.cgp.description="PCAP-core for dockstore.org" -LABEL uk.ac.sanger.cgp.version="3.2.0" +LABEL uk.ac.sanger.cgp.version="3.3.0" USER root diff --git a/README.md b/README.md index decab56..ac2cd94 100644 --- a/README.md +++ b/README.md @@ -7,19 +7,18 @@ packaged specifically for use with the [Dockstore.org](https://dockstore.org/) f [![Quay Badge][quay-status]][quay-repo] -| Master | Develop | -| --------------------------------------------- | ----------------------------------------------- | -| [![Master Badge][travis-master]][travis-base] | [![Develop Badge][travis-develop]][travis-base] | +| Master | Develop | +| --- | --- | +| [![Master Badge][circleci-master-badge]][circleci-master] | [![Develop Badge][circleci-develop-badge]][circleci-develop] | * [Supported input formats](#supported-input-formats) * [Options for customisation:](#options-for-customisation) * [Usable Cores](#usable-cores) -* [Other uses](#other-uses) - * [Native docker](#native-docker) - * [Singularity](#singularity) -* [Verifying your deployment](#verifying-your-deployment) -* [Development environment](#development-environment) -* [Release process](#release-process) +* [Run instructions](#run-instructions) +* [Development](#development) + * [Verifying your deployment](#verifying-your-deployment) + * [Development environment](#development-environment) + * [Release process](#release-process) * [LICENCE](#licence) ## Supported input formats @@ -31,73 +30,23 @@ packaged specifically for use with the [Dockstore.org](https://dockstore.org/) f ## Options for customisation: -* BWA specific mapping parameters (defaults are based on attempts at a global standard). 
-* Optionally output CRAM (scramble parameters can be modified) +* BWA specific mapping parameters (defaults are based on attempts at a global standard) +* Optionally output CRAM (see `seqslice` for faster access, recommend 1000) + * applied to `seqs_per_slice` option of htslib/samtools. +* Optionally run with BWA-MEM2 +* Optionally run with bwa-kit post-processing (for calling on alternative contigs) -## Usable Cores +## Run instructions -When running outside of a docker container you can set the number of CPUs via: +The full documentation covering input files, optional parameters, and methods of running dockstore-cgpmap can be found in the [github wiki][github-wiki]. -* `export CPU=N` -* `-threads|-t` option of `ds-cgpmap.pl` +## Development -If not set detects available cores on system. +### Verifying your deployment -## Other uses +The `examples/` tree contains test json files populated with data that can be used to verify the tool. More details on running Dockstore locally for testing purposes can be found in the [github wiki][github-wiki]. -### Native docker - -All of the tools installed as part of [PCAP-core][pcap-core] are available for direct use. - -``` -export CGPMAP_VER=X.X.X -docker pull quay.io/wtsicgp/dockstore-cgpmap:$CGPMAP_VER -# interactive session -docker --rm -ti [--volume ...] quay.io/wtsicgp/dockstore-cgpmap:$CGPMAP_VER bash -``` - -### Singularity - -The resulting docker container has been tested with Singularity. 
The command to exec is: - -``` -ds-cgpmap.pl -h -``` - -Expected use would be along the lines of: - -``` -export CGPMAP_VER=X.X.X -singularity pull docker://quay.io/wtsicgp/dockstore-cgpmap:$CGPMAP_VER - -singularity exec\ - --workdir /.../workspace \ - --home /.../workspace:/home \ - --bind /.../ref/human:/var/spool/ref:ro \ - --bind /.../example_data/cgpmap/insilico_21:/var/spool/data:ro \ - dockstore-cgpmap-${CGPMAP_VER}.simg \ - ds-cgpmap.pl \ - -r /var/spool/ref/core_ref_GRCh37d5.tar.gz \ - -i /var/spool/ref/bwa_idx_GRCh37d5.tar.gz \ - -s SOMENAME \ - -t 6 \ - /var/spool/data/\*.bam -``` - -For a system automatically attaching _all local mount points_ (not default singularity behaviour) -you need not specify any `exec` params (workdir, home, bind) but you should specify the `-outdir` -option for `ds-cgpmap.pl` to prevent data being written to your home directory. - -By default results are written to the home directory of the container so ensure you bind -a large volume and set the `-home` variable. As indicated above the location can be overridden -via the options of `ds-cgpmap.pl` - -## Verifying your deployment - -The `examples/` tree contains test json files populated with data that can be used to verify the -tool. More details can be found [here](examples/README.md). - -## Development environment +### Development environment This project uses git pre-commit hooks. Please enable them to prevent inappropriate large files being included. Any pull request found not to have adhered to this will be rejected and the branch @@ -109,14 +58,14 @@ Activate the hooks with git config core.hooksPath git-hooks ``` -## Release process +### Release process This project is maintained using HubFlow. 1. Make appropriate changes 2. Bump version in `Dockerfile` and `cwls/mixins/requirements.yml` 3. Push changes -4. Check state on Travis +4. Check state on CircleCi 5. Generate the release (add notes to GitHub) 6. Confirm that image has been built on [quay.io][quay-builds] 7. 
Update the [dockstore][dockstore-cgpmap] entry, see [their docs][dockstore-get-started]. @@ -158,11 +107,13 @@ identical to a statement that reads ‘Copyright (c) 2005, 2006, 2007, 2008, [bwa-mem.pl]: https://github.com/cancerit/PCAP-core/blob/master/bin/bwa_mem.pl [cgpmap-expected]: ftp://ftp.sanger.ac.uk/pub/cancer/dockstore/expected [pcap-core]: https://github.com/cancerit/PCAP-core +[github-wiki]: https://github.com/cancerit/dockstore-cgpmap/wiki - -[travis-base]: https://travis-ci.org/cancerit/dockstore-cgpmap -[travis-master]: https://travis-ci.org/cancerit/dockstore-cgpmap.svg?branch=master -[travis-develop]: https://travis-ci.org/cancerit/dockstore-cgpmap.svg?branch=develop + +[circleci-master-badge]: https://circleci.com/gh/cancerit/dockstore-cgpmap/tree/master.svg?style=svg +[circleci-master]: https://circleci.com/gh/cancerit/dockstore-cgpmap/tree/master +[circleci-develop-badge]: https://circleci.com/gh/cancerit/dockstore-cgpmap/tree/develop.svg?style=svg +[circleci-develop]: https://circleci.com/gh/cancerit/dockstore-cgpmap/tree/develop [gitter-svg]: https://badges.gitter.im/dockstore-cgp/Lobby.svg diff --git a/cwls/cgpmap-bamBaiOut.cwl b/cwls/cgpmap-bamBaiOut.cwl index 131c044..0342e0c 100644 --- a/cwls/cgpmap-bamBaiOut.cwl +++ b/cwls/cgpmap-bamBaiOut.cwl @@ -91,12 +91,19 @@ inputs: prefix: -bwamem2 position: 9 + bwakit: + type: boolean + doc: "Use bwakit post-processing" + inputBinding: + prefix: -bwakit + position: 10 + nomarkdup: type: boolean doc: "Do not mark duplicates" inputBinding: prefix: -nomarkdup - position: 10 + position: 11 dupmode: type: string? 
@@ -104,7 +111,7 @@ inputs: default: 't' inputBinding: prefix: -dupmode - position: 11 + position: 12 separate: true legacy: @@ -112,7 +119,7 @@ inputs: doc: "Use legacy merge/dupmark from biobambam2 tools, slower, more memory" inputBinding: prefix: -legacy - position: 12 + position: 13 seq_in: type: @@ -121,7 +128,7 @@ inputs: items: File doc: "Can be BAM, CRAM, fastq (paired or interleaved), BAM/CRAM can be mixed together but not FASTQ." inputBinding: - position: 13 + position: 14 outputs: out_bam: diff --git a/cwls/cgpmap-bamCsiOut.cwl b/cwls/cgpmap-bamCsiOut.cwl index e4dc8cd..b27d0bb 100644 --- a/cwls/cgpmap-bamCsiOut.cwl +++ b/cwls/cgpmap-bamCsiOut.cwl @@ -91,12 +91,19 @@ inputs: prefix: -bwamem2 position: 9 + bwakit: + type: boolean + doc: "Use bwakit post-processing" + inputBinding: + prefix: -bwakit + position: 10 + nomarkdup: type: boolean doc: "Do not mark duplicates" inputBinding: prefix: -nomarkdup - position: 10 + position: 11 dupmode: type: string? @@ -104,7 +111,7 @@ inputs: default: 't' inputBinding: prefix: -dupmode - position: 11 + position: 12 separate: true legacy: @@ -112,7 +119,7 @@ inputs: doc: "Use legacy merge/dupmark from biobambam2 tools, slower, more memory" inputBinding: prefix: -legacy - position: 12 + position: 13 seq_in: type: @@ -121,7 +128,7 @@ inputs: items: File doc: "Can be BAM, CRAM, fastq (paired or interleaved), BAM/CRAM can be mixed together but not FASTQ." inputBinding: - position: 13 + position: 14 outputs: out_bam: diff --git a/cwls/cgpmap-cramOut.cwl b/cwls/cgpmap-cramOut.cwl index 8e2b672..bbfa02c 100644 --- a/cwls/cgpmap-cramOut.cwl +++ b/cwls/cgpmap-cramOut.cwl @@ -100,12 +100,19 @@ inputs: prefix: -bwamem2 position: 10 + bwakit: + type: boolean + doc: "Use bwakit post-processing" + inputBinding: + prefix: -bwakit + position: 11 + nomarkdup: type: boolean doc: "Do not mark duplicates" inputBinding: prefix: -nomarkdup - position: 11 + position: 12 dupmode: type: string? 
@@ -113,7 +120,7 @@ inputs: default: 't' inputBinding: prefix: -dupmode - position: 12 + position: 13 separate: true legacy: @@ -121,7 +128,7 @@ inputs: doc: "Use legacy merge/dupmark from biobambam2 tools, slower, more memory" inputBinding: prefix: -legacy - position: 13 + position: 14 seq_in: type: @@ -130,7 +137,7 @@ inputs: items: File doc: "Can be BAM, CRAM, fastq (paired or interleaved), BAM/CRAM can be mixed together but not FASTQ." inputBinding: - position: 14 + position: 15 outputs: out_cram: diff --git a/cwls/mixins/requirements.yml b/cwls/mixins/requirements.yml index 7a4a3d6..4f03bb6 100644 --- a/cwls/mixins/requirements.yml +++ b/cwls/mixins/requirements.yml @@ -1,2 +1,2 @@ class: DockerRequirement -dockerPull: "quay.io/wtsicgp/dockstore-cgpmap:3.2.0" +dockerPull: "quay.io/wtsicgp/dockstore-cgpmap:3.3.0" diff --git a/examples/cgpmap/bamBaiOut/bam_in.json b/examples/cgpmap/bamBaiOut/bam_in.json index 8382de2..7bc2a3b 100644 --- a/examples/cgpmap/bamBaiOut/bam_in.json +++ b/examples/cgpmap/bamBaiOut/bam_in.json @@ -16,6 +16,7 @@ "mmqcfrag": 0.05, "threads": 0, "bwamem2": false, + "bwakit": false, "nomarkdup": false, "dupmode": "t", "legacy": false, diff --git a/examples/cgpmap/bamBaiOut/cram_in.json b/examples/cgpmap/bamBaiOut/cram_in.json index c09f1f0..607780c 100644 --- a/examples/cgpmap/bamBaiOut/cram_in.json +++ b/examples/cgpmap/bamBaiOut/cram_in.json @@ -16,6 +16,7 @@ "mmqcfrag": 0.05, "threads": 0, "bwamem2": false, + "bwakit": false, "nomarkdup": false, "dupmode": "t", "legacy": false, diff --git a/examples/cgpmap/bamBaiOut/fq_gz_in.json b/examples/cgpmap/bamBaiOut/fq_gz_in.json index 23ee971..17deaf0 100644 --- a/examples/cgpmap/bamBaiOut/fq_gz_in.json +++ b/examples/cgpmap/bamBaiOut/fq_gz_in.json @@ -34,6 +34,7 @@ "mmqcfrag": 0.05, "threads": 0, "bwamem2": false, + "bwakit": false, "nomarkdup": false, "dupmode": "t", "legacy": false, diff --git a/examples/cgpmap/bamBaiOut/fq_gz_in_qcreads.json b/examples/cgpmap/bamBaiOut/fq_gz_in_qcreads.json 
index b7b04d8..7973130 100644 --- a/examples/cgpmap/bamBaiOut/fq_gz_in_qcreads.json +++ b/examples/cgpmap/bamBaiOut/fq_gz_in_qcreads.json @@ -34,6 +34,7 @@ "mmqcfrag": 0.05, "threads": 0, "bwamem2": false, + "bwakit": false, "nomarkdup": false, "dupmode": "t", "legacy": false, diff --git a/examples/cgpmap/bamCsiOut/fq_gz_in.json b/examples/cgpmap/bamCsiOut/fq_gz_in.json index 23ee971..17deaf0 100644 --- a/examples/cgpmap/bamCsiOut/fq_gz_in.json +++ b/examples/cgpmap/bamCsiOut/fq_gz_in.json @@ -34,6 +34,7 @@ "mmqcfrag": 0.05, "threads": 0, "bwamem2": false, + "bwakit": false, "nomarkdup": false, "dupmode": "t", "legacy": false, diff --git a/examples/cgpmap/cramOut/fq_gz_in.json b/examples/cgpmap/cramOut/fq_gz_in.json index e6d637f..d077b03 100644 --- a/examples/cgpmap/cramOut/fq_gz_in.json +++ b/examples/cgpmap/cramOut/fq_gz_in.json @@ -34,6 +34,7 @@ "mmqcfrag": 0.05, "threads": 0, "bwamem2": false, + "bwakit": false, "nomarkdup": false, "dupmode": "t", "legacy": false, diff --git a/scripts/ds-cgpmap.pl b/scripts/ds-cgpmap.pl index 3e2e870..234eb99 100755 --- a/scripts/ds-cgpmap.pl +++ b/scripts/ds-cgpmap.pl @@ -21,6 +21,7 @@ 'f' => 0.05, 'dupmode' => 't', 'bwamem2' => 0, + 'bwakit' => 0, 'nomarkdup' => 0, 'legacy' => 0, 'seqslice' => undef, @@ -40,6 +41,7 @@ 'q|qc' => \$opts{'q'}, 'f|qcf:f' => \$opts{'f'}, 'bm2|bwamem2' => \$opts{'bwamem2'}, + 'kit|bwakit' => \$opts{'bwakit'}, 'n|nomarkdup' => \$opts{'nomarkdup'}, 'd|dupmode:s' => \$opts{'dupmode'}, 'legacy' => \$opts{'legacy'}, @@ -85,6 +87,7 @@ printf $FH "MMQCFRAC=%s\n", $opts{'f'} if(defined $opts{'f'}); printf $FH "DUPMODE=%s\n", $opts{'dupmode'}; printf $FH "BWAMEM2=%d\n", $opts{'bwamem2'} if(defined $opts{'bwamem2'}); +printf $FH "BWAKIT=%d\n", $opts{'bwakit'} if(defined $opts{'bwakit'}); printf $FH "NOMARKDUP=%d\n", $opts{'nomarkdup'} if(defined $opts{'nomarkdup'}); printf $FH "LEGACY=%d\n", $opts{'legacy'} if(defined $opts{'legacy'}); printf $FH "SEQSLICE=%d\n", $opts{'seqslice'} if(defined 
$opts{'seqslice'}); @@ -125,6 +128,7 @@ =head1 SYNOPSIS Optional parameters: -threads -t Set the number of cpu/cores available [default all]. -bwamem2 -bm2 Use bwa-mem2 instead of bwa (experimental). + -bwakit -kit Run bwakit post alignment processing. -nomarkdup -n Don't mark duplicates [flag] -seqslice -ss seqs_per_slice for CRAM compression [samtools default: 10000] -cram -c Output cram, see '-seqslice' diff --git a/scripts/mapping.sh b/scripts/mapping.sh index f953c83..a0d84ac 100755 --- a/scripts/mapping.sh +++ b/scripts/mapping.sh @@ -58,6 +58,7 @@ else echo -e "\tSEQSLICE : $SEQSLICE" fi echo -e "\tBWAMEM2 : $BWAMEM2" +echo -e "\tBWAKIT : $BWAKIT" echo -e "\tNOMARKDUP : $NOMARKDUP" echo -e "\tLEGACY : $LEGACY" set +u @@ -122,6 +123,11 @@ if [ $BWAMEM2 -gt 0 ]; then ADD_ARGS="$ADD_ARGS --bwamem2" fi +# if BWAKIT set +if [ $BWAKIT -gt 0 ]; then + ADD_ARGS="$ADD_ARGS --bwakit" +fi + # if NOMARKDUP set if [ $NOMARKDUP -gt 0 ]; then ADD_ARGS="$ADD_ARGS --nomarkdup"