diff --git a/.dockerignore b/.dockerignore new file mode 100644 index 0000000..db9a224 --- /dev/null +++ b/.dockerignore @@ -0,0 +1,5 @@ +/examples/ +/cwls/ +/git-hooks/ +/*.* +/LICENSE diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..47def24 --- /dev/null +++ b/.gitignore @@ -0,0 +1 @@ +/env/ diff --git a/.travis.yml b/.travis.yml index 0f265bb..272317b 100644 --- a/.travis.yml +++ b/.travis.yml @@ -8,7 +8,25 @@ services: - docker install: - - docker build -t dockstore-cgpmap . + - virtualenv -p python3 venv + - source venv/bin/activate + - pip install html5lib cwltool script: + - set -e + - echo 'Validate CWL file(s)' + - cwltool --validate Dockstore.cwl + - cwltool --validate cwls/cgpmap-bamBaiOut.cwl + - cwltool --validate cwls/cgpmap-bamCsiOut.cwl + - cwltool --validate cwls/cgpmap-cramOut.cwl + - echo 'Build and check docker image' + - docker build -t dockstore-cgpmap . - docker images | grep -c dockstore-cgpmap + - echo 'Verify program from each inherited package is found (dockstore-cgpbigwig)' + - docker run -ti --rm dockstore-cgpmap bwjoin --version + - echo 'Verify a program from each new package is found (dockstore-cgpmap)' + - docker run -ti --rm dockstore-cgpmap ds-cgpmap.pl -h + - docker run -ti --rm dockstore-cgpmap bwa_mem.pl -version + - docker run -ti --rm dockstore-cgpmap bammarkduplicates2 --version + - docker run -ti --rm dockstore-cgpmap samtools --version + - docker run -ti --rm dockstore-cgpmap bash -c 'bwa 2>&1 | grep Version' diff --git a/CHANGES.md b/CHANGES.md index d5ea2b3..25005e5 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -1,35 +1,68 @@ -### 2.0.1 +# CHANGES + +## 3.0.0 + +* Drop pre/post exec functions in `mapping.sh` +* Add ability to pair a groups file with fastq inputs to add info to readgroups +in final BAM/CRAM files (PCAP-core). +* BWA 0.7.17 - bug fixes for future alpine building. 
+* Biobambam2 2.0.86 - via pre-compiled versions +* HTSlib + Samtools 1.7 +* PCAP-core 4.1.3 + * mismatchQc options added +* cgpBigWig 1.0.0 (via dockstore-cgpbigwig 2.0.0) +* Examples moved to more useful naming, now have: + * `examples/bamOutput` and `examples/cramOutput` each containing: + * `bam_input.json` + * `fastq_gz_input.json` - with yaml groupinfo file example. +* Multiple CWL descriptors: one for BAM output, another for CRAM; legacy version retained. + * Ensure you use the correct `json` examples. +* Now using secondaryFiles for outputs to reduce repetitive entries in cwl and json. + * Bugfix to dockstore tool needed for output provisioning, 1.3.6+ + +## 2.0.1 + * Test data in `examples/sample_configs.local.json` moved to a non-expiring location. -### 2.0.0 +## 2.0.0 + * PCAP-core forked to cancerit and all legacy PCAWG code removed. * Update to cgpBigWig/libBigWig to handle bug detected in ASCAT. * First layer of streamlined install process to reduce build time of dependant images. * Biobambam2 now building from source (previously picked up precompiled 'etch'). -### 1.0.8 +## 1.0.8 + Fix in PCAP-core to handle passing of sample name when input BAM has no SM tag in header. -### 1.0.7 +## 1.0.7 + HTSlib upgades in toolset for consistency. -### 1.0.6 +## 1.0.6 + Bump PCAP-core version to fix fastq input handling -### 1.0.5 +## 1.0.5 + Adds travis-ci -### 1.0.4 +## 1.0.4 + Bad versions in 1.0.3 -### 1.0.3 +## 1.0.3 + Base PCAP-core upgraded to improve mapping through-put and CPU use. -### 1.0.2 +## 1.0.2 + Added build badges -### 1.0.1 +## 1.0.1 + Fix CWL to get description to display on Dockstore.org -### 1.0.0 +## 1.0.0 + Initial release. Fully functional. 
diff --git a/Dockerfile b/Dockerfile index 596bba7..94fd412 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,36 +1,88 @@ -FROM ubuntu:14.04 +FROM quay.io/wtsicgp/dockstore-cgpbigwig:2.0.0 AS builder -MAINTAINER keiranmraine@gmail.com +USER root -LABEL uk.ac.sanger.cgp="Cancer Genome Project, Wellcome Trust Sanger Institute" \ - version="2.0.3" \ - description="The CGP mapping pipeline for dockstore.org" +# Build stage: tools are compiled into $OPT; update and install share one layer so a stale package index is never reused. +RUN apt-get -yq update && \ + apt-get install -yq --no-install-recommends\ + apt-transport-https\ + locales\ + curl\ + ca-certificates\ + libperlio-gzip-perl\ + make\ + bzip2\ + gcc\ + psmisc\ + time\ + zlib1g-dev\ + libbz2-dev\ + liblzma-dev\ + libcurl4-gnutls-dev\ + libncurses5-dev\ + nettle-dev\ + libp11-kit-dev\ + libtasn1-dev\ + libgnutls-dev -USER root +RUN locale-gen en_US.UTF-8 +RUN update-locale LANG=en_US.UTF-8 ENV OPT /opt/wtsi-cgp -ENV PATH $OPT/bin:$PATH +ENV PATH $OPT/bin:$OPT/biobambam2/bin:$PATH ENV PERL5LIB $OPT/lib/perl5 ENV LD_LIBRARY_PATH $OPT/lib - -## USER CONFIGURATION -RUN adduser --disabled-password --gecos '' ubuntu && chsh -s /bin/bash && mkdir -p /home/ubuntu +ENV LC_ALL en_US.UTF-8 +ENV LANG en_US.UTF-8 RUN mkdir -p $OPT/bin -ADD scripts/mapping.sh $OPT/bin/mapping.sh -ADD scripts/ds-wrapper.pl $OPT/bin/ds-wrapper.pl -RUN chmod a+x $OPT/bin/mapping.sh $OPT/bin/ds-wrapper.pl +COPY build/opt-build.sh build/ +RUN bash build/opt-build.sh $OPT -ADD build/apt-build.sh build/ -RUN bash build/apt-build.sh +FROM ubuntu:16.04 -ADD build/perllib-build.sh build/ -RUN bash build/perllib-build.sh +LABEL maintainer="cgphelp@sanger.ac.uk" -ADD build/opt-build.sh build/ -ADD build/biobambam2-build.sh build/ -RUN bash build/opt-build.sh $OPT +LABEL vendor="Cancer, Ageing and Somatic Mutation, Wellcome Trust Sanger Institute" +LABEL uk.ac.sanger.cgp.description="PCAP-core for dockstore.org" +LABEL uk.ac.sanger.cgp.version="3.0.0" + +# Runtime stage: update, install and apt list clean-up share one layer to avoid a stale index and keep the image small. +RUN apt-get -yq update && \ + apt-get install -yq --no-install-recommends\ + apt-transport-https\ + locales\ + curl\ + ca-certificates\ + 
libperlio-gzip-perl\ + bzip2\ + psmisc\ + time\ + zlib1g\ + liblzma5\ + libncurses5\ + p11-kit && rm -rf /var/lib/apt/lists/* + +RUN locale-gen en_US.UTF-8 +RUN update-locale LANG=en_US.UTF-8 + +ENV OPT /opt/wtsi-cgp +ENV PATH $OPT/bin:$OPT/biobambam2/bin:$PATH +ENV PERL5LIB $OPT/lib/perl5 +ENV LD_LIBRARY_PATH $OPT/lib +ENV LC_ALL en_US.UTF-8 +ENV LANG en_US.UTF-8 + +RUN mkdir -p $OPT +COPY --from=builder $OPT $OPT + +COPY scripts/mapping.sh $OPT/bin/mapping.sh +COPY scripts/ds-cgpmap.pl $OPT/bin/ds-cgpmap.pl +RUN chmod a+x $OPT/bin/mapping.sh $OPT/bin/ds-cgpmap.pl + +## USER CONFIGURATION +RUN adduser --disabled-password --gecos '' ubuntu && chsh -s /bin/bash && mkdir -p /home/ubuntu USER ubuntu WORKDIR /home/ubuntu diff --git a/Dockstore.cwl b/Dockstore.cwl index 82335e3..0b33170 100644 --- a/Dockstore.cwl +++ b/Dockstore.cwl @@ -9,23 +9,19 @@ label: "CGP BWA-mem mapping flow" cwlVersion: v1.0 doc: | - ![build_status](https://quay.io/repository/wtsicgp/dockstore-cgpmap/status) - A Docker container for the CGP BWA-mem mapping flow. See the [dockstore-cgpmap](https://github.com/cancerit/dockstore-cgpmap) website for more information. + Please use one of the new tools for v3+: -dct:creator: - "@id": "http://orcid.org/0000-0002-5634-1539" - foaf:name: Keiran M Raine - foaf:mbox: "keiranmraine@gmail.com" + * [dockstore-cgpmap/cgpmap-bamOut](https://dockstore.org/containers/quay.io%2Fwtsicgp%2Fdockstore-cgpmap%2Fcgpmap-bamOut) + * [dockstore-cgpmap/cgpmap-cramOut](https://dockstore.org/containers/quay.io%2Fwtsicgp%2Fdockstore-cgpmap%2Fcgpmap-cramOut) + + ![build_status](https://quay.io/repository/wtsicgp/dockstore-cgpmap/status) + A Docker container for PCAP-core. See the [dockstore-cgpmap](https://github.com/cancerit/dockstore-cgpmap) website for more information. 
requirements: - - class: DockerRequirement - dockerPull: "quay.io/wtsicgp/dockstore-cgpmap:2.0.3" + - $mixin: cwls/mixins/requirements.yml hints: - - class: ResourceRequirement - coresMin: 1 # works but long, 8 recommended - ramMin: 15000 # good for WGS human ~30-60x - outdirMin: 5000000 # unlikely any BAM processing would be possible in less + - $mixin: cwls/mixins/hints.yml inputs: reference: @@ -52,33 +48,40 @@ inputs: position: 3 separate: true - scramble: - type: string? - doc: "Options to pass to scramble when generating CRAM output, see scramble docs" - default: '' - inputBinding: - prefix: -scramble - position: 4 - separate: true - shellQuote: true - bwa: type: string? default: ' -Y -K 100000000' doc: "Mapping and output parameters to pass to BWA-mem, see BWA docs, default ' -Y -K 100000000'" inputBinding: prefix: -bwa - position: 5 + position: 4 separate: true shellQuote: false - cram: + groupinfo: + type: File? + doc: "Readgroup metadata file for FASTQ inputs" + inputBinding: + prefix: -groupinfo + position: 5 + separate: true + + mmqc: type: boolean - doc: "Set if output should be in CRAM format instead of BAM, see 'scramble' for tuning parameters." + doc: "Apply mismatch QC to reads following duplicate marking." inputBinding: - prefix: -cram + prefix: -qc position: 6 + mmqcfrac: + type: float? + default: 0.05 + doc: "Mismatch fraction to set as max before failing a read [0.05]" + inputBinding: + prefix: -qcf + position: 7 + separate: true + bams_in: type: - 'null' @@ -86,7 +89,7 @@ inputs: items: File doc: "Can be BAM, CRAM, fastq (paired or interleaved), BAM/CRAM can be mixed together but not FASTQ." 
inputBinding: - position: 7 + position: 8 outputs: out_bam: @@ -119,4 +122,19 @@ outputs: outputBinding: glob: $(inputs.sample).bam.maptime -baseCommand: ["/opt/wtsi-cgp/bin/ds-wrapper.pl"] +baseCommand: ["/opt/wtsi-cgp/bin/ds-cgpmap.pl"] + +$schemas: + - http://schema.org/docs/schema_org_rdfa.html + +$namespaces: + s: http://schema.org/ + +s:codeRepository: https://github.com/cancerit/dockstore-cgpmap +s:license: https://spdx.org/licenses/AGPL-3.0-only + +s:author: + - class: s:Person + s:identifier: https://orcid.org/0000-0002-5634-1539 + s:email: mailto:cgphelp@sanger.ac.uk + s:name: Keiran Raine diff --git a/README.md b/README.md index 1412fe8..1b81d02 100644 --- a/README.md +++ b/README.md @@ -1,31 +1,109 @@ -dockstore-cgpmap -====== -`dockstore-cgpmap` provides a complete multi threaded BWA mem mapping workflow. This has been packaged specifically for use with the [Dockstore.org](https://dockstore.org/) framework. +# dockstore-cgpmap -[![Join the chat at https://gitter.im/dockstore-cgpmap/general](https://badges.gitter.im/dockstore-cgpmap/general.svg)](https://gitter.im/dockstore-cgpmap/general?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge&utm_content=badge) +`dockstore-cgpmap` provides a complete multi threaded BWA mem mapping workflow. This has been +packaged specifically for use with the [Dockstore.org](https://dockstore.org/) framework. 
-[![Docker Repository on Quay](https://quay.io/repository/wtsicgp/dockstore-cgpmap/status "Docker Repository on Quay")](https://quay.io/repository/wtsicgp/dockstore-cgpmap) +[![Gitter Badge][gitter-svg]][gitter-badge] -[![Build Status](https://travis-ci.org/cancerit/dockstore-cgpmap.svg?branch=master)](https://travis-ci.org/cancerit/dockstore-cgpmap) : master -[![Build Status](https://travis-ci.org/cancerit/dockstore-cgpmap.svg?branch=develop)](https://travis-ci.org/cancerit/dockstore-cgpmap) : develop +[![Quay Badge][quay-status]][quay-repo] + +| Master | Develop | +| --------------------------------------------- | ----------------------------------------------- | +| [![Master Badge][travis-master]][travis-base] | [![Develop Badge][travis-develop]][travis-base] | + + + +- [Supported input formats:](#supported-input-formats) +- [Options for customisation:](#options-for-customisation) +- [Usable Cores](#usable-cores) +- [Other uses](#other-uses) + - [Native docker](#native-docker) + - [Singularity](#singularity) +- [Test data](#test-data) +- [Release process](#release-process) +- [LICENCE](#licence) + + + +## Supported input formats: -## Supports input in following formats: * Multiple BAM * Multiple CRAM * Multiple fastq[.gz] (paired or interleaved) - * Please see [PCAP-core/bin/bwa_mem.pl](https://github.com/cancerit/PCAP-core/blob/master/bin/bwa_mem.pl) for formatting of file names. + * Please see [PCAP-core/bin/bwa_mem.pl][bwa-mem.pl] +for formatting of file names. ## Options for customisation: * BWA specific mapping parameters (defaults are based on attempts at a global standard). * Optionally output CRAM (scramble parameters can be modified) -# Test data -The `examples/sample_configs.local.json` contains test data that can be used to verify the tool. +## Usable Cores + +When running outside of a docker container you can set the number of CPUs via: + +* `export CPU=N` +* `-threads|-t` option of `ds-cgpmap.pl` + +If not set detects available cores on system. 
+ +## Other uses -You can find expected outputs on the Sanger Institute FTP site: [dockstore-cgpmap-expected.tar.gz](ftp://ftp.sanger.ac.uk/pub/cancer/dockstore/expected/dockstore-cgpmap-expected.tar.gz) +### Native docker + +All of the tools installed as part of [PCAP-core][pcap-core] are available for direct use. + +``` +export CGPMAP_VER=X.X.X +docker pull quay.io/wtsicgp/dockstore-cgpmap:$CGPMAP_VER +# interactive session +docker run --rm -ti [--volume ...] quay.io/wtsicgp/dockstore-cgpmap:$CGPMAP_VER bash +``` + +### Singularity + +The resulting docker container has been tested with Singularity. The command to exec is: + +``` +ds-cgpmap.pl -h +``` -This project includes the C program `diff_bams` that can be used to compare the generated BAM file to the one in the archive: +Expected use would be along the lines of: + +``` +export CGPMAP_VER=X.X.X +singularity pull docker://quay.io/wtsicgp/dockstore-cgpmap:$CGPMAP_VER + +singularity exec\ + --workdir /.../workspace \ + --home /.../workspace:/home \ + --bind /.../ref/human:/var/spool/ref:ro \ + --bind /.../example_data/cgpmap/insilico_21:/var/spool/data:ro \ + dockstore-cgpmap-${CGPMAP_VER}.simg \ + ds-cgpmap.pl \ + -r /var/spool/ref/core_ref_GRCh37d5.tar.gz \ + -i /var/spool/ref/bwa_idx_GRCh37d5.tar.gz \ + -s SOMENAME \ + -t 6 \ + /var/spool/data/\*.bam +``` + +For a system automatically attaching _all local mount points_ (not default singularity behaviour) +you need not specify any `exec` params (workdir, home, bind) but you should specify the `-outdir` +option for `ds-cgpmap.pl` to prevent data being written to your home directory. + +By default results are written to the home directory of the container so ensure you bind +a large volume and set the `--home` option. As indicated above the location can be overridden +via the options of `ds-cgpmap.pl`. + +## Test data + +The `examples/` directory contains test data that can be used to verify the tool. 
+ +You can find expected outputs on the Sanger Institute FTP site (bam output): [dockstore-cgpmap-expected.tar.gz][cgpmap-expected] + +This project includes the C program `diff_bams` that can be used to compare the generated BAM file +to the one in the archive: ```bash $ export CGPMAP_TAG=0.2.0 @@ -45,24 +123,24 @@ Reference sequence order passed Matching records: 1000001 ``` -Release process -=============== +## Release process + This project is maintained using HubFlow. 1. Make appropriate changes -2. Bump version in `Dockerfile` and `Dockstore.cwl` -3. Push changes -4. Check state on Travis -5. Generate the release (add notes to GitHub) -6. Confirm that image has been built on [quay.io](https://quay.io/repository/wtsicgp/dockstore-cgpmap?tab=builds) -7. Update the [dockstore](https://dockstore.org/containers/quay.io/wtsicgp/dockstore-cgpmap) entry, see [their docs](https://dockstore.org/docs/getting-started-with-dockstore). +1. Bump version in `Dockerfile` and `cwls/mixins/requirements.yml` +1. Push changes +1. Check state on Travis +1. Generate the release (add notes to GitHub) +1. Confirm that image has been built on [quay.io][quay-builds] +1. Update the [dockstore][dockstore-cgpmap] entry, see [their docs][dockstore-get-started]. -LICENCE -======= +## LICENCE -Copyright (c) 2016-2017 Genome Research Ltd. +``` +Copyright (c) 2016-2018 Genome Research Ltd. -Author: Cancer Genome Project +Author: CASM/Cancer IT This file is part of dockstore-cgpmap. @@ -88,3 +166,27 @@ reads ‘Copyright (c) 2005, 2007, 2008, 2009, 2011, 2012’ and a copyright statement that reads ‘Copyright (c) 2005-2012’ should be interpreted as being identical to a statement that reads ‘Copyright (c) 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012’." 
+``` + + +[bwa-mem.pl]: https://github.com/cancerit/PCAP-core/blob/master/bin/bwa_mem.pl +[cgpmap-expected]: ftp://ftp.sanger.ac.uk/pub/cancer/dockstore/expected +[pcap-core]: https://github.com/cancerit/PCAP-core + + +[travis-base]: https://travis-ci.org/cancerit/dockstore-cgpmap +[travis-master]: https://travis-ci.org/cancerit/dockstore-cgpmap.svg?branch=master +[travis-develop]: https://travis-ci.org/cancerit/dockstore-cgpmap.svg?branch=develop + + +[gitter-svg]: https://badges.gitter.im/dockstore-cgp/Lobby.svg +[gitter-badge]: https://gitter.im/dockstore-cgp/Lobby?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge&utm_content=badge + + +[quay-status]: https://quay.io/repository/wtsicgp/dockstore-cgpmap/status +[quay-repo]: https://quay.io/repository/wtsicgp/dockstore-cgpmap +[quay-builds]: https://quay.io/repository/wtsicgp/dockstore-cgpmap?tab=builds + + +[dockstore-cgpmap]: https://dockstore.org/containers/quay.io/wtsicgp/dockstore-cgpmap +[dockstore-get-started]: https://dockstore.org/docs/getting-started-with-dockstore diff --git a/build/apt-build.sh b/build/apt-build.sh deleted file mode 100755 index ba3b5fb..0000000 --- a/build/apt-build.sh +++ /dev/null @@ -1,18 +0,0 @@ -#!/bin/bash - -set -eux - -apt-get -yq update -apt-get install -yq apt-transport-https -apt-get install -yq --no-install-recommends curl -apt-get install -yq --no-install-recommends build-essential -apt-get install -yq --no-install-recommends libcurl4-openssl-dev -apt-get install -yq --no-install-recommends nettle-dev -apt-get install -yq --no-install-recommends libncurses5-dev -apt-get install -yq autoconf -apt-get install -yq --no-install-recommends libtool -apt-get install -yq --no-install-recommends rsync -apt-get install -yq --no-install-recommends libexpat1-dev -apt-get install -yq --no-install-recommends time -apt-get install -yq --no-install-recommends lsof -apt-get install -yq --no-install-recommends libgoogle-perftools-dev diff --git a/build/biobambam2-build.sh 
b/build/biobambam2-build.sh deleted file mode 100644 index c625994..0000000 --- a/build/biobambam2-build.sh +++ /dev/null @@ -1,125 +0,0 @@ -#!/bin/bash -LIBMAUSVERSION=2.0.312-release-20170208002133 -BIOBAMBAMVERSION=2.0.69-release-20170127133459 -SNAPPYVERSION=1.1.4 -IOLIBVERSION=1.14.8 -CHRPATHVERSION=0.16.1 -BUILDDIR=${PWD} -INSTALLDIR=${BUILDDIR}/install-dir -TOOLSDIR=${BUILDDIR}/tools-dir -PAR=`cat /proc/cpuinfo | egrep "^processor" | wc -l` - -set -uxe - -# get chrpath -if [ ! -f chrpath-${CHRPATHVERSION}.tar.gz ] ; then - curl -sSL "https://alioth.debian.org/frs/download.php/file/3979/chrpath-${CHRPATHVERSION}.tar.gz" > chrpath-${CHRPATHVERSION}.tar.gz -fi - -# get iolib -if [ ! -f io_lib-${IOLIBVERSION}.tar.gz ] ; then - curl -sSL "http://downloads.sourceforge.net/project/staden/io_lib/${IOLIBVERSION}/io_lib-${IOLIBVERSION}.tar.gz?&use_mirror=kent" \ - > io_lib-${IOLIBVERSION}.tar.gz -fi - -# get snappy -if [ ! -f snappy-${SNAPPYVERSION}.tar.gz ] ; then - curl -sSL https://github.com/google/snappy/archive/${SNAPPYVERSION}.tar.gz > snappy-${SNAPPYVERSION}.tar.gz -fi - -# get libmaus -if [ ! -f libmaus-${LIBMAUSVERSION}.tar.gz ] ; then - curl -sSL https://github.com/gt1/libmaus2/archive/${LIBMAUSVERSION}.tar.gz > libmaus-${LIBMAUSVERSION}.tar.gz -fi - -# get biobambam -if [ ! -f biobambam-${BIOBAMBAMVERSION}.tar.gz ] ; then - curl -sSL https://github.com/gt1/biobambam2/archive/${BIOBAMBAMVERSION}.tar.gz > biobambam-${BIOBAMBAMVERSION}.tar.gz -fi - -mkdir -p chrpath-${CHRPATHVERSION}-src -tar -C chrpath-${CHRPATHVERSION}-src --strip-components=1 -xzf chrpath-${CHRPATHVERSION}.tar.gz -mkdir -p chrpath-${CHRPATHVERSION}-build -cd chrpath-${CHRPATHVERSION}-build -${BUILDDIR}/chrpath-${CHRPATHVERSION}-src/configure --prefix=${TOOLSDIR} -make -j${PAR} -make -j${PAR} install -cd .. 
-rm -fR chrpath-${CHRPATHVERSION}-src chrpath-${CHRPATHVERSION}-build - -rm -fR ${INSTALLDIR} -mkdir -p ${INSTALLDIR} - -# build iolib -mkdir -p io_lib-${IOLIBVERSION}-src -tar -C io_lib-${IOLIBVERSION}-src --strip-components=1 -xzf io_lib-${IOLIBVERSION}.tar.gz -mkdir -p io_lib-${IOLIBVERSION}-build -cd io_lib-${IOLIBVERSION}-build -LDFLAGS="-Wl,-rpath=XORIGIN/../lib" ${BUILDDIR}/io_lib-${IOLIBVERSION}-src/configure --prefix=${INSTALLDIR} -make -j${PAR} -make -j${PAR} install -cd .. -rm -fR io_lib-${IOLIBVERSION}-src io_lib-${IOLIBVERSION}-build - -# build snappy -mkdir -p snappy-${SNAPPYVERSION}-src -tar -C snappy-${SNAPPYVERSION}-src --strip-components=1 -xzf snappy-${SNAPPYVERSION}.tar.gz -cd snappy-${SNAPPYVERSION}-src -autoreconf -i -f -cd ../ -mkdir -p snappy-${SNAPPYVERSION}-build -cd snappy-${SNAPPYVERSION}-build -LDFLAGS="-Wl,-rpath=XORIGIN/../lib" ${BUILDDIR}/snappy-${SNAPPYVERSION}-src/configure --prefix=${INSTALLDIR} -make -j${PAR} -make -j${PAR} install -cd .. -rm -fR snappy-${SNAPPYVERSION}-src snappy-${SNAPPYVERSION}-build - -# build libmaus -mkdir -p libmaus-${LIBMAUSVERSION}-src -tar -C libmaus-${LIBMAUSVERSION}-src --strip-components=1 -xzf libmaus-${LIBMAUSVERSION}.tar.gz -mkdir -p libmaus-${LIBMAUSVERSION}-build -cd libmaus-${LIBMAUSVERSION}-build -LDFLAGS="-Wl,-rpath=XORIGIN/../lib" ${BUILDDIR}/libmaus-${LIBMAUSVERSION}-src/configure --prefix=${INSTALLDIR} \ - --with-snappy=${INSTALLDIR} \ - --with-io_lib=${INSTALLDIR} -make -j${PAR} -make -j${PAR} install -cd .. 
-rm -fR libmaus-${LIBMAUSVERSION}-src libmaus-${LIBMAUSVERSION}-build - -# build biobambam -mkdir -p biobambam-${BIOBAMBAMVERSION}-src -tar -C biobambam-${BIOBAMBAMVERSION}-src --strip-components=1 -xzf biobambam-${BIOBAMBAMVERSION}.tar.gz -mkdir -p biobambam-${BIOBAMBAMVERSION}-build -cd biobambam-${BIOBAMBAMVERSION}-build -LDFLAGS="-Wl,-rpath=XORIGIN/../lib" ${BUILDDIR}/biobambam-${BIOBAMBAMVERSION}-src/configure --prefix=${INSTALLDIR} \ - --with-libmaus2=${INSTALLDIR} -make -j${PAR} -make -j${PAR} install -cd .. -rm -fR biobambam-${BIOBAMBAMVERSION}-src biobambam-${BIOBAMBAMVERSION}-build - -for i in `find ${INSTALLDIR} -name \*.so\*` ; do - ORIG=`objdump -x ${i} | grep RPATH | awk '{print $2}'` - MOD=`echo "$ORIG" | sed "s/XORIGIN/\\$ORIGIN/"` - ${TOOLSDIR}/bin/chrpath -r "${MOD}" ${i} -done - -for i in ${INSTALLDIR}/bin/* ; do - if [ ! -z `LANG=C file ${i} | egrep "ELF.*executable" | awk '{print $1}' | perl -p -e "s/://"` ] ; then - ORIG=`objdump -x ${i} | grep RPATH | awk '{print $2}'` - MOD=`echo "$ORIG" | sed "s/XORIGIN/\\$ORIGIN/"` - ${TOOLSDIR}/bin/chrpath -r "${MOD}" ${i} - fi -done - -rm -fR ${TOOLSDIR} - -# my additions -mv ${INSTALLDIR} biobambam -rm -f chrpath-${CHRPATHVERSION}.tar.gz -rm -f io_lib-${IOLIBVERSION}.tar.gz -rm -f snappy-${SNAPPYVERSION}.tar.gz -rm -f libmaus-${LIBMAUSVERSION}.tar.gz -rm -f biobambam-${BIOBAMBAMVERSION}.tar.gz diff --git a/build/opt-build.sh b/build/opt-build.sh index 5775372..aab9976 100755 --- a/build/opt-build.sh +++ b/build/opt-build.sh @@ -8,16 +8,17 @@ fi set -u +VER_BBB2="2.0.86-release-20180228171821" + ## for cgpBigWig -VER_BIODBHTS="2.7" -VER_LIBBW="0.3.1" -VER_CGPBIGWIG="0.4.1" +VER_BIODBHTS="2.9" +VER_LIBBW="0.4.2" # for PCAP -VER_BWA="v0.7.15" -VER_HTSLIB="1.3.2" -VER_SAMTOOLS="1.3.1" -VER_PCAP="4.0.2" +VER_BWA="v0.7.17" +VER_HTSLIB="1.7" +VER_SAMTOOLS="1.7" +VER_PCAP="4.1.3" if [ "$#" -lt "1" ] ; then echo "Please provide an installation path such as /opt/ICGC" @@ -48,43 +49,69 @@ else fi echo "Max 
compilation CPUs set to $CPU" - SETUP_DIR=$INIT_DIR/install_tmp mkdir -p $SETUP_DIR/distro # don't delete the actual distro directory until the very end mkdir -p $INST_PATH/bin cd $SETUP_DIR +## biobambam2 first +BB_INST=$INST_PATH/biobambam2 +if [ ! -e $SETUP_DIR/bbb2.success ]; then + curl -sSL --retry 10 https://github.com/gt1/biobambam2/releases/download/${VER_BBB2}/biobambam2-${VER_BBB2}-x86_64-etch-linux-gnu.tar.gz > distro.tar.gz + mkdir -p $BB_INST + tar --strip-components 3 -C $BB_INST -zxf distro.tar.gz + rm -f $BB_INST/bin/curl # don't leave this file in, SSL doesn't work + rm -rf distro.* distro/* + touch $SETUP_DIR/bbb2.success +fi + # make sure tools installed can see the install loc of libraries set +u export LD_LIBRARY_PATH=`echo $INST_PATH/lib:$LD_LIBRARY_PATH | perl -pe 's/:\$//;'` -export PATH=`echo $INST_PATH/bin:$PATH | perl -pe 's/:\$//;'` -export MANPATH=`echo $INST_PATH/man:$INST_PATH/share/man:$MANPATH | perl -pe 's/:\$//;'` +export PATH=`echo $INST_PATH/bin:$BB_INST/bin:$PATH | perl -pe 's/:\$//;'` +export MANPATH=`echo $INST_PATH/man:$BB_INST/man:$INST_PATH/share/man:$MANPATH | perl -pe 's/:\$//;'` export PERL5LIB=`echo $INST_PATH/lib/perl5:$PERL5LIB | perl -pe 's/:\$//;'` set -u ## INSTALL CPANMINUS set -eux curl -sSL https://cpanmin.us/ > $SETUP_DIR/cpanm -perl $SETUP_DIR/cpanm --no-interactive --notest --mirror http://cpan.metacpan.org -l $INST_PATH App::cpanminus +perl $SETUP_DIR/cpanm --no-wget --no-interactive --notest --mirror http://cpan.metacpan.org -l $INST_PATH App::cpanminus rm -f $SETUP_DIR/cpanm ##### DEPS for cgpBigWig ##### ## HTSLIB (tar.bz2) if [ ! 
-e $SETUP_DIR/htslib.success ]; then + rm -rf htslib + mkdir -p htslib curl -sSL --retry 10 https://github.com/samtools/htslib/releases/download/${VER_HTSLIB}/htslib-${VER_HTSLIB}.tar.bz2 > distro.tar.bz2 - rm -rf distro/* - tar --strip-components 1 -C distro -jxf distro.tar.bz2 - cd distro + tar --strip-components 1 -C htslib -jxf distro.tar.bz2 + cd htslib ./configure --enable-plugins --enable-libcurl --prefix=$INST_PATH make clean make -j$CPU make install cd $SETUP_DIR - rm -rf distro.* distro/* + rm -rf distro.* touch $SETUP_DIR/htslib.success fi +## SAMTOOLS (tar.bz2) +if [ ! -e $SETUP_DIR/samtools.success ]; then + curl -sSL --retry 10 https://github.com/samtools/samtools/releases/download/${VER_SAMTOOLS}/samtools-${VER_SAMTOOLS}.tar.bz2 > distro.tar.bz2 + rm -rf distro/* + tar --strip-components 1 -C distro -xjf distro.tar.bz2 + cd distro + ./configure --enable-plugins --enable-libcurl --with-htslib=$INST_PATH --prefix=$INST_PATH + make clean + make -j$CPU all + make install + cd $SETUP_DIR + rm -rf distro.* distro/* + touch $SETUP_DIR/samtools.success +fi + ## LIB-BW (tar.gz) if [ ! -e $SETUP_DIR/libBigWig.success ]; then curl -sSL --retry 10 https://github.com/dpryan79/libBigWig/archive/${VER_LIBBW}.tar.gz > distro.tar.gz @@ -96,24 +123,6 @@ if [ ! -e $SETUP_DIR/libBigWig.success ]; then touch $SETUP_DIR/libBigWig.success fi -##### cgpBigWig installation -if [ ! -e $SETUP_DIR/cgpBigWig.success ]; then - curl -sSL --retry 10 https://github.com/cancerit/cgpBigWig/archive/${VER_CGPBIGWIG}.tar.gz > distro.tar.gz - rm -rf distro/* - tar --strip-components 1 -C distro -xzf distro.tar.gz - make -C distro/c clean - make -C distro/c -j$CPU prefix=$INST_PATH HTSLIB=$INST_PATH/lib - cp distro/bin/bam2bedgraph $INST_PATH/bin/. - cp distro/bin/bwjoin $INST_PATH/bin/. - cp distro/bin/bam2bw $INST_PATH/bin/. - cp distro/bin/bwcat $INST_PATH/bin/. - cp distro/bin/bam2bwbases $INST_PATH/bin/. - cp distro/bin/bg2bw $INST_PATH/bin/. 
- cp distro/bin/detectExtremeDepth $INST_PATH/bin/. - rm -rf distro.* distro/* - touch $SETUP_DIR/cgpBigWig.success -fi - ##### DEPS for PCAP - layered on top ##### ## build BWA (tar.gz) @@ -127,73 +136,48 @@ if [ ! -e $SETUP_DIR/bwa.success ]; then touch $SETUP_DIR/bwa.success fi -## SAMTOOLS (tar.bz2) -if [ ! -e $SETUP_DIR/samtools.success ]; then - curl -sSL --retry 10 https://github.com/samtools/samtools/releases/download/${VER_SAMTOOLS}/samtools-${VER_SAMTOOLS}.tar.bz2 > distro.tar.bz2 - rm -rf distro/* - tar --strip-components 1 -C distro -xjf distro.tar.bz2 - cd distro - ./configure --enable-plugins --enable-libcurl --with-htslib=$INST_PATH --prefix=$INST_PATH - make clean - make -j$CPU all - make install - cd $SETUP_DIR - rm -rf distro.* distro/* - touch $SETUP_DIR/samtools.success -fi - ## Bio::DB::HTS (tar.gz) if [ ! -e $SETUP_DIR/Bio-DB-HTS.success ]; then ## add perl deps - cpanm --no-interactive --notest --mirror http://cpan.metacpan.org -l $INST_PATH Module::Build - cpanm --no-interactive --notest --mirror http://cpan.metacpan.org -l $INST_PATH Bio::Root::Version + cpanm --no-wget --no-interactive --notest --mirror http://cpan.metacpan.org -l $INST_PATH Module::Build + cpanm --no-wget --no-interactive --notest --mirror http://cpan.metacpan.org -l $INST_PATH Bio::Root::Version curl -sSL --retry 10 https://github.com/Ensembl/Bio-DB-HTS/archive/${VER_BIODBHTS}.tar.gz > distro.tar.gz rm -rf distro/* tar --strip-components 1 -C distro -zxf distro.tar.gz cd distro perl Build.PL --install_base=$INST_PATH --htslib=$INST_PATH + ./Build + ./Build test ./Build install cd $SETUP_DIR rm -rf distro.* distro/* touch $SETUP_DIR/Bio-DB-HTS.success fi -## biobambam2 -if [ ! -e $SETUP_DIR/biobambam2.success ]; then - # co-located external script as it's complex - cd distro - rm -rf * - bash $SCRIPT_PATH/biobambam2-build.sh - cp biobambam/bin/* $INST_PATH/bin/. - rsync -rl biobambam/bin $INST_PATH/. 
- rsync -rl biobambam/include $INST_PATH/, - rsync -rl biobambam/lib $INST_PATH/. - rsync -rl biobambam/share $INST_PATH/. - cd $SETUP_DIR - rm -rf distro.* distro/* - touch $SETUP_DIR/biobambam2.success -fi - ##### PCAP-core installation if [ ! -e $SETUP_DIR/PCAP.success ]; then - cpanm --no-interactive --notest --mirror http://cpan.metacpan.org -l $INST_PATH Const::Fast - cpanm --no-interactive --notest --mirror http://cpan.metacpan.org -l $INST_PATH File::Which + cpanm --no-wget --no-interactive --notest --mirror http://cpan.metacpan.org -l $INST_PATH Const::Fast + cpanm --no-wget --no-interactive --notest --mirror http://cpan.metacpan.org -l $INST_PATH File::Which curl -sSL --retry 10 https://github.com/cancerit/PCAP-core/archive/${VER_PCAP}.tar.gz > distro.tar.gz rm -rf distro/* tar --strip-components 1 -C distro -xzf distro.tar.gz cd distro if [ ! -e $SETUP_DIR/pcap_c.success ]; then make -C c clean - make -C c -j$CPU prefix=$INST_PATH HTSLIB=$INST_PATH/lib + export REF_PATH=/tmp/REF_CACHE/cache/%2s/%2s/%s:http://www.ebi.ac.uk/ena/cram/md5/%s + export REF_CACHE=/tmp/REF_CACHE/cache/%2s/%2s/%s + mkdir -p /tmp/REF_CACHE + env HTSLIB=$SETUP_DIR/htslib make -C c -j$CPU prefix=$INST_PATH cp bin/bam_stats $INST_PATH/bin/. cp bin/reheadSQ $INST_PATH/bin/. cp bin/diff_bams $INST_PATH/bin/. + cp bin/mismatchQc $INST_PATH/bin/. touch $SETUP_DIR/pcap_c.success fi - cpanm --no-interactive --notest --mirror http://cpan.metacpan.org --notest -l $INST_PATH --installdeps . - cpanm -v --no-interactive --mirror http://cpan.metacpan.org -l $INST_PATH . + cpanm --no-wget --no-interactive --notest --mirror http://cpan.metacpan.org --notest -l $INST_PATH --installdeps . + cpanm -v --no-wget --no-interactive --mirror http://cpan.metacpan.org -l $INST_PATH . 
cd $SETUP_DIR rm -rf distro.* distro/* touch $SETUP_DIR/PCAP.success @@ -201,17 +185,3 @@ fi cd $HOME rm -rf $SETUP_DIR - -set +x - -echo " -################################################################ - - To use the non-central tools you need to set the following - export LD_LIBRARY_PATH=$INST_PATH/lib:\$LD_LIBRARY_PATH - export PATH=$INST_PATH/bin:\$PATH - export MANPATH=$INST_PATH/man:$INST_PATH/share/man:\$MANPATH - export PERL5LIB=$INST_PATH/lib/perl5:\$PERL5LIB - -################################################################ -" diff --git a/build/perllib-build.sh b/build/perllib-build.sh deleted file mode 100755 index 782620a..0000000 --- a/build/perllib-build.sh +++ /dev/null @@ -1,5 +0,0 @@ -#!/bin/bash - -set -eux - -echo "Doing nothing, here for perl libs that won't install through cpanminus" diff --git a/cwls/cgpmap-bamBaiOut.cwl b/cwls/cgpmap-bamBaiOut.cwl new file mode 100644 index 0000000..16814fe --- /dev/null +++ b/cwls/cgpmap-bamBaiOut.cwl @@ -0,0 +1,115 @@ +#!/usr/bin/env cwl-runner + +class: CommandLineTool + +id: "cgpmap" + +label: "CGP BWA-mem mapping flow" + +cwlVersion: v1.0 + +doc: + $include: includes/doc.yml + +requirements: + - $mixin: mixins/requirements.yml + +hints: + - $mixin: mixins/hints.yml + +inputs: + reference: + type: File + doc: "The core reference (fa, fai, dict) as tar.gz" + inputBinding: + prefix: -reference + position: 1 + separate: true + + bwa_idx: + type: File + doc: "The BWA indexes in tar.gz" + inputBinding: + prefix: -bwa_idx + position: 2 + separate: true + + sample: + type: string + doc: "Sample name to be included in output BAM header, also used to name final file" + inputBinding: + prefix: -sample + position: 3 + separate: true + + bwa: + type: string? + default: ' -Y -K 100000000' + doc: "Mapping and output parameters to pass to BWA-mem, see BWA docs, default ' -Y -K 100000000'" + inputBinding: + prefix: -bwa + position: 4 + separate: true + shellQuote: false + + groupinfo: + type: File? 
+ doc: "Readgroup metadata file for FASTQ inputs" + inputBinding: + prefix: -groupinfo + position: 5 + separate: true + + mmqc: + type: boolean + doc: "Apply mismatch QC to reads following duplicate marking." + inputBinding: + prefix: -qc + position: 6 + + mmqcfrac: + type: float? + default: 0.05 + doc: "Mismatch fraction to set as max before failing a read [0.05]" + inputBinding: + prefix: -qcf + position: 7 + separate: true + + seq_in: + type: + - 'null' + - type: array + items: File + doc: "Can be BAM, CRAM, fastq (paired or interleaved), BAM/CRAM can be mixed together but not FASTQ." + inputBinding: + position: 8 + +outputs: + out_bam: + type: File + outputBinding: + glob: $(inputs.sample).bam + secondaryFiles: + - .bai + - .bas + - .md5 + - .met + - .maptime + +baseCommand: ["/opt/wtsi-cgp/bin/ds-cgpmap.pl"] + +$schemas: + - http://schema.org/docs/schema_org_rdfa.html + +$namespaces: + s: http://schema.org/ + +s:codeRepository: https://github.com/cancerit/dockstore-cgpmap +s:license: https://spdx.org/licenses/AGPL-3.0-only + +s:author: + - class: s:Person + s:identifier: https://orcid.org/0000-0002-5634-1539 + s:email: mailto:cgphelp@sanger.ac.uk + s:name: Keiran Raine diff --git a/cwls/cgpmap-bamCsiOut.cwl b/cwls/cgpmap-bamCsiOut.cwl new file mode 100644 index 0000000..934e261 --- /dev/null +++ b/cwls/cgpmap-bamCsiOut.cwl @@ -0,0 +1,115 @@ +#!/usr/bin/env cwl-runner + +class: CommandLineTool + +id: "cgpmap" + +label: "CGP BWA-mem mapping flow" + +cwlVersion: v1.0 + +doc: + $include: includes/doc.yml + +requirements: + - $mixin: mixins/requirements.yml + +hints: + - $mixin: mixins/hints.yml + +inputs: + reference: + type: File + doc: "The core reference (fa, fai, dict) as tar.gz" + inputBinding: + prefix: -reference + position: 1 + separate: true + + bwa_idx: + type: File + doc: "The BWA indexes in tar.gz" + inputBinding: + prefix: -bwa_idx + position: 2 + separate: true + + sample: + type: string + doc: "Sample name to be included in output BAM header, also 
used to name final file" + inputBinding: + prefix: -sample + position: 3 + separate: true + + bwa: + type: string? + default: ' -Y -K 100000000' + doc: "Mapping and output parameters to pass to BWA-mem, see BWA docs, default ' -Y -K 100000000'" + inputBinding: + prefix: -bwa + position: 4 + separate: true + shellQuote: false + + groupinfo: + type: File? + doc: "Readgroup metadata file for FASTQ inputs" + inputBinding: + prefix: -groupinfo + position: 5 + separate: true + + mmqc: + type: boolean + doc: "Apply mismatch QC to reads following duplicate marking." + inputBinding: + prefix: -qc + position: 6 + + mmqcfrac: + type: float? + default: 0.05 + doc: "Mismatch fraction to set as max before failing a read [0.05]" + inputBinding: + prefix: -qcf + position: 7 + separate: true + + seq_in: + type: + - 'null' + - type: array + items: File + doc: "Can be BAM, CRAM, fastq (paired or interleaved), BAM/CRAM can be mixed together but not FASTQ." + inputBinding: + position: 8 + +outputs: + out_bam: + type: File + outputBinding: + glob: $(inputs.sample).bam + secondaryFiles: + - .csi + - .bas + - .md5 + - .met + - .maptime + +baseCommand: ["/opt/wtsi-cgp/bin/ds-cgpmap.pl", "-csi"] + +$schemas: + - http://schema.org/docs/schema_org_rdfa.html + +$namespaces: + s: http://schema.org/ + +s:codeRepository: https://github.com/cancerit/dockstore-cgpmap +s:license: https://spdx.org/licenses/AGPL-3.0-only + +s:author: + - class: s:Person + s:identifier: https://orcid.org/0000-0002-5634-1539 + s:email: mailto:cgphelp@sanger.ac.uk + s:name: Keiran Raine diff --git a/cwls/cgpmap-cramOut.cwl b/cwls/cgpmap-cramOut.cwl new file mode 100644 index 0000000..a59f8e4 --- /dev/null +++ b/cwls/cgpmap-cramOut.cwl @@ -0,0 +1,125 @@ +#!/usr/bin/env cwl-runner + +class: CommandLineTool + +id: "cgpmap" + +label: "CGP BWA-mem mapping flow" + +cwlVersion: v1.0 + +doc: + $include: includes/doc.yml + +requirements: + - $mixin: mixins/requirements.yml + +hints: + - $mixin: mixins/hints.yml + +inputs: + 
reference: + type: File + doc: "The core reference (fa, fai, dict) as tar.gz" + inputBinding: + prefix: -reference + position: 1 + separate: true + + bwa_idx: + type: File + doc: "The BWA indexes in tar.gz" + inputBinding: + prefix: -bwa_idx + position: 2 + separate: true + + sample: + type: string + doc: "Sample name to be included in output CRAM header, also used to name final file" + inputBinding: + prefix: -sample + position: 3 + separate: true + + scramble: + type: string? + doc: "Options to pass to scramble when generating CRAM output, see scramble docs" + default: '' + inputBinding: + prefix: -scramble + position: 4 + separate: true + shellQuote: true + + bwa: + type: string? + default: ' -Y -K 100000000' + doc: "Mapping and output parameters to pass to BWA-mem, see BWA docs, default ' -Y -K 100000000'" + inputBinding: + prefix: -bwa + position: 5 + separate: true + shellQuote: true + + groupinfo: + type: File? + doc: "Readgroup metadata file for FASTQ inputs" + inputBinding: + prefix: -groupinfo + position: 6 + separate: true + + mmqc: + type: boolean + doc: "Apply mismatch QC to reads following duplicate marking." + inputBinding: + prefix: -qc + position: 7 + + mmqcfrac: + type: float? + default: 0.05 + doc: "Mismatch fraction to set as max before failing a read [0.05]" + inputBinding: + prefix: -qcf + position: 8 + separate: true + + seq_in: + type: + - 'null' + - type: array + items: File + doc: "Can be BAM, CRAM, fastq (paired or interleaved), BAM/CRAM can be mixed together but not FASTQ." 
+ inputBinding: + position: 9 + +outputs: + out_cram: + type: File + outputBinding: + glob: $(inputs.sample).cram + secondaryFiles: + - .crai + - .bas + - .md5 + - .met + - .maptime + +baseCommand: ["/opt/wtsi-cgp/bin/ds-cgpmap.pl", "-cram"] + +$schemas: + - http://schema.org/docs/schema_org_rdfa.html + +$namespaces: + s: http://schema.org/ + +s:codeRepository: https://github.com/cancerit/dockstore-cgpmap +s:license: https://spdx.org/licenses/AGPL-3.0-only + +s:author: + - class: s:Person + s:identifier: https://orcid.org/0000-0002-5634-1539 + s:email: mailto:cgphelp@sanger.ac.uk + s:name: Keiran Raine diff --git a/cwls/includes/doc.yml b/cwls/includes/doc.yml new file mode 100644 index 0000000..2d25e2e --- /dev/null +++ b/cwls/includes/doc.yml @@ -0,0 +1,9 @@ +![build_status](https://quay.io/repository/wtsicgp/dockstore-cgpmap/status) +A Docker container for PCAP-core. See the [dockstore-cgpmap](https://github.com/cancerit/dockstore-cgpmap) website for more information. + +Please read the relevant [changes](https://github.com/cancerit/dockstore-cgpmap/blob/master/CHANGES.md) +when upgrading. + +Parameters for a CWL definition are generally described in a json file, but parameters can be provided on the command line. 
+ +To see the parameters descriptions please run: cwltool --tool-help path_to.cwl diff --git a/cwls/mixins/hints.yml b/cwls/mixins/hints.yml new file mode 100644 index 0000000..0a08955 --- /dev/null +++ b/cwls/mixins/hints.yml @@ -0,0 +1,4 @@ +class: ResourceRequirement +coresMin: 1 # works but long, 6 recommended +ramMin: 15000 # good for WGS human ~30-60x +outdirMin: 5000000 # unlikely any BAM processing would be possible in less diff --git a/cwls/mixins/requirements.yml b/cwls/mixins/requirements.yml new file mode 100644 index 0000000..ca87167 --- /dev/null +++ b/cwls/mixins/requirements.yml @@ -0,0 +1,2 @@ +class: DockerRequirement +dockerPull: "quay.io/wtsicgp/dockstore-cgpmap:3.0.0" diff --git a/examples/cgpmap/bamOut/bam_input.json b/examples/cgpmap/bamOut/bam_input.json new file mode 100644 index 0000000..55368f7 --- /dev/null +++ b/examples/cgpmap/bamOut/bam_input.json @@ -0,0 +1,21 @@ +{ + "reference": { + "path": "ftp://ftp.sanger.ac.uk/pub/cancer/dockstore/human/core_ref_GRCh37d5.tar.gz", + "class": "File" + }, + "bwa_idx": { + "path": "ftp://ftp.sanger.ac.uk/pub/cancer/dockstore/human/bwa_idx_GRCh37d5.tar.gz", + "class": "File" + }, + "seq_in": [ + {"class": "File", + "path": "ftp://ngs.sanger.ac.uk/production/cancer/dockstore/cgpmap/insilico_21.bam"} + ], + "sample": "test", + "mmqc": false, + "mmqcfrag": 0.05, + "out_bam": { + "path": "/tmp/mapped.bam", + "class": "File" + } +} diff --git a/examples/cgpmap/bamOut/fastq_gz_input.json b/examples/cgpmap/bamOut/fastq_gz_input.json new file mode 100644 index 0000000..c878a89 --- /dev/null +++ b/examples/cgpmap/bamOut/fastq_gz_input.json @@ -0,0 +1,39 @@ +{ + "reference": { + "path": "ftp://ftp.sanger.ac.uk/pub/cancer/dockstore/human/core_ref_GRCh37d5.tar.gz", + "class": "File" + }, + "bwa_idx": { + "path": "ftp://ftp.sanger.ac.uk/pub/cancer/dockstore/human/bwa_idx_GRCh37d5.tar.gz", + "class": "File" + }, + "seq_in": [ + {"class": "File", + "path": 
"ftp://ngs.sanger.ac.uk/production/cancer/dockstore/cgpmap/insilico_21_10658_i.fq.gz"}, + {"class": "File", + "path": "ftp://ngs.sanger.ac.uk/production/cancer/dockstore/cgpmap/insilico_21_10659_i.fq.gz"}, + {"class": "File", + "path": "ftp://ngs.sanger.ac.uk/production/cancer/dockstore/cgpmap/insilico_21_10660_i.fq.gz"}, + {"class": "File", + "path": "ftp://ngs.sanger.ac.uk/production/cancer/dockstore/cgpmap/insilico_21_10661_i.fq.gz"}, + {"class": "File", + "path": "ftp://ngs.sanger.ac.uk/production/cancer/dockstore/cgpmap/insilico_21_10662_i.fq.gz"}, + {"class": "File", + "path": "ftp://ngs.sanger.ac.uk/production/cancer/dockstore/cgpmap/insilico_21_10663_i.fq.gz"}, + {"class": "File", + "path": "ftp://ngs.sanger.ac.uk/production/cancer/dockstore/cgpmap/insilico_21_10664_i.fq.gz"}, + {"class": "File", + "path": "ftp://ngs.sanger.ac.uk/production/cancer/dockstore/cgpmap/insilico_21_10665_i.fq.gz"}, + ], + "groupinfo": { + "path": "ftp://ngs.sanger.ac.uk/production/cancer/dockstore/cgpmap/insilico_21_fq.yaml", + "class": "File" + }, + "sample": "test", + "mmqc": false, + "mmqcfrag": 0.05, + "out_bam": { + "path": "/tmp/mapped.bam", + "class": "File" + } +} diff --git a/examples/cgpmap/bamOutCsi/bam_input.json b/examples/cgpmap/bamOutCsi/bam_input.json new file mode 100644 index 0000000..55368f7 --- /dev/null +++ b/examples/cgpmap/bamOutCsi/bam_input.json @@ -0,0 +1,21 @@ +{ + "reference": { + "path": "ftp://ftp.sanger.ac.uk/pub/cancer/dockstore/human/core_ref_GRCh37d5.tar.gz", + "class": "File" + }, + "bwa_idx": { + "path": "ftp://ftp.sanger.ac.uk/pub/cancer/dockstore/human/bwa_idx_GRCh37d5.tar.gz", + "class": "File" + }, + "seq_in": [ + {"class": "File", + "path": "ftp://ngs.sanger.ac.uk/production/cancer/dockstore/cgpmap/insilico_21.bam"} + ], + "sample": "test", + "mmqc": false, + "mmqcfrag": 0.05, + "out_bam": { + "path": "/tmp/mapped.bam", + "class": "File" + } +} diff --git a/examples/cgpmap/bamOutCsi/fastq_gz_input.json 
b/examples/cgpmap/bamOutCsi/fastq_gz_input.json new file mode 100644 index 0000000..c878a89 --- /dev/null +++ b/examples/cgpmap/bamOutCsi/fastq_gz_input.json @@ -0,0 +1,39 @@ +{ + "reference": { + "path": "ftp://ftp.sanger.ac.uk/pub/cancer/dockstore/human/core_ref_GRCh37d5.tar.gz", + "class": "File" + }, + "bwa_idx": { + "path": "ftp://ftp.sanger.ac.uk/pub/cancer/dockstore/human/bwa_idx_GRCh37d5.tar.gz", + "class": "File" + }, + "seq_in": [ + {"class": "File", + "path": "ftp://ngs.sanger.ac.uk/production/cancer/dockstore/cgpmap/insilico_21_10658_i.fq.gz"}, + {"class": "File", + "path": "ftp://ngs.sanger.ac.uk/production/cancer/dockstore/cgpmap/insilico_21_10659_i.fq.gz"}, + {"class": "File", + "path": "ftp://ngs.sanger.ac.uk/production/cancer/dockstore/cgpmap/insilico_21_10660_i.fq.gz"}, + {"class": "File", + "path": "ftp://ngs.sanger.ac.uk/production/cancer/dockstore/cgpmap/insilico_21_10661_i.fq.gz"}, + {"class": "File", + "path": "ftp://ngs.sanger.ac.uk/production/cancer/dockstore/cgpmap/insilico_21_10662_i.fq.gz"}, + {"class": "File", + "path": "ftp://ngs.sanger.ac.uk/production/cancer/dockstore/cgpmap/insilico_21_10663_i.fq.gz"}, + {"class": "File", + "path": "ftp://ngs.sanger.ac.uk/production/cancer/dockstore/cgpmap/insilico_21_10664_i.fq.gz"}, + {"class": "File", + "path": "ftp://ngs.sanger.ac.uk/production/cancer/dockstore/cgpmap/insilico_21_10665_i.fq.gz"}, + ], + "groupinfo": { + "path": "ftp://ngs.sanger.ac.uk/production/cancer/dockstore/cgpmap/insilico_21_fq.yaml", + "class": "File" + }, + "sample": "test", + "mmqc": false, + "mmqcfrag": 0.05, + "out_bam": { + "path": "/tmp/mapped.bam", + "class": "File" + } +} diff --git a/examples/cgpmap/cramOut/bam_input.json b/examples/cgpmap/cramOut/bam_input.json new file mode 100644 index 0000000..b16b605 --- /dev/null +++ b/examples/cgpmap/cramOut/bam_input.json @@ -0,0 +1,21 @@ +{ + "reference": { + "path": "ftp://ftp.sanger.ac.uk/pub/cancer/dockstore/human/core_ref_GRCh37d5.tar.gz", + "class": "File" + }, + 
"bwa_idx": { + "path": "ftp://ftp.sanger.ac.uk/pub/cancer/dockstore/human/bwa_idx_GRCh37d5.tar.gz", + "class": "File" + }, + "seq_in": [ + {"class": "File", + "path": "ftp://ngs.sanger.ac.uk/production/cancer/dockstore/cgpmap/insilico_21.bam"} + ], + "sample": "test", + "mmqc": false, + "mmqcfrag": 0.05, + "out_cram": { + "path": "/tmp/mapped.cram", + "class": "File" + } +} diff --git a/examples/cgpmap/cramOut/fastq_gz_input.json b/examples/cgpmap/cramOut/fastq_gz_input.json new file mode 100644 index 0000000..9504d95 --- /dev/null +++ b/examples/cgpmap/cramOut/fastq_gz_input.json @@ -0,0 +1,39 @@ +{ + "reference": { + "path": "ftp://ftp.sanger.ac.uk/pub/cancer/dockstore/human/core_ref_GRCh37d5.tar.gz", + "class": "File" + }, + "bwa_idx": { + "path": "ftp://ftp.sanger.ac.uk/pub/cancer/dockstore/human/bwa_idx_GRCh37d5.tar.gz", + "class": "File" + }, + "seq_in": [ + {"class": "File", + "path": "ftp://ngs.sanger.ac.uk/production/cancer/dockstore/cgpmap/insilico_21_10658_i.fq.gz"}, + {"class": "File", + "path": "ftp://ngs.sanger.ac.uk/production/cancer/dockstore/cgpmap/insilico_21_10659_i.fq.gz"}, + {"class": "File", + "path": "ftp://ngs.sanger.ac.uk/production/cancer/dockstore/cgpmap/insilico_21_10660_i.fq.gz"}, + {"class": "File", + "path": "ftp://ngs.sanger.ac.uk/production/cancer/dockstore/cgpmap/insilico_21_10661_i.fq.gz"}, + {"class": "File", + "path": "ftp://ngs.sanger.ac.uk/production/cancer/dockstore/cgpmap/insilico_21_10662_i.fq.gz"}, + {"class": "File", + "path": "ftp://ngs.sanger.ac.uk/production/cancer/dockstore/cgpmap/insilico_21_10663_i.fq.gz"}, + {"class": "File", + "path": "ftp://ngs.sanger.ac.uk/production/cancer/dockstore/cgpmap/insilico_21_10664_i.fq.gz"}, + {"class": "File", + "path": "ftp://ngs.sanger.ac.uk/production/cancer/dockstore/cgpmap/insilico_21_10665_i.fq.gz"}, + ], + "groupinfo": { + "path": "ftp://ngs.sanger.ac.uk/production/cancer/dockstore/cgpmap/insilico_21_fq.yaml", + "class": "File" + }, + "sample": "test", + "mmqc": false, + 
"mmqcfrag": 0.05, + "out_cram": { + "path": "/tmp/mapped.cram", + "class": "File" + } +} diff --git a/examples/sample_configs.local.json b/examples/cgpmap/pre_3.0.json similarity index 100% rename from examples/sample_configs.local.json rename to examples/cgpmap/pre_3.0.json diff --git a/scripts/ds-cgpmap.pl b/scripts/ds-cgpmap.pl new file mode 100755 index 0000000..25148d6 --- /dev/null +++ b/scripts/ds-cgpmap.pl @@ -0,0 +1,261 @@ +#!/usr/bin/perl + +use strict; +use Getopt::Long; +use File::Path qw(make_path); +use Pod::Usage qw(pod2usage); +use Data::Dumper; +use autodie qw(:all); +use warnings FATAL => 'all'; + +pod2usage(-verbose => 1, -exitval => 1) if(@ARGV == 0); + +# set defaults +my %opts = ('csi' => 0, + 'c' => 0, + 'q' => 0, + 'sc' => q{}, + 'b' => q{}, + 'o' => $ENV{HOME}, + 't' => undef, + 'g' => undef, + 'f' => 0.05, + ); + +GetOptions( 'h|help' => \$opts{'h'}, + 'm|man' => \$opts{'m'}, + 'r|reference=s' => \$opts{'r'}, + 'i|bwa_idx=s' => \$opts{'i'}, + 's|sample=s' => \$opts{'s'}, + 'c|cram' => \$opts{'c'}, + 'sc|scramble:s' => \$opts{'sc'}, + 'csi' => \$opts{'csi'}, + 'b|bwa:s' => \$opts{'b'}, + 'g|groupinfo:s' => \$opts{'g'}, + 't|threads:i' => \$opts{'t'}, + 'o|outdir:s' => \$opts{'o'}, + 'q|qc' => \$opts{'q'}, + 'f|qcf:f' => \$opts{'f'}, +) or pod2usage(2); + +pod2usage(-verbose => 1, -exitval => 0) if(defined $opts{'h'}); +pod2usage(-verbose => 2, -exitval => 0) if(defined $opts{'m'}); + +delete $opts{'h'}; +delete $opts{'m'}; + +printf "Options loaded: \n%s\n",Dumper(\%opts); + +# figure out if ref already unpacked: +my $ref_area = $opts{'o'}.'/reference_files'; +my $ref_unpack = 1; +if($opts{'r'} eq $opts{'i'} && -d $opts{'r'}) { + $ref_area = $opts{'r'}; + $ref_unpack = 0; +} + +# make the param file +make_path($opts{'o'}) unless(-e $opts{'o'}); +my $run_file = $opts{'o'}.'/run.params'; +open my $FH,'>',$run_file or die "Failed to write to $run_file: $!"; +# Force explicit checking of file flush +print $FH "export 
PCAP_THREADED_NO_SCRIPT=1\n"; +print $FH "export PCAP_THREADED_FORCE_SYNC=1\n"; +print $FH "export PCAP_THREADED_LOADBACKOFF=1\n"; +print $FH "export PCAP_THREADED_REM_LOGS=1\n"; +# General params +printf $FH "REF_BASE='%s'\n", $ref_area; +printf $FH "SAMPLE_NAME='%s'\n", $opts{'s'}; +printf $FH "OUTPUT_DIR='%s'\n", $opts{'o'}; +printf $FH "CRAM='%d'\n", $opts{'c'}; +printf $FH "SCRAMBLE='%s'\n", $opts{'sc'} if(length $opts{'sc'} > 0); +printf $FH "CSI='%d'\n", $opts{'csi'}; +printf $FH "BWA_PARAM='%s'\n", $opts{'b'} if(length $opts{'b'} > 0); +printf $FH "GROUPINFO='%s'\n", $opts{'g'} if(defined $opts{'g'}); +printf $FH "CPU=%d\n", $opts{'t'} if(defined $opts{'t'}); +printf $FH "CLEAN_REF=%d\n", $ref_unpack; +printf $FH "INPUT='%s'\n", join ' ', @ARGV; +printf $FH "MMQC=%d\n", $opts{'q'}; +printf $FH "MMQCFRAC=%s\n", $opts{'f'} if(defined $opts{'f'}); +close $FH; + +if($ref_unpack) { + ## unpack the reference area: + make_path($ref_area); + my $untar = sprintf 'tar --strip-components 1 -C %s -zxvf %s', $ref_area, $opts{'r'}; + system($untar) && die $!; + $untar = sprintf 'tar --strip-components 1 -C %s -zxvf %s', $ref_area, $opts{'i'}; + system($untar) && die $!; +} + +exec('mapping.sh', $run_file); # I will never return to the perl code + +__END__ + + +=head1 NAME + +ds-cgpmap.pl - Generate the param file and execute mapping.sh (for dockstore) + +=head1 SYNOPSIS + +ds-cgpmap.pl [options] [file(s)...] + + Required parameters: + -reference -r Path to core reference tar.gz + - if already unpacked provide base directory + - see `-m` for full details + -bwa_idx -i Path to bwa index tar.gz + - if already unpacked provide base directory + - see `-m` for full details + -sample -s Sample name to be applied to output file. 
+ + Optional parameters: + -cram -c Output cram, see '-sc' + -scramble -sc Single quoted string of parameters to pass to Scramble when '-c' used + - '-I,-O' are used internally and should not be provided + -bwa -b Single quoted string of additional parameters to pass to BWA + - '-t,-p,-R' are used internally and should not be provided + -groupinfo -g Readgroup metadata file for FASTQ inputs, values are not validated (yaml). + -threads -t Set the number of cpu/cores available [default all]. + -outdir -o Set the output folder [$HOME] + -qc -q Apply mismatch QC to reads following duplicate marking + -qcf -f Mismatch fraction to set as max before failing a read [0.05] + + Other: + -help -h Brief help message. + -man -m Full documentation. + +File list can be full file names or wildcard, e.g. + +=over 4 + +=item multiple BAM inputs + + ds-cgpmap.pl [options] input/*.bam + +=item multiple paired fastq inputs + + ds-cgpmap.pl [options] input/*_[12].fq[.gz] + +=item multiple interleaved paired fastq inputs + + ds-cgpmap.pl [options] input/*.fq[.gz] + +=item mixture of BAM and CRAM + + ds-cgpmap.pl [options] input/*.bam input/*.cram + +=back + +=head1 DESCRIPTION + +stuff + +=head1 OPTION DETAILS + +=over 4 + +=item B<-reference> B<-bwa_idx> + +B<-reference> should point to a core_ref_XXXX.tar.gz + +B<-bwa_idx> should point to a bwa_idx_XXXX.tar.gz + +See ftp://ftp.sanger.ac.uk/pub/cancer/dockstore/ + +If both are equal and are directories, they are assumed to be already unpacked as + + mkdir ref_base + tar -C ref_base --strip-components 1 -zxvf core_ref_XXXX.tar.gz + tar -C ref_base --strip-components 1 -zxvf bwa_idx_XXXX.tar.gz + +B<-reference> + +=item B<-sample> + +Name to be applied to output files. Special characters will not be magically fixed. + + +=item B<-cram> + +Final output file will be a CRAM file instead of BAM. To tune the compression methods see the +B<-scramble> option. + +=item B<-scramble> + +Single quoted string of parameters to pass to Scramble when '-c' used. 
Please see the Scramble +documentation for details. + +Please note: '-I,-O' are used internally and should not be provided. + +=item B<-csi> + +Generate csi index instead of bai when output is BAM. + +=item B<-bwa> + +Single quoted string of additional parameters to pass to BWA. Please see the 'bwa mem' +documentation for details. + +Please note: '-t,-p,-R' are used internally and should not be provided. + +=item B<-groupinfo> + +Readgroup information metadata file, please see the PCAP wiki for format: + +https://github.com/cancerit/PCAP-core/wiki/File-Formats-groupinfo.yaml + +=item B<-threads> + +Sets the number of cores to be used during processing. Default to use all at appropriate +points in analysis. + +Recommend increments of 6 once 6 is exceeded. + +=item B<-outdir> + +Set the output directory. Defaults to $HOME. + +NOTE: Should B<NOT> be set when working with dockstore wrapper. + +=item B<-qc> + +Apply mismatch QC to reads with a mismatch fraction higher than that specified in B<-qcf>. + +=item B<-qcf> + +When B<-qc> is set, reads with a mismatch rate greater than this value are set to QC_FAIL (512/0x200). + +An auxiliary tag is also set so these can be identified. + +=back + +=head2 INPUT FILE TYPES + +There are several types of file that the script is able to process. + +=over 4 + +=item f[ast]q + +A standard uncompressed fastq file. Requires a pair of inputs with standard suffix of '_1' and '_2' +immediately prior to '.f[ast]q' or an interleaved f[ast]q file where read 1 and 2 are adjacent +in the file. + + +=item f[ast]q.gz + +As *.f[ast]q but compressed with gzip. + +=item bam + +Single lane BAM files, RG line is transferred to aligned files. Also accepts multi lane BAM. + +=item cram + +Single lane CRAM files, RG line is transferred to aligned files. Also accepts multi lane CRAM. 
+ +=back + +=cut diff --git a/scripts/ds-wrapper.pl b/scripts/ds-wrapper.pl deleted file mode 100755 index 1e4bebe..0000000 --- a/scripts/ds-wrapper.pl +++ /dev/null @@ -1,156 +0,0 @@ -#!/usr/bin/perl - -use strict; -use Getopt::Long; -use File::Path qw(make_path); -use Pod::Usage qw(pod2usage); -use Data::Dumper; -use autodie qw(:all); -use warnings FATAL => 'all'; - -pod2usage(-verbose => 1, -exitval => 1) if(@ARGV == 0); - -# set defaults -my %opts = ('c'=>0, - 'sc' => q{}, - 'b' => q{} - ,); - -GetOptions( 'h|help' => \$opts{'h'}, - 'm|man' => \$opts{'m'}, - 'r|reference=s' => \$opts{'r'}, - 'i|bwa_idx=s' => \$opts{'i'}, - 's|sample=s' => \$opts{'s'}, - 'c|cram' => \$opts{'c'}, - 'sc|scramble:s' => \$opts{'sc'}, - 'b|bwa:s' => \$opts{'b'}, -) or pod2usage(2); - -pod2usage(-verbose => 1, -exitval => 0) if(defined $opts{'h'}); -pod2usage(-verbose => 2, -exitval => 0) if(defined $opts{'m'}); - -delete $opts{'h'}; -delete $opts{'m'}; - -printf "Options loaded: \n%s\n",Dumper(\%opts); - -## unpack the reference area: -my $ref_area = $ENV{HOME}.'/reference_files'; -make_path($ref_area); -my $untar = sprintf 'tar --strip-components 1 -C %s -zxvf %s', $ref_area, $opts{'r'}; -system($untar) && die $!; -$untar = sprintf 'tar --strip-components 1 -C %s -zxvf %s', $ref_area, $opts{'i'}; -system($untar) && die $!; - -my $run_file = $ENV{HOME}.'/run.params'; -open my $FH,'>',$run_file or die "Failed to write to $run_file: $!"; -# Force explicit checking of file flush -printf $FH "export PCAP_THREADED_NO_SCRIPT=1\n"; -printf $FH "export PCAP_THREADED_FORCE_SYNC=1\n"; -# General params -printf $FH "REF_BASE='%s'\n", $ref_area; -printf $FH "SAMPLE_NAME='%s'\n", $opts{'s'}; -printf $FH "OUTPUT_DIR='%s'\n", $ENV{HOME}; -printf $FH "CRAM='%s'\n", $opts{'c'}; -printf $FH "SCRAMBLE='%s'\n", $opts{'sc'} if(length $opts{'sc'} > 0); -printf $FH "BWA_PARAM='%s'\n", $opts{'b'} if(length $opts{'b'} > 0); -printf $FH "INPUT='%s'\n", join ' ', @ARGV; -close $FH; - -exec('mapping.sh'); # I 
will never return to the perl code - -__END__ - - -=head1 NAME - -dh-wrapper.pl - Generate the param file and execute mapping.sh (for dockstore) - -=head1 SYNOPSIS - -dh-wrapper.pl [options] [file(s)...] - - Required parameters: - -reference -r Path to core reference tar.gz - -bwa_idx -i Path to bwa index tar.gz - -sample -s Sample name to be applied to output file. - - Optional parameters: - -cram -c Output cram, see '-sc' - -scramble -sc Single quoted string of parameters to pass to Scramble when '-c' used - - '-I,-O' are used internally and should not be provided - -bwa -b Single quoted string of additional parameters to pass to BWA - - '-t,-p,-R' are used internally and should not be provided - - Other: - -help -h Brief help message. - -man -m Full documentation. - -File list can be full file names or wildcard, e.g. - -=over 4 - -=item mutiple BAM inputs - - dh-wrapper.pl [options] input/*.bam - -=item multiple paired fastq inputs - - dh-wrapper.pl [options] input/*_[12].fq[.gz] - -=item multiple interleaved paired fastq inputs - - dh-wrapper.pl [options] input/*.fq[.gz] - -=item mixture of BAM and CRAM - - dh-wrapper.pl [options] input/*.bam input/*.cram - -=back - -=head1 DESCRIPTION - -stuff - -=head1 OPTION DETAILS - -=over 4 - -=item B<-reference> - -Path to mapping tar.gz reference files - -=item B<-sample> - -Name to be applied to output files. Special characters will not be magically fixed. - -=back - -=head2 INPUT FILE TYPES - -There are several types of file that the script is able to process. - -=over 4 - -=item f[ast]q - -A standard uncompressed fastq file. Requires a pair of inputs with standard suffix of '_1' and '_2' -immediately prior to '.f[ast]q' or an interleaved f[ast]q file where read 1 and 2 are adjacent -in the file. - - -=item f[ast]q.gz - -As *.f[ast]q but compressed with gzip. - -=item bam - -Single lane BAM files, RG line is transfered to aligned files. Also accepts multi lane BAM. 
- -=item cram - -Single lane BAM files, RG line is transfered to aligned files. Also accepts multi lane CRAM. - -=back - -=cut diff --git a/scripts/mapping.sh b/scripts/mapping.sh index 9d1d45e..cf03f60 100755 --- a/scripts/mapping.sh +++ b/scripts/mapping.sh @@ -9,22 +9,24 @@ echo -e "\nStart workflow: `date`\n" declare -a PRE_EXEC declare -a POST_EXEC -if [ -z ${PARAM_FILE+x} ] ; then +if [[ $# -eq 1 ]] ; then + PARAM_FILE=$1 +elif [ -z ${PARAM_FILE+x} ] ; then PARAM_FILE=$HOME/run.params fi + echo "Loading user options from: $PARAM_FILE" if [ ! -f $PARAM_FILE ]; then echo -e "\tERROR: file indicated by PARAM_FILE not found: $PARAM_FILE" 1>&2 exit 1 fi source $PARAM_FILE -env if [ -z ${CPU+x} ]; then CPU=`grep -c ^processor /proc/cpuinfo` fi -if [ -d $INPUT ] ; then +if [ -d "$INPUT" ] ; then INPUT="$INPUT/*" fi @@ -33,6 +35,8 @@ echo -e "\tSAMPLE_NAME : $SAMPLE_NAME" echo -e "\tINPUT : $INPUT" echo -e "\tREF_BASE : $REF_BASE" echo -e "\tCRAM : $CRAM" +echo -e "\tCSI : $CSI" +echo -e "\tMMQC : $MMQC" if [ -z ${SCRAMBLE+x} ]; then echo -e "\tSCRAMBLE : " else @@ -43,6 +47,11 @@ if [ -z ${BWA_PARAM+x} ]; then else echo -e "\tBWA_PARAM : $BWA_PARAM" fi +if [ -z ${GROUPINFO+x} ]; then + echo -e "\tGROUPINFO : " +else + echo -e "\tGROUPINFO : $GROUPINFO" +fi set +u if [ ${#PRE_EXEC[@]} -eq 0 ]; then @@ -56,28 +65,17 @@ fi set -u mkdir -p $OUTPUT_DIR -# run any pre-exec step before attempting to access BAMs -# logically the pre-exec could be pulling them -if [ ! -f $OUTPUT_DIR/pre-exec.done ]; then - echo -e "\nRun PRE_EXEC: `date`" - - for i in "${PRE_EXEC[@]}"; do - set -x - $i - { set +x; } 2> /dev/null - done - touch $OUTPUT_DIR/pre-exec.done -fi +TIME_EXT="bam" ADD_ARGS='' if [ $CRAM -gt 0 ]; then ADD_ARGS="$ADD_ARGS -c" + TIME_EXT="cram" if [ ! 
-z ${SCRAMBLE+x} ]; then ADD_ARGS="$ADD_ARGS -sc ' $SCRAMBLE'"; fi fi - # use a different malloc library when cores for mapping are over 8 if [ $CPU -gt 7 ]; then ADD_ARGS="$ADD_ARGS -l /usr/lib/libtcmalloc_minimal.so" @@ -85,12 +83,30 @@ fi # if BWA_PARAM set if [ ! -z ${BWA_PARAM+x} ]; then - ADD_ARGS="$ADD_ARGS -b ' $BWA_PARAM'" + ADD_ARGS="$ADD_ARGS -b '$BWA_PARAM'" +fi + +# if GROUPINFO set +if [ ! -z ${GROUPINFO+x} ]; then + ADD_ARGS="$ADD_ARGS -g $GROUPINFO" +fi + +# if CSI set +if [ $CSI -gt 0 ]; then + ADD_ARGS="$ADD_ARGS --csi" +fi + +# if MMQC set +if [ $MMQC -gt 0 ]; then + ADD_ARGS="$ADD_ARGS --mmqc" + if [ ! -z ${MMQCFRAC+x} ]; then + ADD_ARGS="$ADD_ARGS --mmqcfrac $MMQCFRAC" + fi fi # -f set to be unfeasibly large to prevent splitting of lane data. set -x -bash -c "/usr/bin/time -f $TIME_FORMAT -o $OUTPUT_DIR/$SAMPLE_NAME.bam.maptime \ +bash -c "/usr/bin/time -f $TIME_FORMAT -o $OUTPUT_DIR/$SAMPLE_NAME.$TIME_EXT.maptime \ bwa_mem.pl -o $OUTPUT_DIR \ -r $REF_BASE/genome.fa \ -s $SAMPLE_NAME \ @@ -101,12 +117,9 @@ bash -c "/usr/bin/time -f $TIME_FORMAT -o $OUTPUT_DIR/$SAMPLE_NAME.bam.maptime \ $INPUT" { set +x; } 2> /dev/null -# run any post-exec step -echo -e "\nRun POST_EXEC: `date`" -for i in "${POST_EXEC[@]}"; do - set -x - $i - { set +x; } 2> /dev/null -done +# cleanup reference area, see ds-cgpmap.pl +if [ $CLEAN_REF -gt 0 ]; then + rm -rf $REF_BASE +fi echo -e "\nWorkflow end: `date`"