22
// ---------------------------------------------------------------------------
// Pipeline parameters. Each value can be overridden on the command line with
// the matching `--<name>` option.
// ---------------------------------------------------------------------------

// Folder containing the kraken executables (kraken-build is invoked from here).
params.kraken = "/home/merlin_szymanski/Kraken/install/"

// k-mer sizes; createKrakenDB iterates over this list.
params.kmers = ["22"]

// Target directory for all published results. Defaults to `false` so the
// script can detect that the user did not provide one.
params.outdir = false
6+
/**
 * Write the pipeline usage text to the Nextflow log at info level.
 *
 * The text describes the folders the pipeline creates, the required
 * `--outdir` option and the optional `--kraken` / `--kmers` options.
 * Takes no arguments and returns nothing of interest.
 */
def helpMessage() {
    // Common leading indentation is removed so the message prints flush-left.
    def usage = """
    DATASTRUCTURE PIPELINE

    Download all mammalian mitochondrial genomes from the current NCBI/Refseq
    release and create the datastructure required by the sediment_nf pipeline.
    Creates 4 folders:

        1. ncbi: raw Downloaded files from NCBI
        2. genomes: Fasta files grouped by Family
        3. masked: For all fasta files in 'genomes' a masked bed file
        4. kraken: For the given kmers the kraken databases

    USAGE:
        nextflow run path/to/main.nf --outdir ABSOLUTE_PATH

    required:
        --outdir PATH: absolute path to the save-dir. e.g. "/mnt/scratch/.../out"

    optional:
        --kraken PATH: path to your kraken installation folder.
                       default: '/home/merlin_szymanski/Kraken/install'

        --kmers ARRAY: Array of kmers for which databases should be created (this is not tested)
                       default: '["22"]'
    """.stripIndent()

    log.info(usage)
}
// Print the usage text and stop when no usable --outdir was supplied.
// Besides the `false` default this also catches a bare `--outdir` flag, which
// Nextflow parses as boolean `true` and which would otherwise be interpolated
// into every publish path as "true/...", and an empty string value.
if (!params.outdir || params.outdir instanceof Boolean) {
    helpMessage()
    exit 0
}
638
739
840process downloadGenomes{
9- publishDir " ${ params.outdir} /Downloads " , mode: ' link'
41+ publishDir " ${ params.outdir} /ncbi " , mode: ' link'
1042 tag " Downloading..."
1143
1244 output:
@@ -36,28 +68,45 @@ process extractFamilies{
3668}
3769
// Turn every extracted fasta file into a [family, species, file] tuple.
// The file's base name is split on "_": the first token becomes the family,
// all remaining tokens re-joined with "_" become the species.
extracted_fasta
    .map { fastaPath ->
        def nameTokens  = fastaPath.baseName.split("_")
        def familyName  = nameTokens[0]
        def speciesName = nameTokens[1..-1].join("_")
        [familyName, speciesName, file(fastaPath)]
    }
    .set { extracted_fasta }
4173
4274
/*
 * writeFastas
 *
 * Copies each incoming fasta into a fresh regular file and publishes it as
 * <outdir>/genomes/<family>/<species>.fasta. The same
 * (family, species, fasta) tuple is forwarded on three channels:
 * for_bed, for_bwa and for_kraken.
 */
process writeFastas {
    tag "$family:$species"

    publishDir "${params.outdir}/genomes/${family}/",
        mode: 'link',
        pattern: "*.fasta",
        saveAs: { "${species}.fasta" }

    input:
    set val(family), val(species), file("input.fasta") from extracted_fasta

    output:
    set val(family), val(species), file("output.fasta") into (for_bed, for_bwa, for_kraken)

    script:
    """
    cat input.fasta > output.fasta
    """
}
5291
/*
 * indexFasta
 *
 * Stages each fasta from for_bwa under the name <species>.fasta, runs
 * `bwa index` on it and publishes the files matching <species>.fasta.*
 * into <outdir>/genomes/<family>/.
 */
process indexFasta {
    tag "$family:$species"

    publishDir "${params.outdir}/genomes/${family}/", mode: 'link'

    input:
    set val(family), val(species), file("${species}.fasta") from for_bwa

    output:
    file("${species}.fasta.*")

    script:
    """
    bwa index "${species}.fasta"
    """
}
58107
59108process writeBedFiles{
60- publishDir " ${ params.outdir} /Database/ masked/${ family } / " , saveAs: {" ${ species} .masked.bed" }
109+ publishDir " ${ params.outdir} /masked/" , saveAs: {" ${ species} .masked.bed" }, mode: ' link '
61110 tag " $family :$species "
62111
63112 input:
@@ -71,7 +120,6 @@ process writeBedFiles{
71120 dustmasker -in species.fasta -outfmt acclist | \
72121 python3 $baseDir /bin/dustmasker_interval_to_bed.py \
73122 > species.masked.bed;
74-
75123 """
76124}
77125
@@ -82,17 +130,14 @@ for_kraken
82130
83131process createKrakenDB{
84132 conda " $baseDir /envs/environment.yml"
85- publishDir " ${ params.outdir} /Kraken/${ dbname} " , pattern: " *.{tmp, idx, kdb, txt}"
86- publishDir " ${ params.outdir} /Kraken/${ dbname} /taxonomy" , pattern: " *.{dmp}"
87133 tag " THIS TAKES A LONG TIME"
88134
89135 input:
90136 each kmer from params. kmers
91137 file fasta_list from for_kraken
92138
93139 output:
94- file " ${ dbname} /taxonomy/*.dmp"
95- file " ${ dbname} /*.{tmp, idx, kdb, txt}"
140+ file " output.txt" into log
96141
97142 script:
98143 dbname = " Mito_db_kmer${ kmer} "
@@ -104,7 +149,12 @@ process createKrakenDB{
104149 done
105150 ${ params.kraken} /kraken-build --build --db ${ dbname} --kmer $kmer
106151 ${ params.kraken} /kraken-build --clean --db ${ dbname}
107- find ${ dbname} -type f -exec touch {} +
152+ if [[ -d ${ params.outdir} /kraken ]];\
153+ then rm -fr ${ params.outdir} /kraken;\
154+ fi;
155+ mkdir ${ params.outdir} /kraken
156+ cp -r ${ dbname} ${ params.outdir} /kraken
157+ touch "output.txt"
108158 """
109159}
110160
0 commit comments