Skip to content

Commit c4eed12

Browse files
committed
Have a functional version
1 parent cbdcff8 commit c4eed12

File tree

2 files changed

+65
-17
lines changed

2 files changed

+65
-17
lines changed

.gitignore

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,5 @@
11
.nextflow*
22
work*/
3-
Downloads/
4-
Database/
5-
Kraken/
3+
out/
64
conda/
75

main.nf

Lines changed: 64 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -2,11 +2,43 @@
22

33
params.kraken = "/home/merlin_szymanski/Kraken/install/"
44
params.kmers = ["22"]
5-
params.outdir = "./out"
5+
params.outdir = false
6+
7+
def helpMessage(){
8+
log.info"""
9+
DATASTRUCTURE PIPELINE
10+
11+
Download all mammalian mitochondrial genomes from the current NCBI/Refseq
12+
release and create the datastructure required by the sediment_nf pipeline.
13+
Creates 4 folders:
14+
15+
1. ncbi: raw files downloaded from NCBI
16+
2. genomes: Fasta files grouped by Family
17+
3. masked: a masked bed file for each fasta file in 'genomes'
18+
4. kraken: the kraken databases for the given kmers
19+
20+
USAGE:
21+
nextflow run path/to/main.nf --outdir ABSOLUTE_PATH
22+
23+
required:
24+
--outdir PATH: absolute path to the save-dir. e.g. "/mnt/scratch/.../out"
25+
26+
optional:
27+
--kraken PATH: path to your kraken installation folder.
28+
default: '/home/merlin_szymanski/Kraken/install'
29+
30+
--kmers ARRAY: Array of kmers for which databases should be created (this is not tested)
31+
default: '["22"]'
32+
""".stripIndent()
33+
}
34+
if(params.outdir == false){
35+
helpMessage()
36+
exit 0
37+
}
638

739

840
process downloadGenomes{
9-
publishDir "${params.outdir}/Downloads", mode: 'link'
41+
publishDir "${params.outdir}/ncbi", mode: 'link'
1042
tag "Downloading..."
1143

1244
output:
@@ -36,28 +68,45 @@ process extractFamilies{
3668
}
3769

3870
extracted_fasta
39-
.map{[it.baseName.split("_")[0], it.baseName.split("_")[1..2].join("_"), file(it)]}
71+
.map{[it.baseName.split("_")[0], it.baseName.split("_")[1..-1].join("_"), file(it)]}
4072
.set{extracted_fasta}
4173

4274

4375
process writeFastas{
44-
publishDir "${params.outdir}/Database/genomes/${family}/", saveAs: {"${species}.fasta"}
76+
publishDir "${params.outdir}/genomes/${family}/", saveAs: {"${species}.fasta"}, pattern: "*.fasta", mode:'link'
77+
4578
tag "$family:$species"
4679

4780
input:
48-
set family, species, file(fasta) from extracted_fasta
81+
set family, species, "input.fasta" from extracted_fasta
4982

5083
output:
51-
set family, species, file(fasta) into (for_bed, for_kraken)
84+
set family, species, "output.fasta" into (for_bed, for_bwa, for_kraken)
85+
86+
script:
87+
"""
88+
cat input.fasta > output.fasta
89+
"""
90+
}
5291

92+
process indexFasta{
93+
publishDir "${params.outdir}/genomes/${family}/", mode: 'link'
94+
tag "$family:$species"
95+
96+
input:
97+
set family, species, "${species}.fasta" from for_bwa
98+
99+
output:
100+
file "${species}.fasta.*"
101+
53102
script:
54103
"""
55-
touch "$fasta"
104+
bwa index "${species}.fasta"
56105
"""
57106
}
58107

59108
process writeBedFiles{
60-
publishDir "${params.outdir}/Database/masked/${family}/", saveAs: {"${species}.masked.bed"}
109+
publishDir "${params.outdir}/masked/", saveAs: {"${species}.masked.bed"}, mode:'link'
61110
tag "$family:$species"
62111

63112
input:
@@ -71,7 +120,6 @@ process writeBedFiles{
71120
dustmasker -in species.fasta -outfmt acclist | \
72121
python3 $baseDir/bin/dustmasker_interval_to_bed.py \
73122
> species.masked.bed;
74-
75123
"""
76124
}
77125

@@ -82,17 +130,14 @@ for_kraken
82130

83131
process createKrakenDB{
84132
conda "$baseDir/envs/environment.yml"
85-
publishDir "${params.outdir}/Kraken/${dbname}", pattern: "*.{tmp, idx, kdb, txt}"
86-
publishDir "${params.outdir}/Kraken/${dbname}/taxonomy", pattern: "*.{dmp}"
87133
tag "THIS TAKES A LONG TIME"
88134

89135
input:
90136
each kmer from params.kmers
91137
file fasta_list from for_kraken
92138

93139
output:
94-
file "${dbname}/taxonomy/*.dmp"
95-
file "${dbname}/*.{tmp, idx, kdb, txt}"
140+
file "output.txt" into log
96141

97142
script:
98143
dbname = "Mito_db_kmer${kmer}"
@@ -104,7 +149,12 @@ process createKrakenDB{
104149
done
105150
${params.kraken}/kraken-build --build --db ${dbname} --kmer $kmer
106151
${params.kraken}/kraken-build --clean --db ${dbname}
107-
find ${dbname} -type f -exec touch {} +
152+
if [[ -d ${params.outdir}/kraken ]];\
153+
then rm -fr ${params.outdir}/kraken;\
154+
fi;
155+
mkdir ${params.outdir}/kraken
156+
cp -r ${dbname} ${params.outdir}/kraken
157+
touch "output.txt"
108158
"""
109159
}
110160

0 commit comments

Comments
 (0)