Skip to content

Commit 9057c08

Browse files
committed
dram mags added
1 parent 5b7c1a5 commit 9057c08

File tree

4 files changed

+95
-52
lines changed

4 files changed

+95
-52
lines changed

workflow/rules/annotate/__main__.smk

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@ include: "bakta.smk"
22
include: "quast.smk"
33
include: "gtdbtk.smk"
44
include: "dram.smk"
5+
include: "dram_mag.smk"
56
include: "eggnog.smk"
67
include: "checkm2.smk"
78
include: "proteinortho.smk"
@@ -17,5 +18,6 @@ rule annotate:
1718
rules.annotate__checkm2__predict.output,
1819
rules.annotate__gtdbtk__classify.output,
1920
rules.annotate__dram.input,
21+
rules.annotate__dram_mags.input,
2022
rules.annotate__proteinortho.output,
2123
rules.annotate__phylophlan.output,

workflow/rules/annotate/dram.smk

Lines changed: 3 additions & 52 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
rule _annotate__dram__annotate:
1+
rule annotate__dram__annotate:
22
"""Annotate dereplicate genomes with DRAM"""
33
input:
44
dereplicated_genomes=DREP / "dereplicated_genomes.fa.gz",
@@ -43,56 +43,8 @@ rule _annotate__dram__annotate:
4343
--gtdb_taxonomy {input.gtdbtk_summary} \
4444
2>> {log} 1>&2
4545
"""
46-
47-
#rule _annotate__dram__stack:
48-
# """Stack DRAM annotations"""
49-
# input:
50-
# annotation=DRAM / "annotations.tsv",
51-
# trnas=DRAM / "trnas.tsv",
52-
# rrnas=DRAM / "rrnas.tsv",
53-
# output:
54-
# tarball=DRAM / "annotate.tar.gz",
55-
# log:
56-
# DRAM / "annotate_stack.log",
57-
# conda:
58-
# "__environment__.yml"
59-
# container:
60-
# docker["dram"]
61-
# params:
62-
# config=config["dram-config"],
63-
# out_dir=DRAM,
64-
# tmp_dir=DRAM / "annotate",
65-
# threads: config["resources"]["cpu_per_task"]["multi_thread"]
66-
# resources:
67-
# cpu_per_task=config["resources"]["cpu_per_task"]["multi_thread"],
68-
# mem_per_cpu=config["resources"]["mem_per_cpu"]["quitehighmem"] // config["resources"]["cpu_per_task"]["multi_thread"],
69-
# time = config["resources"]["time"]["shortrun"],
70-
# partition = config["resources"]["partition"]["longrun"]
71-
# shell:
72-
# """
73-
#
74-
# for file in annotations trnas rrnas ; do
75-
# ( csvstack \
76-
# --tabs \
77-
# {params.tmp_dir}/*/$file.tsv \
78-
# | csvformat \
79-
# --out-tabs \
80-
# > {params.out_dir}/$file.tsv \
81-
# ) 2>> {log}
82-
# done
83-
#
84-
# tar \
85-
# --create \
86-
# --directory {params.out_dir} \
87-
# --file {output.tarball} \
88-
# --remove-files \
89-
# --use-compress-program="pigz --processes {threads}" \
90-
# --verbose \
91-
# annotate \
92-
# 2>> {log} 1>&2
93-
# """
9446

95-
rule _annotate__dram__distill:
47+
rule annotate__dram__distill:
9648
"""Distill DRAM annotations."""
9749
input:
9850
annotations=DRAM / "annotate" / "annotations.tsv",
@@ -131,8 +83,7 @@ rule _annotate__dram__distill:
13183
rmdir {params.outdir_tmp} 2>> {log} 1>&2
13284
"""
13385

134-
13586
rule annotate__dram:
13687
"""Run DRAM on dereplicated genomes."""
13788
input:
138-
rules._annotate__dram__distill.output,
89+
rules.annotate__dram__distill.output,
Lines changed: 89 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,89 @@
1+
# For now, the GTDB-Tk taxonomy is left out of the per-MAG annotation, since the taxonomic assignment is already obtained from the global run.
2+
3+
rule annotate__dram_mag__annotate:
    """Annotate the MAGScoT contigs of a single assembly with DRAM.

    Input:  MAGSCOT/{assembly_id}.fa.gz plus the DRAM database (listed only
            so that the database is present before the job starts).
    Output: annotations/tRNA/rRNA tables under
            DRAMMAG/{assembly_id}/annotate/, each suffixed with the
            assembly id.
    """
    input:
        contigs=MAGSCOT / "{assembly_id}.fa.gz",
        # gtdbtk_summary=GTDBTK / "gtdbtk.summary.tsv",
        dram_db=features["databases"]["dram"],
    output:
        annotation=DRAMMAG / "{assembly_id}" / "annotate" / "annotations_{assembly_id}.tsv",
        trnas=DRAMMAG / "{assembly_id}" / "annotate" / "trnas_{assembly_id}.tsv",
        rrnas=DRAMMAG / "{assembly_id}" / "annotate" / "rrnas_{assembly_id}.tsv",
    log:
        # Keep the log next to the per-MAG results (DRAMMAG), not in the
        # folder of the global DRAM run (DRAM).
        DRAMMAG / "{assembly_id}" / "annotate_{assembly_id}.log",
    conda:
        "__environment__.yml"
    container:
        docker["dram"]
    params:
        config=config["dram-config"],
        # NOTE(review): min_contig_size is declared but never passed to
        # DRAM.py, so DRAM's own default applies -- confirm whether
        # `--min_contig_size {params.min_contig_size}` was intended.
        min_contig_size=1500,
        out_dir=lambda wildcards: f"{DRAMMAG}/{wildcards.assembly_id}",
        tmp_dir=lambda wildcards: f"{DRAMMAG}/{wildcards.assembly_id}/annotate",
    threads: config["resources"]["cpu_per_task"]["multi_thread"]
    resources:
        cpu_per_task=config["resources"]["cpu_per_task"]["multi_thread"],
        mem_per_cpu=config["resources"]["mem_per_cpu"]["quitehighmem"] // config["resources"]["cpu_per_task"]["multi_thread"],
        time=config["resources"]["time"]["longrun"],
        nvme=config["resources"]["nvme"]["small"],
        partition=config["resources"]["partition"]["small"],
    # Shell steps:
    #   1. Remove the output directory that Snakemake pre-creates before
    #      handing it to DRAM as --output_dir.
    #   2. Record host / temp dir / disk usage in the log for debugging.
    #   3. Run DRAM.py annotate.
    #   4. DRAM writes annotations.tsv / trnas.tsv / rrnas.tsv; rename them to
    #      the assembly-suffixed names declared in `output`, otherwise the
    #      declared outputs never exist and the job is reported as failed.
    shell:
        """
        rm -rf {params.tmp_dir}

        echo "Hostname: $(hostname)" 2>> {log} 1>&2
        echo "Temporary directory: $TMPDIR" 2>> {log} 1>&2
        df -h 2>> {log} 1>&2

        DRAM.py annotate \
            --config_loc {params.config} \
            --input_fasta {input.contigs} \
            --output_dir {params.tmp_dir} \
            --threads {threads} \
        2>> {log} 1>&2

        for file in annotations trnas rrnas ; do
            mv \
                {params.tmp_dir}/$file.tsv \
                {params.tmp_dir}/${{file}}_{wildcards.assembly_id}.tsv \
            2>> {log} 1>&2
        done
        """
46+
47+
rule annotate__dram_mag__distill:
    """Distill the per-assembly DRAM annotations into summary reports.

    Input:  the annotations/tRNA/rRNA tables of one assembly plus the DRAM
            database (dependency only, not referenced in the command).
    Output: genome_stats.tsv, metabolism_summary.xlsx, product.html and
            product.tsv under DRAMMAG/{assembly_id}/.
    """
    input:
        annotation=DRAMMAG / "{assembly_id}" / "annotate" / "annotations_{assembly_id}.tsv",
        trnas=DRAMMAG / "{assembly_id}" / "annotate" / "trnas_{assembly_id}.tsv",
        rrnas=DRAMMAG / "{assembly_id}" / "annotate" / "rrnas_{assembly_id}.tsv",
        dram_db=features["databases"]["dram"],
    output:
        genome=DRAMMAG / "{assembly_id}" / "genome_stats.tsv",
        metabolism=DRAMMAG / "{assembly_id}" / "metabolism_summary.xlsx",
        product_html=DRAMMAG / "{assembly_id}" / "product.html",
        product_tsv=DRAMMAG / "{assembly_id}" / "product.tsv",
    log:
        # NOTE(review): the ".log2" extension looks unintentional -- confirm.
        DRAMMAG / "{assembly_id}" / "distill.log2",
    conda:
        "__environment__.yml"
    container:
        docker["dram"]
    resources:
        mem_per_cpu=config["resources"]["mem_per_cpu"]["highmem"],
        time=config["resources"]["time"]["longrun"],
    params:
        config=config["dram-config"],
        outdir=lambda wildcards: f"{DRAMMAG}/{wildcards.assembly_id}",
        outdir_tmp=lambda wildcards: f"{DRAMMAG}/{wildcards.assembly_id}/distill",
    # DRAM writes into a scratch "distill" sub-directory; its contents are then
    # moved one level up to where the declared outputs live and the empty
    # scratch directory is removed.
    # The input is named `annotation` (singular), so the command must reference
    # {input.annotation}; {input.annotations} does not exist and makes
    # Snakemake fail while formatting the shell command.
    shell:
        """
        DRAM.py distill \
            --config_loc {params.config} \
            --input_file {input.annotation} \
            --rrna_path {input.rrnas} \
            --trna_path {input.trnas} \
            --output_dir {params.outdir_tmp} \
        2> {log} 1>&2

        mv {params.outdir_tmp}/* {params.outdir}/ 2>> {log} 1>&2
        rmdir {params.outdir_tmp} 2>> {log} 1>&2
        """
85+
86+
rule annotate__dram_mags:
    """Request the per-assembly DRAM annotation table for every assembly.

    Only the `annotation` output of annotate__dram_mag__annotate is requested
    here; the distill reports are not part of this target.
    """
    input:
        expand(
            rules.annotate__dram_mag__annotate.output.annotation,
            assembly_id=ASSEMBLIES,
        ),

workflow/rules/folders.smk

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,7 @@ GTDBTK = ANN / "gtdbtk/"
5555
QUAST = ANN / "quast/"
5656
CAMPER = ANN / "camper/"
5757
DRAM = ANN / "dram/"
58+
DRAMMAG = ANN / "dram_mags/"
5859
CHECKM = ANN / "checkm2"
5960
BAKTA = ANN / "bakta"
6061
BAKTAMAG = ANN / "bakta_mags"

0 commit comments

Comments
 (0)