|
| 1 | +# For now, the GTDB-Tk annotation is left out, since the taxonomic assignment is already obtained from the global run |
| 2 | + |
rule annotate__dram_mag__annotate:
    """Annotate the genomes of one assembly with `DRAM.py annotate`.

    Wildcards:
        assembly_id: identifier used to locate the input FASTA and to name
            the per-assembly output directory and files.

    Inputs:
        contigs: gzipped FASTA under MAGSCOT for this assembly.
        dram_db: DRAM database path; not referenced in the shell command, it
            only makes the rule depend on the database being present.

    Outputs:
        annotation / trnas / rrnas: TSV tables expected inside
            DRAMMAG/<assembly_id>/annotate.

    The whole annotate directory is removed before DRAM is started, and all
    stdout/stderr (including host diagnostics) is appended to the log.
    """
    input:
        contigs=MAGSCOT / "{assembly_id}.fa.gz",
        # gtdbtk_summary=GTDBTK / "gtdbtk.summary.tsv",
        dram_db=features["databases"]["dram"],
    output:
        # NOTE(review): these names carry an `_{assembly_id}` suffix; confirm
        # that DRAM.py really writes files with these names into --output_dir.
        annotation=DRAMMAG / "{assembly_id}" / "annotate" / "annotations_{assembly_id}.tsv",
        trnas=DRAMMAG / "{assembly_id}" / "annotate" / "trnas_{assembly_id}.tsv",
        rrnas=DRAMMAG / "{assembly_id}" / "annotate" / "rrnas_{assembly_id}.tsv",
    log:
        # Kept under DRAMMAG (not DRAM) so the log sits next to this rule's
        # outputs, and outside the annotate directory that the shell wipes.
        DRAMMAG / "{assembly_id}" / "annotate_{assembly_id}.log",
    conda:
        "__environment__.yml"
    container:
        docker["dram"]
    params:
        config=config["dram-config"],
        # NOTE(review): min_contig_size and out_dir are declared but never
        # referenced by the shell command below; presumably DRAM falls back to
        # its own default contig-size cutoff -- confirm whether
        # `--min_contig_size {params.min_contig_size}` should be passed.
        min_contig_size=1500,
        out_dir=lambda wildcards: f"{DRAMMAG}/{wildcards.assembly_id}",
        tmp_dir=lambda wildcards: f"{DRAMMAG}/{wildcards.assembly_id}/annotate",
    threads: config["resources"]["cpu_per_task"]["multi_thread"]
    resources:
        cpu_per_task=config["resources"]["cpu_per_task"]["multi_thread"],
        # The "quitehighmem" value is divided across the CPUs of the task.
        mem_per_cpu=config["resources"]["mem_per_cpu"]["quitehighmem"] // config["resources"]["cpu_per_task"]["multi_thread"],
        time=config["resources"]["time"]["longrun"],
        nvme=config["resources"]["nvme"]["small"],
        partition=config["resources"]["partition"]["small"],
    shell:
        """
        rm -rf {params.tmp_dir}

        echo "Hostname: $(hostname)" 2>> {log} 1>&2
        echo "Temporary directory: $TMPDIR" 2>> {log} 1>&2
        df -h 2>> {log} 1>&2

        DRAM.py annotate \
            --config_loc {params.config} \
            --input_fasta {input.contigs} \
            --output_dir {params.tmp_dir} \
            --threads {threads} \
        2>> {log} 1>&2
        """
| 46 | + |
rule annotate__dram_mag__distill:
    """Distill the DRAM annotations of one assembly with `DRAM.py distill`.

    Wildcards:
        assembly_id: identifier selecting the per-assembly DRAMMAG directory.

    Inputs:
        annotation / trnas / rrnas: tables produced in
            DRAMMAG/<assembly_id>/annotate.
        dram_db: DRAM database path; not referenced in the shell command, it
            only makes the rule depend on the database being present.

    Outputs:
        genome, metabolism, product_html, product_tsv: summary files expected
            in DRAMMAG/<assembly_id> after the scratch directory content has
            been moved there.

    DRAM writes into a scratch `distill` sub-directory; its content is then
    moved one level up and the empty scratch directory is removed.
    """
    input:
        annotation=DRAMMAG / "{assembly_id}" / "annotate" / "annotations_{assembly_id}.tsv",
        trnas=DRAMMAG / "{assembly_id}" / "annotate" / "trnas_{assembly_id}.tsv",
        rrnas=DRAMMAG / "{assembly_id}" / "annotate" / "rrnas_{assembly_id}.tsv",
        dram_db=features["databases"]["dram"],
    output:
        genome=DRAMMAG / "{assembly_id}" / "genome_stats.tsv",
        metabolism=DRAMMAG / "{assembly_id}" / "metabolism_summary.xlsx",
        product_html=DRAMMAG / "{assembly_id}" / "product.html",
        product_tsv=DRAMMAG / "{assembly_id}" / "product.tsv",
    log:
        # NOTE(review): the `.log2` suffix is presumably deliberate, so this
        # log cannot collide with a `distill.log` moved in by the `mv` below
        # -- confirm before renaming.
        DRAMMAG / "{assembly_id}" / "distill.log2",
    conda:
        "__environment__.yml"
    container:
        docker["dram"]
    resources:
        mem_per_cpu=config["resources"]["mem_per_cpu"]["highmem"],
        time=config["resources"]["time"]["longrun"],
    params:
        config=config["dram-config"],
        outdir=lambda wildcards: f"{DRAMMAG}/{wildcards.assembly_id}",
        outdir_tmp=lambda wildcards: f"{DRAMMAG}/{wildcards.assembly_id}/distill",
    shell:
        """
        rm -rf {params.outdir_tmp}

        DRAM.py distill \
            --config_loc {params.config} \
            --input_file {input.annotation} \
            --rrna_path {input.rrnas} \
            --trna_path {input.trnas} \
            --output_dir {params.outdir_tmp} \
        2> {log} 1>&2

        mv {params.outdir_tmp}/* {params.outdir}/ 2>> {log} 1>&2
        rmdir {params.outdir_tmp} 2>> {log} 1>&2
        """
| 85 | + |
rule annotate__dram_mags:
    """Target rule: request the DRAM annotation table of every assembly.

    Expands the per-assembly `annotations_{assembly_id}.tsv` path over all
    entries of ASSEMBLIES, so asking for this rule triggers
    `annotate__dram_mag__annotate` once per assembly.
    """
    input:
        expand(
            DRAMMAG / "{assembly_id}" / "annotate" / "annotations_{assembly_id}.tsv",
            assembly_id=ASSEMBLIES,
        ),
0 commit comments