Skip to content
Open
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGES.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

## __NEXT__

* align: Added the `--alignment-args` option for passing arguments to the alignment program. [#1789] (@vbadelita)

## 30.0.0 (15 April 2025)

Expand Down
27 changes: 22 additions & 5 deletions augur/align.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,10 @@
from .utils import nthreads_value
from collections import defaultdict

# Default command-line options for each supported alignment program, keyed by
# the value accepted by --method. These are used whenever no explicit
# --alignment-args value is supplied.
DEFAULT_ARGS = dict(
    mafft="--reorder --anysymbol --nomemsave --adjustdirection",
)

class AlignmentError(Exception):
# TODO: this exception should potentially be renamed and made augur-wide
# thus allowing any module to raise it and have the message printed & augur
Expand All @@ -31,6 +35,8 @@ def register_arguments(parser):
parser.add_argument('--nthreads', type=nthreads_value, default=1,
help="number of threads to use; specifying the value 'auto' will cause the number of available CPU cores on your system, if determinable, to be used")
parser.add_argument('--method', default='mafft', choices=["mafft"], help="alignment program to use")
parser.add_argument('--alignment-args', help="arguments to pass to the alignment program (except for threads, keeplength if --existing-alignment is passed), overriding defaults. " +
f"mafft defaults: '{DEFAULT_ARGS['mafft']}'")
parser.add_argument('--reference-name', metavar="NAME", type=str, help="strip insertions relative to reference sequence; use if the reference is already in the input sequences")
parser.add_argument('--reference-sequence', metavar="PATH", type=str, help="Add this reference sequence to the dataset & strip insertions relative to this. Use if the reference is NOT already in the input sequences")
parser.add_argument('--remove-reference', action="store_true", default=False, help="remove reference sequence from the alignment")
Expand Down Expand Up @@ -132,7 +138,7 @@ def run(args):

# generate alignment command & run
log = args.output + ".log"
cmd = generate_alignment_cmd(args.method, args.nthreads, existing_aln_fname, seqs_to_align_fname, args.output, log)
cmd = generate_alignment_cmd(args.method, args.nthreads, existing_aln_fname, seqs_to_align_fname, args.output, log, alignment_args=args.alignment_args)
success = run_shell_command(cmd)
if not success:
raise AlignmentError(f"Error during alignment: please see the log file {log!r} for more details")
Expand Down Expand Up @@ -248,17 +254,28 @@ def read_reference(ref_fname):
"\n\tmake sure the file %s contains one sequence in genbank or fasta format"%ref_fname)
return ref_seq

def generate_alignment_cmd(method, nthreads, existing_aln_fname, seqs_to_align_fname, aln_fname, log_fname):
def generate_alignment_cmd(method, nthreads, existing_aln_fname, seqs_to_align_fname, aln_fname, log_fname, alignment_args):
if method not in DEFAULT_ARGS:
raise AlignmentError('ERROR: alignment method %s not implemented'%method)

if alignment_args is None:
alignment_args = DEFAULT_ARGS[method]

if method=='mafft':
files_to_align = shquote(seqs_to_align_fname)
if existing_aln_fname:
cmd = "mafft --add %s --keeplength --reorder --anysymbol --nomemsave --adjustdirection --thread %d %s 1> %s 2> %s"%(shquote(seqs_to_align_fname), nthreads, shquote(existing_aln_fname), shquote(aln_fname), shquote(log_fname))
else:
cmd = "mafft --reorder --anysymbol --nomemsave --adjustdirection --thread %d %s 1> %s 2> %s"%(nthreads, shquote(seqs_to_align_fname), shquote(aln_fname), shquote(log_fname))
# If there is an existing alignment, then seqs_to_align_fname becomes a parameter of --add
# and existing_aln_fname becomes the anonymous parameter
files_to_align = f"--add {shquote(seqs_to_align_fname)} {shquote(existing_aln_fname)}"
alignment_args = " ".join(["--keeplength", alignment_args])

cmd = f"mafft {alignment_args} --thread {nthreads} {files_to_align} 1> {shquote(aln_fname)} 2> {shquote(log_fname)}"
Comment on lines +274 to +277
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It should be checked that alignment_args does not conflict with the hardcoded options (--add, --keeplength, --thread). See usage of augur.tree.check_conflicting_args as an example of how this is done in augur tree.

Actually, I think augur.tree.check_conflicting_args can be repurposed and used here by moving it into augur.utils and replacing the "tree builder" references with something like "external program", but that's extra work. It's fine to copy the function as augur.align.check_conflicting_args.

print("\nusing mafft to align via:\n\t" + cmd +
" \n\n\tKatoh et al, Nucleic Acid Research, vol 30, issue 14"
"\n\thttps://doi.org/10.1093%2Fnar%2Fgkf436\n")
else:
raise AlignmentError('ERROR: alignment method %s not implemented'%method)

return cmd


Expand Down
43 changes: 39 additions & 4 deletions tests/test_align.py
Original file line number Diff line number Diff line change
Expand Up @@ -187,7 +187,7 @@ def test_prettify_alignment(self):

def test_generate_alignment_cmd_non_mafft(self):
with pytest.raises(align.AlignmentError):
assert align.generate_alignment_cmd('no-mafft', 1, None, None, None, None)
assert align.generate_alignment_cmd('no-mafft', 1, None, None, None, None, alignment_args=None)

def test_generate_alignment_cmd_mafft_existing_aln_fname(self):
existing_aln_fname = "existing_aln"
Expand All @@ -199,9 +199,10 @@ def test_generate_alignment_cmd_mafft_existing_aln_fname(self):
existing_aln_fname,
seqs_to_align_fname,
aln_fname,
log_fname)
log_fname,
alignment_args=None)

expected = "mafft --add %s --keeplength --reorder --anysymbol --nomemsave --adjustdirection --thread %d %s 1> %s 2> %s" % (quote(seqs_to_align_fname), 1, quote(existing_aln_fname), quote(aln_fname), quote(log_fname))
expected = "mafft --keeplength --reorder --anysymbol --nomemsave --adjustdirection --thread %d --add %s %s 1> %s 2> %s" % (1, quote(seqs_to_align_fname), quote(existing_aln_fname), quote(aln_fname), quote(log_fname))

assert result == expected

Expand All @@ -214,11 +215,45 @@ def test_generate_alignment_cmd_mafft_no_existing_aln_fname(self):
None,
seqs_to_align_fname,
aln_fname,
log_fname)
log_fname,
alignment_args=None)

expected = "mafft --reorder --anysymbol --nomemsave --adjustdirection --thread %d %s 1> %s 2> %s" % (1, quote(seqs_to_align_fname), quote(aln_fname), quote(log_fname))

assert result == expected

def test_generate_alignment_cmd_mafft_custom_args_existing_aln_fname(self):
    """Custom args follow --keeplength and replace the defaults when adding to an existing alignment."""
    existing, to_align = "existing_aln", "seqs_to_align"
    output, log = "aln_fname", "log_fname"

    cmd = align.generate_alignment_cmd(
        "mafft", 1, existing, to_align, output, log,
        alignment_args="--auto",
    )

    # The sequences to align are passed via --add; the existing alignment is
    # the positional input.
    quoted = (1, quote(to_align), quote(existing), quote(output), quote(log))
    assert cmd == "mafft --keeplength --auto --thread %d --add %s %s 1> %s 2> %s" % quoted

def test_generate_alignment_cmd_mafft_custom_args_no_existing_aln_fname(self):
    """Custom args replace the defaults verbatim when there is no existing alignment."""
    to_align, output, log = "seqs_to_align", "aln_fname", "log_fname"

    cmd = align.generate_alignment_cmd(
        "mafft", 1, None, to_align, output, log,
        alignment_args="--auto --anysymbol",
    )

    # Without an existing alignment, neither --add nor --keeplength is expected.
    quoted = (1, quote(to_align), quote(output), quote(log))
    assert cmd == "mafft --auto --anysymbol --thread %d %s 1> %s 2> %s" % quoted

def test_read_alignment(self):
data_file = pathlib.Path('tests/data/align/test_aligned_sequences.fasta')
Expand Down
Loading