Skip to content
Open
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGES.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

## __NEXT__

* align: Added the `--alignment-args` option for passing arguments to the alignment program. [#1789] (@vbadelita)

## 30.0.0 (15 April 2025)

Expand Down
23 changes: 19 additions & 4 deletions augur/align.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,10 @@
from .utils import nthreads_value
from collections import defaultdict

# Default command-line options for each supported alignment program, keyed by
# program name. generate_alignment_cmd() uses these when no custom alignment
# arguments are supplied, and rejects any method that is not a key here.
DEFAULT_ARGS = {
"mafft": "--reorder --anysymbol --nomemsave --adjustdirection",
}

class AlignmentError(Exception):
# TODO: this exception should potentially be renamed and made augur-wide
# thus allowing any module to raise it and have the message printed & augur
Expand All @@ -31,6 +35,8 @@ def register_arguments(parser):
parser.add_argument('--nthreads', type=nthreads_value, default=1,
help="number of threads to use; specifying the value 'auto' will cause the number of available CPU cores on your system, if determinable, to be used")
parser.add_argument('--method', default='mafft', choices=["mafft"], help="alignment program to use")
parser.add_argument('--alignment-args', help="arguments to pass to the alignment program (except for threads, keeplength if --existing-alignment is passed), overriding defaults. " +
f"mafft defaults: '{DEFAULT_ARGS['mafft']}'")
parser.add_argument('--reference-name', metavar="NAME", type=str, help="strip insertions relative to reference sequence; use if the reference is already in the input sequences")
parser.add_argument('--reference-sequence', metavar="PATH", type=str, help="Add this reference sequence to the dataset & strip insertions relative to this. Use if the reference is NOT already in the input sequences")
parser.add_argument('--remove-reference', action="store_true", default=False, help="remove reference sequence from the alignment")
Expand Down Expand Up @@ -132,7 +138,7 @@ def run(args):

# generate alignment command & run
log = args.output + ".log"
cmd = generate_alignment_cmd(args.method, args.nthreads, existing_aln_fname, seqs_to_align_fname, args.output, log)
cmd = generate_alignment_cmd(args.method, args.nthreads, existing_aln_fname, seqs_to_align_fname, args.output, log, alignment_args=args.alignment_args)
success = run_shell_command(cmd)
if not success:
raise AlignmentError(f"Error during alignment: please see the log file {log!r} for more details")
Expand Down Expand Up @@ -248,17 +254,26 @@ def read_reference(ref_fname):
"\n\tmake sure the file %s contains one sequence in genbank or fasta format"%ref_fname)
return ref_seq

def generate_alignment_cmd(method, nthreads, existing_aln_fname, seqs_to_align_fname, aln_fname, log_fname, alignment_args=None):
    """
    Build the shell command string that runs the alignment program.

    Parameters
    ----------
    method : str
        Name of the alignment program. Must be a key of ``DEFAULT_ARGS``.
    nthreads : int
        Value passed to the program's ``--thread`` option.
    existing_aln_fname : str or None
        If truthy, the sequences are added to this alignment using
        ``--add ... --keeplength``; otherwise a fresh alignment is computed.
    seqs_to_align_fname : str
        File containing the sequences to align.
    aln_fname : str
        File that the command's standard output is redirected to.
    log_fname : str
        File that the command's standard error is redirected to.
    alignment_args : str, optional
        Options that replace the defaults in ``DEFAULT_ARGS[method]``. When
        ``None`` (the default) the defaults are used. The string is inserted
        into the command verbatim, i.e. it is *not* shell-quoted, so that it
        can carry several space-separated options.

    Returns
    -------
    str
        The command line, ready to be run through a shell.

    Raises
    ------
    AlignmentError
        If ``method`` is not a supported alignment program.
    """
    if method not in DEFAULT_ARGS:
        raise AlignmentError('ERROR: alignment method %s not implemented'%method)

    # Custom arguments override the defaults entirely rather than extending them.
    args = DEFAULT_ARGS[method] if alignment_args is None else alignment_args

    if method != 'mafft':
        # Only reachable if a method gains an entry in DEFAULT_ARGS without a
        # matching command template below.
        raise AlignmentError('ERROR: alignment method %s not implemented'%method)

    if existing_aln_fname:
        # --add/--keeplength are always emitted here, independent of `args`.
        cmd = "mafft --add %s --keeplength %s --thread %d %s 1> %s 2> %s"%(shquote(seqs_to_align_fname), args, nthreads, shquote(existing_aln_fname), shquote(aln_fname), shquote(log_fname))
    else:
        cmd = "mafft %s --thread %d %s 1> %s 2> %s"%(args, nthreads, shquote(seqs_to_align_fname), shquote(aln_fname), shquote(log_fname))

    print("\nusing mafft to align via:\n\t" + cmd +
          " \n\n\tKatoh et al, Nucleic Acid Research, vol 30, issue 14"
          "\n\thttps://doi.org/10.1093%2Fnar%2Fgkf436\n")

    return cmd


Expand Down
41 changes: 38 additions & 3 deletions tests/test_align.py
Original file line number Diff line number Diff line change
Expand Up @@ -187,7 +187,7 @@ def test_prettify_alignment(self):

def test_generate_alignment_cmd_non_mafft(self):
    """An unsupported alignment method must raise AlignmentError."""
    # No `assert` around the call: the call itself is expected to raise, so
    # its return value is never available to assert on.
    with pytest.raises(align.AlignmentError):
        align.generate_alignment_cmd('no-mafft', 1, None, None, None, None, alignment_args=None)

def test_generate_alignment_cmd_mafft_existing_aln_fname(self):
existing_aln_fname = "existing_aln"
Expand All @@ -199,7 +199,8 @@ def test_generate_alignment_cmd_mafft_existing_aln_fname(self):
existing_aln_fname,
seqs_to_align_fname,
aln_fname,
log_fname)
log_fname,
alignment_args=None)

expected = "mafft --add %s --keeplength --reorder --anysymbol --nomemsave --adjustdirection --thread %d %s 1> %s 2> %s" % (quote(seqs_to_align_fname), 1, quote(existing_aln_fname), quote(aln_fname), quote(log_fname))

Expand All @@ -214,11 +215,45 @@ def test_generate_alignment_cmd_mafft_no_existing_aln_fname(self):
None,
seqs_to_align_fname,
aln_fname,
log_fname)
log_fname,
alignment_args=None)

expected = "mafft --reorder --anysymbol --nomemsave --adjustdirection --thread %d %s 1> %s 2> %s" % (1, quote(seqs_to_align_fname), quote(aln_fname), quote(log_fname))

assert result == expected

def test_generate_alignment_cmd_mafft_custom_args_existing_aln_fname(self):
    """Custom args replace the mafft defaults when adding to an existing alignment.

    ``--add`` and ``--keeplength`` must still be present in the command.
    """
    existing, to_align, output, log = "existing_aln", "seqs_to_align", "aln_fname", "log_fname"

    expected = "mafft --add %s --keeplength --auto --thread %d %s 1> %s 2> %s" % (
        quote(to_align),
        1,
        quote(existing),
        quote(output),
        quote(log),
    )

    actual = align.generate_alignment_cmd(
        "mafft", 1, existing, to_align, output, log,
        alignment_args="--auto",
    )

    assert actual == expected

def test_generate_alignment_cmd_mafft_custom_args_no_existing_aln_fname(self):
    """Custom args replace the mafft defaults when computing a fresh alignment."""
    to_align, output, log = "seqs_to_align", "aln_fname", "log_fname"

    expected = "mafft --auto --anysymbol --thread %d %s 1> %s 2> %s" % (
        1,
        quote(to_align),
        quote(output),
        quote(log),
    )

    actual = align.generate_alignment_cmd(
        "mafft", 1, None, to_align, output, log,
        alignment_args="--auto --anysymbol",
    )

    assert actual == expected

def test_read_alignment(self):
data_file = pathlib.Path('tests/data/align/test_aligned_sequences.fasta')
Expand Down