-
Notifications
You must be signed in to change notification settings - Fork 136
Aligner extra args #1790
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: master
Are you sure you want to change the base?
Aligner extra args #1790
Changes from 5 commits
06f13fa
c1b04fb
951c9be
94cdea5
b251d0e
30044ea
eddc615
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -14,6 +14,10 @@ | |
| from .utils import nthreads_value | ||
| from collections import defaultdict | ||
|
|
||
| DEFAULT_ARGS = { | ||
| "mafft": "--reorder --anysymbol --nomemsave --adjustdirection", | ||
| } | ||
|
|
||
| class AlignmentError(Exception): | ||
| # TODO: this exception should potentially be renamed and made augur-wide | ||
| # thus allowing any module to raise it and have the message printed & augur | ||
|
|
@@ -31,6 +35,8 @@ def register_arguments(parser): | |
| parser.add_argument('--nthreads', type=nthreads_value, default=1, | ||
| help="number of threads to use; specifying the value 'auto' will cause the number of available CPU cores on your system, if determinable, to be used") | ||
| parser.add_argument('--method', default='mafft', choices=["mafft"], help="alignment program to use") | ||
| parser.add_argument('--alignment-args', help="arguments to pass to the alignment program (except for threads, keeplength if --existing-alignment is passed), overriding defaults. " + | ||
| f"mafft defaults: '{DEFAULT_ARGS['mafft']}'") | ||
| parser.add_argument('--reference-name', metavar="NAME", type=str, help="strip insertions relative to reference sequence; use if the reference is already in the input sequences") | ||
| parser.add_argument('--reference-sequence', metavar="PATH", type=str, help="Add this reference sequence to the dataset & strip insertions relative to this. Use if the reference is NOT already in the input sequences") | ||
| parser.add_argument('--remove-reference', action="store_true", default=False, help="remove reference sequence from the alignment") | ||
|
|
@@ -132,7 +138,7 @@ def run(args): | |
|
|
||
| # generate alignment command & run | ||
| log = args.output + ".log" | ||
| cmd = generate_alignment_cmd(args.method, args.nthreads, existing_aln_fname, seqs_to_align_fname, args.output, log) | ||
| cmd = generate_alignment_cmd(args.method, args.nthreads, existing_aln_fname, seqs_to_align_fname, args.output, log, alignment_args=args.alignment_args) | ||
| success = run_shell_command(cmd) | ||
| if not success: | ||
| raise AlignmentError(f"Error during alignment: please see the log file {log!r} for more details") | ||
|
|
@@ -248,17 +254,28 @@ def read_reference(ref_fname): | |
| "\n\tmake sure the file %s contains one sequence in genbank or fasta format"%ref_fname) | ||
| return ref_seq | ||
|
|
||
| def generate_alignment_cmd(method, nthreads, existing_aln_fname, seqs_to_align_fname, aln_fname, log_fname): | ||
| def generate_alignment_cmd(method, nthreads, existing_aln_fname, seqs_to_align_fname, aln_fname, log_fname, alignment_args): | ||
| if method not in DEFAULT_ARGS: | ||
| raise AlignmentError('ERROR: alignment method %s not implemented'%method) | ||
|
|
||
| if alignment_args is None: | ||
| alignment_args = DEFAULT_ARGS[method] | ||
|
|
||
| if method=='mafft': | ||
| files_to_align = shquote(seqs_to_align_fname) | ||
| if existing_aln_fname: | ||
| cmd = "mafft --add %s --keeplength --reorder --anysymbol --nomemsave --adjustdirection --thread %d %s 1> %s 2> %s"%(shquote(seqs_to_align_fname), nthreads, shquote(existing_aln_fname), shquote(aln_fname), shquote(log_fname)) | ||
| else: | ||
| cmd = "mafft --reorder --anysymbol --nomemsave --adjustdirection --thread %d %s 1> %s 2> %s"%(nthreads, shquote(seqs_to_align_fname), shquote(aln_fname), shquote(log_fname)) | ||
| # If there is an existing alignment, then seqs_to_align_fname becomes a parameter of --add | ||
| # and existing_aln_fname becomes the anonymous parameter | ||
| files_to_align = f"--add {shquote(seqs_to_align_fname)} {shquote(existing_aln_fname)}" | ||
| alignment_args = " ".join(["--keeplength", alignment_args]) | ||
|
|
||
| cmd = f"mafft {alignment_args} --thread {nthreads} {files_to_align} 1> {shquote(aln_fname)} 2> {shquote(log_fname)}" | ||
|
Comment on lines +274 to +277
Member
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. It should be checked that […] Actually, I think […] |
||
| print("\nusing mafft to align via:\n\t" + cmd + | ||
| " \n\n\tKatoh et al, Nucleic Acid Research, vol 30, issue 14" | ||
| "\n\thttps://doi.org/10.1093%2Fnar%2Fgkf436\n") | ||
| else: | ||
| raise AlignmentError('ERROR: alignment method %s not implemented'%method) | ||
|
|
||
| return cmd | ||
|
|
||
|
|
||
|
|
||
Uh oh!
There was an error while loading. Please reload this page.