Skip to content

Commit 7f7e2ba

Browse files
committed
translate: Error when translating sequence lengths indivisible by 3
… instead of silently padding with N to translate to 'X'. That padding became an unused code path after "Error when reference gene length is indivisible by 3" (0cdcaa8), so any use of it now would be due to an internal error: a sequence with 1 or 2 extra bases indicates a problem with the input, and translating the partial codon to 'X' would wrongly imply that 3 bases were present.
1 parent 8490941 commit 7f7e2ba

File tree

2 files changed

+24
-8
lines changed

2 files changed

+24
-8
lines changed

augur/translate.py

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -51,25 +51,25 @@ def safe_translate(sequence):
5151
>>> safe_translate("")
5252
''
5353
>>> safe_translate("ATGT")
54-
'MX'
54+
Traceback (most recent call last):
55+
...
56+
ValueError: Sequence length 4 is not divisible by 3.
5557
"""
5658
from Bio.Data.CodonTable import TranslationError
5759
from Bio.Seq import CodonTable
5860

59-
#sequences not mod 3 give messy BiopythonWarning, so avoid by padding.
6061
if len(sequence)%3:
61-
sequence_padded = sequence + "N"*(3-len(sequence)%3)
62-
else:
63-
sequence_padded = sequence
62+
raise ValueError(f"Sequence length {len(sequence)} is not divisible by 3.")
63+
6464
try:
6565
# Attempt translation by extracting the sequence according to the
6666
# Biopython SeqFeature. In-frame gaps of three will translate as '-'.
67-
translated_sequence = str(Seq.Seq(sequence_padded).translate(gap='-'))
67+
translated_sequence = str(Seq.Seq(sequence).translate(gap='-'))
6868
except TranslationError:
6969
# Any other codon like '-AA' or 'NNT' etc will fail. Translate codons
7070
# one by one.
7171
codon_table = CodonTable.ambiguous_dna_by_name['Standard'].forward_table
72-
str_seq = str(sequence_padded)
72+
str_seq = str(sequence)
7373
codons = np.frombuffer(str_seq[:len(str_seq) - len(str_seq) % 3].encode(), dtype='S3').astype("U")
7474
assert len(codons) > 0
7575
aas = []

tests/test_translate.py

Lines changed: 17 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
from pathlib import Path
55
import sys
66

7+
import pytest
78
from Bio.Seq import Seq
89
from Bio.SeqFeature import SeqFeature, FeatureLocation
910

@@ -23,14 +24,29 @@ def test_safe_translate(self):
2324
(('ATG---',), 'M-'),
2425
(('ATGTAG',), 'M*'),
2526
(('',), ''),
26-
(('ATGT',), 'MX'),
2727
(('ATGA-G',), 'MX')]
2828

2929
# input each pair into the function and check
3030
for pair in params_and_outs:
3131
params, out = pair
3232
assert translate.safe_translate(*params) == out
3333

34+
def test_safe_translate_errors(self):
35+
'''
36+
Test that safe_translate raises ValueError when sequence length is not divisible by 3
37+
'''
38+
invalid_sequences = [
39+
'A',
40+
'AT',
41+
'ATGT',
42+
'ATGTA',
43+
'ATGTAGA',
44+
]
45+
46+
for seq in invalid_sequences:
47+
with pytest.raises(ValueError, match="not divisible by 3"):
48+
translate.safe_translate(seq)
49+
3450
def test_translate_feature(self):
3551
'''
3652
Test translate_feature from a dictionary of given nucleotides to dictionary of translated amino acids

0 commit comments

Comments
 (0)