Skip to content

Commit ebb73d3

Browse files
authored
Merge pull request #1908: translate: Error when translating sequence lengths indivisible by 3
2 parents 4be3d1e + 7f7e2ba commit ebb73d3

File tree

2 files changed

+24
-8
lines changed

2 files changed

+24
-8
lines changed

augur/translate.py

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -51,25 +51,25 @@ def safe_translate(sequence):
5151
>>> safe_translate("")
5252
''
5353
>>> safe_translate("ATGT")
54-
'MX'
54+
Traceback (most recent call last):
55+
...
56+
ValueError: Sequence length 4 is not divisible by 3.
5557
"""
5658
from Bio.Data.CodonTable import TranslationError
5759
from Bio.Seq import CodonTable
5860

59-
#sequences not mod 3 give messy BiopythonWarning, so avoid by padding.
6061
if len(sequence)%3:
61-
sequence_padded = sequence + "N"*(3-len(sequence)%3)
62-
else:
63-
sequence_padded = sequence
62+
raise ValueError(f"Sequence length {len(sequence)} is not divisible by 3.")
63+
6464
try:
6565
# Attempt translation by extracting the sequence according to the
6666
# Biopython SeqFeature. In-frame gaps of three will translate as '-'.
67-
translated_sequence = str(Seq.Seq(sequence_padded).translate(gap='-'))
67+
translated_sequence = str(Seq.Seq(sequence).translate(gap='-'))
6868
except TranslationError:
6969
# Any other codon like '-AA' or 'NNT' etc will fail. Translate codons
7070
# one by one.
7171
codon_table = CodonTable.ambiguous_dna_by_name['Standard'].forward_table
72-
str_seq = str(sequence_padded)
72+
str_seq = str(sequence)
7373
codons = np.frombuffer(str_seq[:len(str_seq) - len(str_seq) % 3].encode(), dtype='S3').astype("U")
7474
assert len(codons) > 0
7575
aas = []

tests/test_translate.py

Lines changed: 17 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
from pathlib import Path
55
import sys
66

7+
import pytest
78
from Bio.Seq import Seq
89
from Bio.SeqFeature import SeqFeature, FeatureLocation
910

@@ -23,14 +24,29 @@ def test_safe_translate(self):
2324
(('ATG---',), 'M-'),
2425
(('ATGTAG',), 'M*'),
2526
(('',), ''),
26-
(('ATGT',), 'MX'),
2727
(('ATGA-G',), 'MX')]
2828

2929
# input each pair into the function and check
3030
for pair in params_and_outs:
3131
params, out = pair
3232
assert translate.safe_translate(*params) == out
3333

34+
def test_safe_translate_errors(self):
35+
'''
36+
Test that safe_translate raises ValueError when sequence length is not divisible by 3
37+
'''
38+
invalid_sequences = [
39+
'A',
40+
'AT',
41+
'ATGT',
42+
'ATGTA',
43+
'ATGTAGA',
44+
]
45+
46+
for seq in invalid_sequences:
47+
with pytest.raises(ValueError, match="not divisible by 3"):
48+
translate.safe_translate(seq)
49+
3450
def test_translate_feature(self):
3551
'''
3652
Test translate_feature from a dictionary of given nucleotides to dictionary of translated amino acids

0 commit comments

Comments
 (0)