Skip to content

Commit d5e9ee1

Browse files
committed
🚧 Error when translating sequence lengths indivisible by 3
… instead of silently padding the sequence with N so that the trailing partial codon translates to 'X'. A sequence with 1 or 2 extra bases indicates a problem with the data, and ending the translation with 'X' is misleading because it implies a complete 3-base codon. 🚧: see FIXME in code
1 parent 7c34a61 commit d5e9ee1

File tree

2 files changed

+22
-4
lines changed

2 files changed

+22
-4
lines changed

‎augur/translate.py‎

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -51,14 +51,16 @@ def safe_translate(sequence):
5151
>>> safe_translate("")
5252
''
5353
>>> safe_translate("ATGT")
54-
'MX'
54+
Traceback (most recent call last):
55+
...
56+
ValueError: Sequence not divisible by 3.
5557
"""
5658
from Bio.Data.CodonTable import TranslationError
5759
from Bio.Seq import CodonTable
5860

59-
#sequences not mod 3 give messy BiopythonWarning, so avoid by padding.
6061
if len(sequence)%3:
61-
sequence_padded = sequence + "N"*(3-len(sequence)%3)
62+
raise ValueError("Sequence not divisible by 3.")
63+
# FIXME: handle this elsewhere since a proper error message needs more context - which sequence from which file?
6264
else:
6365
sequence_padded = sequence
6466
try:

‎tests/test_translate.py‎

Lines changed: 17 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
from pathlib import Path
55
import sys
66

7+
import pytest
78
from Bio.Seq import Seq
89
from Bio.SeqFeature import SeqFeature, FeatureLocation
910

@@ -23,14 +24,29 @@ def test_safe_translate(self):
2324
(('ATG---',), 'M-'),
2425
(('ATGTAG',), 'M*'),
2526
(('',), ''),
26-
(('ATGT',), 'MX'),
2727
(('ATGA-G',), 'MX')]
2828

2929
# input each pair into the function and check
3030
for pair in params_and_outs:
3131
params, out = pair
3232
assert translate.safe_translate(*params) == out
3333

34+
def test_safe_translate_errors(self):
35+
'''
36+
Test that safe_translate raises ValueError when sequence is not divisible by 3
37+
'''
38+
invalid_sequences = [
39+
'A',
40+
'AT',
41+
'ATGT',
42+
'ATGTA',
43+
'ATGTAGA',
44+
]
45+
46+
for seq in invalid_sequences:
47+
with pytest.raises(ValueError, match="Sequence not divisible by 3"):
48+
translate.safe_translate(seq)
49+
3450
def test_translate_feature(self):
3551
'''
3652
Test translate_feature from a dictionary of given nucleotides to dictionary of translated amino acids

0 commit comments

Comments
 (0)