Skip to content

Commit f8bf5af

Browse files
committed
Apply isort and black reformatting
Signed-off-by: akoumpa <[email protected]>
1 parent d990378 commit f8bf5af

File tree

3 files changed

+19
-28
lines changed

3 files changed

+19
-28
lines changed

nemo/collections/common/tokenizers/huggingface/auto_tokenizer.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -313,7 +313,7 @@ def text_to_ids(self, text):
313313
return ids
314314

315315
def apply_chat_template(self, *args, **kwargs):
316-
""" Applies chat template and tokenizes results """
316+
"""Applies chat template and tokenizes results"""
317317
return self.tokenizer.apply_chat_template(*args, **kwargs)
318318

319319
def ids_to_text(self, ids, remove_special_tokens=True):

nemo/collections/common/tokenizers/tokenizer_spec.py

+8-8
Original file line numberDiff line numberDiff line change
@@ -26,40 +26,40 @@ class TokenizerSpec(ABC):
2626

2727
@abstractmethod
2828
def text_to_tokens(self, text):
29-
""" Converts text into a list of tokens. """
29+
"""Converts text into a list of tokens."""
3030
pass
3131

3232
@abstractmethod
3333
def tokens_to_text(self, tokens):
34-
""" Converts a list of tokens back into text. """
34+
"""Converts a list of tokens back into text."""
3535
pass
3636

3737
@abstractmethod
3838
def tokens_to_ids(self, tokens):
39-
""" Converts a list of tokens to their corresponding IDs. """
39+
"""Converts a list of tokens to their corresponding IDs."""
4040
pass
4141

4242
@abstractmethod
4343
def ids_to_tokens(self, ids):
44-
""" Converts a list of token IDs back to tokens. """
44+
"""Converts a list of token IDs back to tokens."""
4545
pass
4646

4747
@abstractmethod
4848
def text_to_ids(self, text):
49-
""" Converts text directly to token IDs. """
49+
"""Converts text directly to token IDs."""
5050
pass
5151

5252
@abstractmethod
5353
def ids_to_text(self, ids):
54-
""" Converts token IDs back to text. """
54+
"""Converts token IDs back to text."""
5555
pass
5656

5757
def add_special_tokens(self, special_tokens: List[str]):
58-
""" Adds special tokens (eos, pad, cls...) to vocab. """
58+
"""Adds special tokens (eos, pad, cls...) to vocab."""
5959
raise NotImplementedError("To be implemented")
6060

6161
def apply_chat_template(self, *args, **kwargs):
62-
""" Applies chat template and tokenizes results """
62+
"""Applies chat template and tokenizes results"""
6363
raise NotImplementedError("To be implemented")
6464

6565
@property
Original file line numberDiff line numberDiff line change
@@ -1,35 +1,26 @@
11
import pytest
2-
from nemo.collections.nlp.modules.common.tokenizer_utils import get_tokenizer
32
from transformers import AutoTokenizer
43

4+
from nemo.collections.nlp.modules.common.tokenizer_utils import get_tokenizer
5+
6+
57
def test_chat_template():
68
path = "/home/TestData/akoumparouli/tokenizer_with_chat_template/"
79
tokenizers = [get_tokenizer(path), AutoTokenizer.from_pretrained(path)]
810
prompt = "Give me a short introduction to pytest."
9-
messages = [
10-
{"role": "system", "content": "You are a helpful CI assistant."},
11-
{"role": "user", "content": prompt}
11+
messages = [{"role": "system", "content": "You are a helpful CI assistant."}, {"role": "user", "content": prompt}]
12+
texts = [
13+
tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True) for tokenizer in tokenizers
1214
]
13-
texts = [tokenizer.apply_chat_template(
14-
messages,
15-
tokenize=False,
16-
add_generation_prompt=True
17-
) for tokenizer in tokenizers]
1815
assert texts[0] == texts[1]
1916

17+
2018
def test_throws_chat_template():
2119
path = "/home/TestData/akoumparouli/tokenizer_without_chat_template/"
2220
tokenizer = get_tokenizer(path)
2321
prompt = "Give me a short introduction to pytest."
24-
messages = [
25-
{"role": "system", "content": "You are a helpful CI assistant."},
26-
{"role": "user", "content": prompt}
27-
]
22+
messages = [{"role": "system", "content": "You are a helpful CI assistant."}, {"role": "user", "content": prompt}]
2823
try:
29-
tokenizer.apply_chat_template(
30-
messages,
31-
tokenize=False,
32-
add_generation_prompt=True
33-
)
24+
tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
3425
except ValueError as e:
35-
assert 'Cannot use chat template functions because tokenizer.chat_template is not set' in str(e)
26+
assert 'Cannot use chat template functions because tokenizer.chat_template is not set' in str(e)

0 commit comments

Comments
 (0)