Skip to content

Commit 746df7b

Browse files
winstxnhdw authored and BernardZach committed
refactor: remove redundant if-condition and improve type correctness for convert_tokens_to_ids (huggingface#34030)
* chore: remove redundant if-condition
* fix: import `Iterable`
1 parent 3dd5225 commit 746df7b

File tree

1 file changed

+4
-7
lines changed

1 file changed

+4
-7
lines changed

src/transformers/tokenization_utils_fast.py

Lines changed: 4 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -21,7 +21,7 @@
2121
import json
2222
import os
2323
from collections import defaultdict
24-
from typing import Any, Dict, List, Optional, Tuple, Union
24+
from typing import Any, Dict, Iterable, List, Optional, Tuple, Union
2525

2626
import tokenizers.pre_tokenizers as pre_tokenizers_fast
2727
from tokenizers import Encoding as EncodingFast
@@ -326,20 +326,17 @@ def _convert_encoding(
326326

327327
return encoding_dict, encodings
328328

329-
def convert_tokens_to_ids(self, tokens: Union[str, List[str]]) -> Union[int, List[int]]:
329+
def convert_tokens_to_ids(self, tokens: Union[str, Iterable[str]]) -> Union[int, List[int]]:
330330
"""
331-
Converts a token string (or a sequence of tokens) in a single integer id (or a sequence of ids), using the
331+
Converts a token string (or a sequence of tokens) in a single integer id (or a Iterable of ids), using the
332332
vocabulary.
333333
334334
Args:
335-
tokens (`str` or `List[str]`): One or several token(s) to convert to token id(s).
335+
tokens (`str` or `Iterable[str]`): One or several token(s) to convert to token id(s).
336336
337337
Returns:
338338
`int` or `List[int]`: The token id or list of token ids.
339339
"""
340-
if tokens is None:
341-
return None
342-
343340
if isinstance(tokens, str):
344341
return self._convert_token_to_id_with_added_voc(tokens)
345342

0 commit comments

Comments
 (0)