File tree: 1 file changed (+7 -0 lines changed)
nemo/collections/common/tokenizers
1 file changed (+7 -0 lines changed)
Original file line number | Diff line number | Diff line change
@@ -26,29 +26,36 @@ class TokenizerSpec(ABC):
26
26
27
27
@abstractmethod
28
28
def text_to_tokens (self , text ):
29
+ """ Converts text into a list of tokens. """
29
30
pass
30
31
31
32
@abstractmethod
32
33
def tokens_to_text (self , tokens ):
34
+ """ Converts a list of tokens back into text. """
33
35
pass
34
36
35
37
@abstractmethod
36
38
def tokens_to_ids (self , tokens ):
39
+ """ Converts a list of tokens to their corresponding IDs. """
37
40
pass
38
41
39
42
@abstractmethod
40
43
def ids_to_tokens (self , ids ):
44
+ """ Converts a list of token IDs back to tokens. """
41
45
pass
42
46
43
47
@abstractmethod
44
48
def text_to_ids (self , text ):
49
+ """ Converts text directly to token IDs. """
45
50
pass
46
51
47
52
@abstractmethod
48
53
def ids_to_text (self , ids ):
54
+ """ Converts token IDs back to text. """
49
55
pass
50
56
51
57
def add_special_tokens (self , special_tokens : List [str ]):
58
+ """ Adds special tokens (eos, pad, cls...) to vocab. """
52
59
raise NotImplementedError ("To be implemented" )
53
60
54
61
def apply_chat_template (self , * args , ** kwargs ):
You can’t perform that action at this time.
0 commit comments