@@ -217,6 +217,7 @@ def printTokens(self, tokens):
# Tokenizer #4 (fast) https://github.com/LoganDark
########################################################################################################

+from typing import Generator
from ast import literal_eval

class FastTokenizer:
@@ -255,7 +256,7 @@ def next_token(self, src: bytes) -> int:
                break
        return last_token

-    def encode_bytes(self, src: bytes) -> list[int]:
+    def encode_bytes(self, src: bytes) -> Generator[int, None, None]:
        start, stop = 0, len(src)
        while start < stop:
            last_token, last = None, self.root
@@ -274,7 +275,7 @@ def encode_bytes(self, src: bytes) -> list[int]:
    def decode_bytes(self, tokens: list[int]) -> bytes:
        return b''.join(map(self.tok2val.__getitem__, tokens))

-    def encode(self, src: str) -> list[int]:
+    def encode(self, src: str) -> Generator[int, None, None]:
        return self.encode_bytes(src.encode('utf-8'))

    def decode(self, tokens: list[int]) -> str:
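With this change, encode and encode_bytes yield token ids lazily as the trie is walked, instead of materializing a full list[int] up front. A minimal usage sketch follows; the constructor argument is an assumption (this diff does not show how FastTokenizer is built), and the vocabulary path is hypothetical:

# Sketch only: the constructor signature and vocab path below are assumptions,
# not taken from this diff.
tokenizer = FastTokenizer('vocab.txt')  # hypothetical vocabulary file

# encode() now returns a generator, so token ids can be consumed one at a time...
for token in tokenizer.encode('Hello world'):
    print(token)

# ...and callers that need len(), indexing, or multiple passes must materialize
# it first; decode() is annotated to take a list[int].
tokens = list(tokenizer.encode('Hello world'))
print(tokenizer.decode(tokens))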