Skip to content

Commit 67d8c39

Browse files
committed
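- Added a tokenizer fallback: when the named tokenizer cannot be loaded, log a warning and use tiktoken's o200k_base encoding instead (required for gpt-4.1 at the moment)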
1 parent c01d724 commit 67d8c39

File tree

1 file changed

+4
-1
lines changed

1 file changed

+4
-1
lines changed

src/hierarchical.py

+4 -1
Original file line numberDiff line numberDiff line change
@@ -160,7 +160,10 @@ def __init__(
160160
try:
161161
self.tokenizer = Tokenizer.from_pretrained(tokenizer)
162162
except:
163-
raise ValueError("Invalid tokenizer or model name")
163+
logger.warning(
164+
f"Tokenizer {tokenizer} not found, using o200k_base tokenizer instead."
165+
)
166+
self.tokenizer = tiktoken.get_encoding('o200k_base')
164167
else:
165168
self.tokenizer = tokenizer
166169
self.chunker = chunker

0 commit comments

Comments
 (0)