tokens_counter.py
import argparse
import sys

import anthropic

# CLI settings. The script below references args.*, so the parameters are
# defined here; the 200K context window, 128K beta output cap, and 32K
# threshold come from the constants used later, while the desired_output_tokens
# default is an illustrative assumption.
parser = argparse.ArgumentParser(description="Count prompt tokens and size a thinking budget.")
parser.add_argument("--context_window", type=int, default=200000,
                    help="total context window size in tokens")
parser.add_argument("--betas_max_tokens", type=int, default=128000,
                    help="max output tokens via the output-128k beta feature")
parser.add_argument("--desired_output_tokens", type=int, default=8000,
                    help="visible output tokens to reserve after thinking (assumed default)")
parser.add_argument("--thinking_budget_tokens", type=int, default=32000,
                    help="minimum acceptable thinking budget")
args = parser.parse_args()

client = anthropic.Anthropic()
prompt_file = "prompt.txt"
try:
    with open(prompt_file, "r", encoding="utf-8") as f:
        prompt = f.read()
    if not prompt.strip():
        print(f"Error: prompt file '{prompt_file}' is empty.")
        sys.exit(1)
except FileNotFoundError:
    print(f"Error: prompt file '{prompt_file}' not found.")
    sys.exit(1)
except Exception as e:
    print(f"Error reading prompt file '{prompt_file}': {e}")
    sys.exit(1)
# Count the prompt's input tokens server-side without running a generation.
response = client.beta.messages.count_tokens(
    model="claude-3-7-sonnet-20250219",
    thinking={
        "type": "enabled",
        "budget_tokens": 128000
    },
    messages=[{"role": "user", "content": prompt}],
    betas=["output-128k-2025-02-19"]
)
# print(response)
# print(response.model_dump_json())
print(f"input_tokens: {response.input_tokens}")
# Calculate available tokens after the prompt.
prompt_tokens = response.input_tokens
available_tokens = args.context_window - prompt_tokens

# For the API call, max_tokens must respect the beta output limit.
max_tokens = min(available_tokens, args.betas_max_tokens)

# The thinking budget must be LESS than max_tokens to leave room for visible output.
thinking_budget = max_tokens - args.desired_output_tokens
if thinking_budget > 32000:
    print("Warning: thinking budget is larger than 32K; resetting to 32K. Use batch processing for larger thinking budgets.")
    thinking_budget = 32000
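
# Worked example of the arithmetic above (illustrative numbers, assuming the
# default 200K window, 128K beta cap, and 8K desired output):
#   prompt_tokens   = 20,000
#   available       = 200,000 - 20,000 = 180,000
#   max_tokens      = min(180,000, 128,000) = 128,000
#   thinking_budget = 128,000 - 8,000 = 120,000 -> capped to 32,000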
print(f"Running character analysis...")
print(f"\nToken stats:")
print(f"Max AI model context window: [{args.context_window}] tokens")
print(f"Input prompt tokens: [{prompt_tokens}]")
print(f"Available tokens: [{available_tokens}] = {args.context_window} - {prompt_tokens}")
print(f"Desired output tokens: [{args.desired_output_tokens}]")
print(f"AI model thinking budget: [{thinking_budget}] tokens")
print(f"Max output tokens (max_tokens): [{max_tokens}] tokens")
if thinking_budget < args.thinking_budget_tokens:
    print(f"Error: prompt is too large to allow a {args.thinking_budget_tokens}-token thinking budget!")
    sys.exit(1)
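
# Illustrative follow-up request (a sketch, not part of the original counter):
# sends the same prompt with the derived limits. The model and beta flag
# mirror the count_tokens call above; streaming is used because requests with
# a large max_tokens generally require it.
with client.beta.messages.stream(
    model="claude-3-7-sonnet-20250219",
    max_tokens=max_tokens,
    thinking={"type": "enabled", "budget_tokens": thinking_budget},
    messages=[{"role": "user", "content": prompt}],
    betas=["output-128k-2025-02-19"],
) as stream:
    for text in stream.text_stream:
        print(text, end="", flush=True)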