
Commit 03b5399

test(utils.py): handle scenario where text tokens + reasoning tokens set, but reasoning tokens not charged separately (#10165)

* test(utils.py): handle scenario where text tokens + reasoning tokens set, but reasoning tokens not charged separately

  Addresses #10141 (comment)

* fix(vertex_and_google_ai_studio.py): only set content if non-empty str
1 parent 99db1b7 · commit 03b5399

5 files changed: +90 -49 lines changed

litellm/litellm_core_utils/llm_cost_calc/utils.py (+20 -12)
@@ -265,9 +265,10 @@ def generic_cost_per_token(
     )
 
     ## CALCULATE OUTPUT COST
-    text_tokens = usage.completion_tokens
+    text_tokens = 0
     audio_tokens = 0
     reasoning_tokens = 0
+    is_text_tokens_total = False
     if usage.completion_tokens_details is not None:
         audio_tokens = (
             cast(
@@ -281,7 +282,7 @@ def generic_cost_per_token(
                 Optional[int],
                 getattr(usage.completion_tokens_details, "text_tokens", None),
             )
-            or usage.completion_tokens  # default to completion tokens, if this field is not set
+            or 0  # default to completion tokens, if this field is not set
         )
         reasoning_tokens = (
             cast(
@@ -290,6 +291,11 @@ def generic_cost_per_token(
             )
             or 0
         )
+
+    if text_tokens == 0:
+        text_tokens = usage.completion_tokens
+    if text_tokens == usage.completion_tokens:
+        is_text_tokens_total = True
     ## TEXT COST
     completion_cost = float(text_tokens) * completion_base_cost
 
@@ -302,19 +308,21 @@ def generic_cost_per_token(
     )
 
     ## AUDIO COST
-    if (
-        _output_cost_per_audio_token is not None
-        and audio_tokens is not None
-        and audio_tokens > 0
-    ):
+    if not is_text_tokens_total and audio_tokens is not None and audio_tokens > 0:
+        _output_cost_per_audio_token = (
+            _output_cost_per_audio_token
+            if _output_cost_per_audio_token is not None
+            else completion_base_cost
+        )
         completion_cost += float(audio_tokens) * _output_cost_per_audio_token
 
     ## REASONING COST
-    if (
-        _output_cost_per_reasoning_token is not None
-        and reasoning_tokens
-        and reasoning_tokens > 0
-    ):
+    if not is_text_tokens_total and reasoning_tokens and reasoning_tokens > 0:
+        _output_cost_per_reasoning_token = (
+            _output_cost_per_reasoning_token
+            if _output_cost_per_reasoning_token is not None
+            else completion_base_cost
+        )
         completion_cost += float(reasoning_tokens) * _output_cost_per_reasoning_token
 
     return prompt_cost, completion_cost
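
For readers skimming the diff: the behavioral change is that reasoning tokens are no longer silently dropped when a model defines no output_cost_per_reasoning_token; they now fall back to the base completion rate, and the new is_text_tokens_total flag skips the add-ons when text_tokens already equals completion_tokens, so the same tokens are never billed twice. A minimal standalone sketch of that output-side logic (illustrative only; output_cost_sketch is not a litellm function, and the real code also handles audio tokens and tiered pricing):

from typing import Optional

def output_cost_sketch(
    completion_tokens: int,
    text_tokens: Optional[int],
    reasoning_tokens: Optional[int],
    output_cost_per_token: float,
    output_cost_per_reasoning_token: Optional[float] = None,
) -> float:
    # Mirrors the post-patch flow of generic_cost_per_token (output side only).
    text = text_tokens or 0
    reasoning = reasoning_tokens or 0
    if text == 0:
        text = completion_tokens  # no text-token detail reported: bill everything as text
    # If text already covers the whole completion, skip the reasoning add-on
    # so the same tokens are not billed twice.
    is_text_tokens_total = text == completion_tokens
    cost = float(text) * output_cost_per_token
    if not is_text_tokens_total and reasoning > 0:
        rate = (
            output_cost_per_reasoning_token
            if output_cost_per_reasoning_token is not None
            else output_cost_per_token  # new fallback: charge at the base rate
        )
        cost += float(reasoning) * rate
    return cost

# o1-mini-style usage from the new test: 626 text + 952 reasoning = 1578 completion
# tokens, with no separate reasoning price. The total equals the full completion
# count billed at the base rate (rate here is illustrative).
assert round(output_cost_sketch(1578, 626, 952, 1e-6), 12) == round(1578 * 1e-6, 12)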

litellm/llms/vertex_ai/gemini/vertex_and_google_ai_studio_gemini.py (+9 -8)
@@ -587,14 +587,15 @@ def get_assistant_content_message(
                 _content_str += "data:{};base64,{}".format(
                     part["inlineData"]["mimeType"], part["inlineData"]["data"]
                 )
-            if part.get("thought") is True:
-                if reasoning_content_str is None:
-                    reasoning_content_str = ""
-                reasoning_content_str += _content_str
-            else:
-                if content_str is None:
-                    content_str = ""
-                content_str += _content_str
+            if len(_content_str) > 0:
+                if part.get("thought") is True:
+                    if reasoning_content_str is None:
+                        reasoning_content_str = ""
+                    reasoning_content_str += _content_str
+                else:
+                    if content_str is None:
+                        content_str = ""
+                    content_str += _content_str
 
         return content_str, reasoning_content_str
 
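
The second fix is easiest to see with a tool-call-only response: a functionCall part contributes no text, so _content_str stays empty, and before this commit the else branch still ran and coerced content_str from None to "". A compressed sketch of the guarded flow (plain dicts stand in for HttpxPartType; assistant_content_sketch is illustrative, not litellm API, and omits the inlineData handling):

from typing import List, Optional, Tuple

def assistant_content_sketch(parts: List[dict]) -> Tuple[Optional[str], Optional[str]]:
    content_str: Optional[str] = None
    reasoning_content_str: Optional[str] = None
    for part in parts:
        _content_str = ""
        if "text" in part:
            _content_str += part["text"]
        # A functionCall-only part adds nothing, so _content_str stays "".
        if len(_content_str) > 0:  # the guard this commit adds
            if part.get("thought") is True:
                reasoning_content_str = (reasoning_content_str or "") + _content_str
            else:
                content_str = (content_str or "") + _content_str
    return content_str, reasoning_content_str

# Tool-call-only response: both values stay None instead of content becoming "".
assert assistant_content_sketch(
    [{"functionCall": {"name": "get_current_weather", "arguments": "{}"}}]
) == (None, None)
assert assistant_content_sketch([{"text": "hi"}]) == ("hi", None)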

litellm/model_prices_and_context_window_backup.json (-29)
@@ -4979,35 +4979,6 @@
         "supports_tool_choice": true
     },
     "gemini-2.5-pro-exp-03-25": {
-        "max_tokens": 65536,
-        "max_input_tokens": 1048576,
-        "max_output_tokens": 65536,
-        "max_images_per_prompt": 3000,
-        "max_videos_per_prompt": 10,
-        "max_video_length": 1,
-        "max_audio_length_hours": 8.4,
-        "max_audio_per_prompt": 1,
-        "max_pdf_size_mb": 30,
-        "input_cost_per_token": 0,
-        "input_cost_per_token_above_200k_tokens": 0,
-        "output_cost_per_token": 0,
-        "output_cost_per_token_above_200k_tokens": 0,
-        "litellm_provider": "vertex_ai-language-models",
-        "mode": "chat",
-        "supports_system_messages": true,
-        "supports_function_calling": true,
-        "supports_vision": true,
-        "supports_audio_input": true,
-        "supports_video_input": true,
-        "supports_pdf_input": true,
-        "supports_response_schema": true,
-        "supports_tool_choice": true,
-        "supported_endpoints": ["/v1/chat/completions", "/v1/completions"],
-        "supported_modalities": ["text", "image", "audio", "video"],
-        "supported_output_modalities": ["text"],
-        "source": "https://cloud.google.com/vertex-ai/generative-ai/pricing"
-    },
-    "gemini-2.5-pro-preview-03-25": {
         "max_tokens": 65536,
         "max_input_tokens": 1048576,
         "max_output_tokens": 65536,

tests/litellm/litellm_core_utils/llm_cost_calc/test_llm_cost_calc_utils.py (+41)
@@ -26,6 +26,47 @@
 from litellm.types.utils import Usage
 
 
+def test_reasoning_tokens_no_price_set():
+    model = "o1-mini"
+    custom_llm_provider = "openai"
+    os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True"
+    litellm.model_cost = litellm.get_model_cost_map(url="")
+    model_cost_map = litellm.model_cost[model]
+    usage = Usage(
+        completion_tokens=1578,
+        prompt_tokens=17,
+        total_tokens=1595,
+        completion_tokens_details=CompletionTokensDetailsWrapper(
+            accepted_prediction_tokens=None,
+            audio_tokens=None,
+            reasoning_tokens=952,
+            rejected_prediction_tokens=None,
+            text_tokens=626,
+        ),
+        prompt_tokens_details=PromptTokensDetailsWrapper(
+            audio_tokens=None, cached_tokens=None, text_tokens=17, image_tokens=None
+        ),
+    )
+    prompt_cost, completion_cost = generic_cost_per_token(
+        model=model,
+        usage=usage,
+        custom_llm_provider="openai",
+    )
+    assert round(prompt_cost, 10) == round(
+        model_cost_map["input_cost_per_token"] * usage.prompt_tokens,
+        10,
+    )
+    print(f"completion_cost: {completion_cost}")
+    expected_completion_cost = (
+        model_cost_map["output_cost_per_token"] * usage.completion_tokens
+    )
+    print(f"expected_completion_cost: {expected_completion_cost}")
+    assert round(completion_cost, 10) == round(
+        expected_completion_cost,
+        10,
+    )
+
+
 def test_reasoning_tokens_gemini():
     model = "gemini-2.5-flash-preview-04-17"
     custom_llm_provider = "gemini"
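
The numbers in this test are chosen so that text_tokens (626) plus reasoning_tokens (952) exactly equals completion_tokens (1578). Because o1-mini defines no output_cost_per_reasoning_token, the new fallback bills both slices at output_cost_per_token, so the expected total is simply 1578 * output_cost_per_token; the pre-patch code billed only the 626 text tokens and dropped the reasoning slice entirely.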

tests/litellm/llms/vertex_ai/gemini/test_vertex_and_google_ai_studio_gemini.py (+20)
@@ -239,3 +239,23 @@ def test_vertex_ai_thinking_output_part():
     content, reasoning_content = v.get_assistant_content_message(parts=parts)
     assert content == "Hello world"
     assert reasoning_content == "I'm thinking..."
+
+
+def test_vertex_ai_empty_content():
+    from litellm.llms.vertex_ai.gemini.vertex_and_google_ai_studio_gemini import (
+        VertexGeminiConfig,
+    )
+    from litellm.types.llms.vertex_ai import HttpxPartType
+
+    v = VertexGeminiConfig()
+    parts = [
+        HttpxPartType(
+            functionCall={
+                "name": "get_current_weather",
+                "arguments": "{}",
+            },
+        ),
+    ]
+    content, reasoning_content = v.get_assistant_content_message(parts=parts)
+    assert content is None
+    assert reasoning_content is None
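
This test is the regression check for the second bullet of the commit message: a parts list containing only a functionCall used to flip content_str from None to "", which downstream callers could misread as an empty assistant message; with the non-empty guard, both content and reasoning_content stay None.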
