Skip to content

Commit d783190

Browse files
Update fireworks ai pricing (#10425)
* build(model_prices_and_context_window.json): add fireworks ai new 0-4b pricing tier
* build(model_prices_and_context_window.json): add more fireworks ai models
* test: update testing
* test: testing updates
* test: update test
* test: update test
1 parent 839878f commit d783190

8 files changed

+150
-20
lines changed

litellm/constants.py

+1
Original file line numberDiff line numberDiff line change
@@ -94,6 +94,7 @@
9494
DEFAULT_REPLICATE_GPU_PRICE_PER_SECOND = 0.001400 # price per second for a100 80GB
9595
FIREWORKS_AI_56_B_MOE = 56
9696
FIREWORKS_AI_176_B_MOE = 176
97+
FIREWORKS_AI_4_B = 4
9798
FIREWORKS_AI_16_B = 16
9899
FIREWORKS_AI_80_B = 80
99100

litellm/llms/fireworks_ai/cost_calculator.py

+7-5
Original file line numberDiff line numberDiff line change
@@ -5,9 +5,9 @@
55
from typing import Tuple
66

77
from litellm.constants import (
8+
FIREWORKS_AI_4_B,
89
FIREWORKS_AI_16_B,
910
FIREWORKS_AI_56_B_MOE,
10-
FIREWORKS_AI_80_B,
1111
FIREWORKS_AI_176_B_MOE,
1212
)
1313
from litellm.types.utils import Usage
@@ -43,10 +43,12 @@ def get_base_model_for_pricing(model_name: str) -> str:
4343
params_billion = float(params_match)
4444

4545
# Determine the category based on the number of parameters
46-
if params_billion <= FIREWORKS_AI_16_B:
47-
return "fireworks-ai-up-to-16b"
48-
elif params_billion <= FIREWORKS_AI_80_B:
49-
return "fireworks-ai-16b-80b"
46+
if params_billion <= FIREWORKS_AI_4_B:
47+
return "fireworks-ai-up-to-4b"
48+
elif params_billion <= FIREWORKS_AI_16_B:
49+
return "fireworks-ai-4.1b-to-16b"
50+
elif params_billion > FIREWORKS_AI_16_B:
51+
return "fireworks-ai-above-16b"
5052

5153
# If no matches, return the original model_name
5254
return "fireworks-ai-default"

litellm/model_prices_and_context_window_backup.json

+68-4
Original file line numberDiff line numberDiff line change
@@ -11084,7 +11084,7 @@
1108411084
"fireworks_ai/accounts/fireworks/models/deepseek-coder-v2-instruct": {
1108511085
"max_tokens": 65536,
1108611086
"max_input_tokens": 65536,
11087-
"max_output_tokens": 8192,
11087+
"max_output_tokens": 65536,
1108811088
"input_cost_per_token": 0.0000012,
1108911089
"output_cost_per_token": 0.0000012,
1109011090
"litellm_provider": "fireworks_ai",
@@ -11106,7 +11106,66 @@
1110611106
"source": "https://fireworks.ai/pricing",
1110711107
"supports_tool_choice": true
1110811108
},
11109-
11109+
"fireworks_ai/accounts/fireworks/models/deepseek-r1": {
11110+
"max_tokens": 20480,
11111+
"max_input_tokens": 128000,
11112+
"max_output_tokens": 20480,
11113+
"input_cost_per_token": 3e-6,
11114+
"output_cost_per_token": 8e-6,
11115+
"litellm_provider": "fireworks_ai",
11116+
"mode": "chat",
11117+
"supports_response_schema": true,
11118+
"source": "https://fireworks.ai/pricing",
11119+
"supports_tool_choice": true
11120+
},
11121+
"fireworks_ai/accounts/fireworks/models/deepseek-r1-basic": {
11122+
"max_tokens": 20480,
11123+
"max_input_tokens": 128000,
11124+
"max_output_tokens": 20480,
11125+
"input_cost_per_token": 0.55e-6,
11126+
"output_cost_per_token": 2.19e-6,
11127+
"litellm_provider": "fireworks_ai",
11128+
"mode": "chat",
11129+
"supports_response_schema": true,
11130+
"source": "https://fireworks.ai/pricing",
11131+
"supports_tool_choice": true
11132+
},
11133+
"fireworks_ai/accounts/fireworks/models/llama-v3p1-405b-instruct": {
11134+
"max_tokens": 16384,
11135+
"max_input_tokens": 128000,
11136+
"max_output_tokens": 16384,
11137+
"input_cost_per_token": 3e-6,
11138+
"output_cost_per_token": 3e-6,
11139+
"litellm_provider": "fireworks_ai",
11140+
"mode": "chat",
11141+
"supports_response_schema": true,
11142+
"source": "https://fireworks.ai/pricing",
11143+
"supports_tool_choice": true
11144+
},
11145+
"fireworks_ai/accounts/fireworks/models/llama4-maverick-instruct-basic": {
11146+
"max_tokens": 131072,
11147+
"max_input_tokens": 131072,
11148+
"max_output_tokens": 131072,
11149+
"input_cost_per_token": 0.22e-6,
11150+
"output_cost_per_token": 0.88e-6,
11151+
"litellm_provider": "fireworks_ai",
11152+
"mode": "chat",
11153+
"supports_response_schema": true,
11154+
"source": "https://fireworks.ai/pricing",
11155+
"supports_tool_choice": true
11156+
},
11157+
"fireworks_ai/accounts/fireworks/models/llama4-scout-instruct-basic": {
11158+
"max_tokens": 131072,
11159+
"max_input_tokens": 131072,
11160+
"max_output_tokens": 131072,
11161+
"input_cost_per_token": 0.15e-6,
11162+
"output_cost_per_token": 0.60e-6,
11163+
"litellm_provider": "fireworks_ai",
11164+
"mode": "chat",
11165+
"supports_response_schema": true,
11166+
"source": "https://fireworks.ai/pricing",
11167+
"supports_tool_choice": true
11168+
},
1111011169
"fireworks_ai/nomic-ai/nomic-embed-text-v1.5": {
1111111170
"max_tokens": 8192,
1111211171
"max_input_tokens": 8192,
@@ -11152,12 +11211,17 @@
1115211211
"mode": "embedding",
1115311212
"source": "https://fireworks.ai/pricing"
1115411213
},
11155-
"fireworks-ai-up-to-16b": {
11214+
"fireworks-ai-up-to-4b": {
11215+
"input_cost_per_token": 0.0000002,
11216+
"output_cost_per_token": 0.0000002,
11217+
"litellm_provider": "fireworks_ai"
11218+
},
11219+
"fireworks-ai-4.1b-to-16b": {
1115611220
"input_cost_per_token": 0.0000002,
1115711221
"output_cost_per_token": 0.0000002,
1115811222
"litellm_provider": "fireworks_ai"
1115911223
},
11160-
"fireworks-ai-16.1b-to-80b": {
11224+
"fireworks-ai-above-16b": {
1116111225
"input_cost_per_token": 0.0000009,
1116211226
"output_cost_per_token": 0.0000009,
1116311227
"litellm_provider": "fireworks_ai"

model_prices_and_context_window.json

+68-4
Original file line numberDiff line numberDiff line change
@@ -11084,7 +11084,7 @@
1108411084
"fireworks_ai/accounts/fireworks/models/deepseek-coder-v2-instruct": {
1108511085
"max_tokens": 65536,
1108611086
"max_input_tokens": 65536,
11087-
"max_output_tokens": 8192,
11087+
"max_output_tokens": 65536,
1108811088
"input_cost_per_token": 0.0000012,
1108911089
"output_cost_per_token": 0.0000012,
1109011090
"litellm_provider": "fireworks_ai",
@@ -11106,7 +11106,66 @@
1110611106
"source": "https://fireworks.ai/pricing",
1110711107
"supports_tool_choice": true
1110811108
},
11109-
11109+
"fireworks_ai/accounts/fireworks/models/deepseek-r1": {
11110+
"max_tokens": 20480,
11111+
"max_input_tokens": 128000,
11112+
"max_output_tokens": 20480,
11113+
"input_cost_per_token": 3e-6,
11114+
"output_cost_per_token": 8e-6,
11115+
"litellm_provider": "fireworks_ai",
11116+
"mode": "chat",
11117+
"supports_response_schema": true,
11118+
"source": "https://fireworks.ai/pricing",
11119+
"supports_tool_choice": true
11120+
},
11121+
"fireworks_ai/accounts/fireworks/models/deepseek-r1-basic": {
11122+
"max_tokens": 20480,
11123+
"max_input_tokens": 128000,
11124+
"max_output_tokens": 20480,
11125+
"input_cost_per_token": 0.55e-6,
11126+
"output_cost_per_token": 2.19e-6,
11127+
"litellm_provider": "fireworks_ai",
11128+
"mode": "chat",
11129+
"supports_response_schema": true,
11130+
"source": "https://fireworks.ai/pricing",
11131+
"supports_tool_choice": true
11132+
},
11133+
"fireworks_ai/accounts/fireworks/models/llama-v3p1-405b-instruct": {
11134+
"max_tokens": 16384,
11135+
"max_input_tokens": 128000,
11136+
"max_output_tokens": 16384,
11137+
"input_cost_per_token": 3e-6,
11138+
"output_cost_per_token": 3e-6,
11139+
"litellm_provider": "fireworks_ai",
11140+
"mode": "chat",
11141+
"supports_response_schema": true,
11142+
"source": "https://fireworks.ai/pricing",
11143+
"supports_tool_choice": true
11144+
},
11145+
"fireworks_ai/accounts/fireworks/models/llama4-maverick-instruct-basic": {
11146+
"max_tokens": 131072,
11147+
"max_input_tokens": 131072,
11148+
"max_output_tokens": 131072,
11149+
"input_cost_per_token": 0.22e-6,
11150+
"output_cost_per_token": 0.88e-6,
11151+
"litellm_provider": "fireworks_ai",
11152+
"mode": "chat",
11153+
"supports_response_schema": true,
11154+
"source": "https://fireworks.ai/pricing",
11155+
"supports_tool_choice": true
11156+
},
11157+
"fireworks_ai/accounts/fireworks/models/llama4-scout-instruct-basic": {
11158+
"max_tokens": 131072,
11159+
"max_input_tokens": 131072,
11160+
"max_output_tokens": 131072,
11161+
"input_cost_per_token": 0.15e-6,
11162+
"output_cost_per_token": 0.60e-6,
11163+
"litellm_provider": "fireworks_ai",
11164+
"mode": "chat",
11165+
"supports_response_schema": true,
11166+
"source": "https://fireworks.ai/pricing",
11167+
"supports_tool_choice": true
11168+
},
1111011169
"fireworks_ai/nomic-ai/nomic-embed-text-v1.5": {
1111111170
"max_tokens": 8192,
1111211171
"max_input_tokens": 8192,
@@ -11152,12 +11211,17 @@
1115211211
"mode": "embedding",
1115311212
"source": "https://fireworks.ai/pricing"
1115411213
},
11155-
"fireworks-ai-up-to-16b": {
11214+
"fireworks-ai-up-to-4b": {
11215+
"input_cost_per_token": 0.0000002,
11216+
"output_cost_per_token": 0.0000002,
11217+
"litellm_provider": "fireworks_ai"
11218+
},
11219+
"fireworks-ai-4.1b-to-16b": {
1115611220
"input_cost_per_token": 0.0000002,
1115711221
"output_cost_per_token": 0.0000002,
1115811222
"litellm_provider": "fireworks_ai"
1115911223
},
11160-
"fireworks-ai-16.1b-to-80b": {
11224+
"fireworks-ai-above-16b": {
1116111225
"input_cost_per_token": 0.0000009,
1116211226
"output_cost_per_token": 0.0000009,
1116311227
"litellm_provider": "fireworks_ai"

tests/local_testing/test_completion.py

+1-2
Original file line numberDiff line numberDiff line change
@@ -2661,11 +2661,10 @@ def test_re_use_openaiClient():
26612661

26622662
def test_completion_azure():
26632663
try:
2664-
print("azure chatgpt-v-3 test\n\n")
26652664
litellm.set_verbose = False
26662665
## Test azure call
26672666
response = completion(
2668-
model="azure/chatgpt-v-3",
2667+
model="azure/gpt-4o-new-test",
26692668
messages=messages,
26702669
api_key="os.environ/AZURE_API_KEY",
26712670
)

tests/local_testing/test_completion_cost.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -1283,7 +1283,7 @@ def test_completion_cost_databricks_embedding(model):
12831283
@pytest.mark.parametrize(
12841284
"model, base_model",
12851285
[
1286-
("fireworks_ai/llama-v3p1-405b-instruct", "fireworks-ai-default"),
1286+
("fireworks_ai/llama-v3p1-405b-instruct", "fireworks-ai-above-16b"),
12871287
("fireworks_ai/llama4-maverick-instruct-basic", "fireworks-ai-default"),
12881288
],
12891289
)

tests/local_testing/test_custom_callback_input.py

+3-3
Original file line numberDiff line numberDiff line change
@@ -450,12 +450,12 @@ def test_chat_azure_stream():
450450
customHandler = CompletionCustomHandler()
451451
litellm.callbacks = [customHandler]
452452
response = litellm.completion(
453-
model="azure/chatgpt-v-3",
453+
model="azure/gpt-4o-new-test",
454454
messages=[{"role": "user", "content": "Hi 👋 - i'm sync azure"}],
455455
)
456456
# test streaming
457457
response = litellm.completion(
458-
model="azure/chatgpt-v-3",
458+
model="azure/gpt-4o-new-test",
459459
messages=[{"role": "user", "content": "Hi 👋 - i'm sync azure"}],
460460
stream=True,
461461
)
@@ -464,7 +464,7 @@ def test_chat_azure_stream():
464464
# test failure callback
465465
try:
466466
response = litellm.completion(
467-
model="azure/chatgpt-v-3",
467+
model="azure/gpt-4o-new-test",
468468
messages=[{"role": "user", "content": "Hi 👋 - i'm sync azure"}],
469469
api_key="my-bad-key",
470470
stream=True,

tests/local_testing/test_timeout.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -104,7 +104,7 @@ def test_hanging_request_azure():
104104
{
105105
"model_name": "azure-gpt",
106106
"litellm_params": {
107-
"model": "azure/chatgpt-v-3",
107+
"model": "azure/gpt-4o-new-test",
108108
"api_base": os.environ["AZURE_API_BASE"],
109109
"api_key": os.environ["AZURE_API_KEY"],
110110
},

0 commit comments

Comments (0)