
Commit c86b333

Merge pull request BerriAI#5601 from BerriAI/litellm_tag_routing_fixes
[Feat] Tag Routing - Allow setting default deployments
2 parents 00f1d7b + a6d3bd0 commit c86b333

File tree: 4 files changed, +166 -16 lines

docs/my-website/docs/proxy/tag_routing.md (+53)

@@ -25,6 +25,13 @@ model_list:
       model: openai/gpt-4o
       api_key: os.environ/OPENAI_API_KEY
       tags: ["paid"] # 👈 Key Change
+  - model_name: gpt-4
+    litellm_params:
+      model: openai/gpt-4o
+      api_key: os.environ/OPENAI_API_KEY
+      api_base: https://exampleopenaiendpoint-production.up.railway.app/
+      tags: ["default"] # OPTIONAL - All untagged requests will get routed to this
+
 
 router_settings:
   enable_tag_filtering: True # 👈 Key Change
@@ -136,6 +143,46 @@ Response
 }
 ```
 
+## Setting Default Tags
+
+Use this if you want all untagged requests to be routed to specific deployments.
+
+1. Set a default tag in your yaml
+```yaml
+model_list:
+  - model_name: fake-openai-endpoint
+    litellm_params:
+      model: openai/fake
+      api_key: fake-key
+      api_base: https://exampleopenaiendpoint-production.up.railway.app/
+      tags: ["default"] # 👈 Key Change - All untagged requests will get routed to this
+    model_info:
+      id: "default-model" # used for identifying model in response headers
+```
+
+2. Start proxy
+```shell
+$ litellm --config /path/to/config.yaml
+```
+
+3. Make request with no tags
+```shell
+curl -i http://localhost:4000/v1/chat/completions \
+  -H "Content-Type: application/json" \
+  -H "Authorization: Bearer sk-1234" \
+  -d '{
+    "model": "fake-openai-endpoint",
+    "messages": [
+      {"role": "user", "content": "Hello, Claude gm!"}
+    ]
+  }'
+```
+
+Expect to see the following response header when this works:
+```shell
+x-litellm-model-id: default-model
+```
+
 ## ✨ Team based tag routing (Enterprise)
 
 LiteLLM Proxy supports team-based tag routing, allowing you to associate specific tags with teams and route requests accordingly. Example: **Team A can access gpt-4 deployment A, Team B can access gpt-4 deployment B** (LLM Access Control For Teams)
@@ -170,6 +217,12 @@ Here's how to set up and use team-based tag routing using curl commands:
       tags: ["teamB"] # 👈 Key Change
     model_info:
       id: "team-b-model" # used for identifying model in response headers
+  - model_name: fake-openai-endpoint
+    litellm_params:
+      model: openai/fake
+      api_key: fake-key
+      api_base: https://exampleopenaiendpoint-production.up.railway.app/
+      tags: ["default"] # OPTIONAL - All untagged requests will get routed to this
 
 router_settings:
   enable_tag_filtering: True # 👈 Key Change

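A side note for readers of the new docs section: a Python equivalent of the untagged request in step 3 might look like the sketch below. It assumes the proxy from step 2 is listening on localhost:4000 with the example master key `sk-1234`, and uses the openai SDK's `with_raw_response` so the `x-litellm-model-id` header from the docs can be inspected.

```python
# Minimal sketch, not part of this PR: call the proxy with no tags and
# confirm the request was served by the "default"-tagged deployment.
import openai

client = openai.OpenAI(api_key="sk-1234", base_url="http://localhost:4000")

raw = client.chat.completions.with_raw_response.create(
    model="fake-openai-endpoint",
    messages=[{"role": "user", "content": "Hello, Claude gm!"}],
)

# The untagged request should fall through to the default deployment.
assert raw.headers.get("x-litellm-model-id") == "default-model"
print(raw.parse())  # the usual ChatCompletion object
```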
litellm/proxy/proxy_config.yaml (+17, -12)

@@ -1,25 +1,30 @@
 model_list:
   - model_name: openai/*
     litellm_params:
-      model: gpt-3.5-turbo
+      model: openai/*
       api_key: os.environ/OPENAI_API_KEY
+    model_info:
+      id: "good-openai"
+  - model_name: openai/*
+    litellm_params:
+      model: openai/*
+      api_key: os.environ/non-existent-env-var
+      tags: ["bad-model"]
+    model_info:
+      id: "test-openai"
 
-litellm_settings:
-  success_callback: ["prometheus"]
-  failure_callback: ["prometheus"]
 
-guardrails:
-  - guardrail_name: "presidio-pre-guard"
-    litellm_params:
-      guardrail: presidio # supported values: "aporia", "lakera", "presidio"
-      mode: "pre_call" # pre_call, during_call, post_call
-      output_parse_pii: True
 
-litellm_settings:
-  callbacks: ["prometheus"]
+router_settings:
+  enable_tag_filtering: True # 👈 Key Change
+
 
 general_settings:
   master_key: sk-1234
   alerting: ["slack"]
   spend_report_frequency: "1d"
 
+
+litellm_settings:
+  success_callback: ["prometheus"]
+  failure_callback: ["prometheus"]

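The updated config reads like a tag-routing smoke test: two wildcard `openai/*` deployments, one healthy (`good-openai`) and one tagged `bad-model` whose key is deliberately missing (`test-openai`). A hedged sketch of the same setup exercised through `litellm.Router` directly, mirroring the pattern in the test file below — the key values and the called model name here are illustrative, not from this PR:

```python
import asyncio

import litellm


async def main():
    # Mirrors proxy_config.yaml above: a healthy wildcard deployment plus a
    # "bad-model"-tagged one whose API key is deliberately broken.
    router = litellm.Router(
        model_list=[
            {
                "model_name": "openai/*",
                "litellm_params": {"model": "openai/*", "api_key": "sk-real-key"},
                "model_info": {"id": "good-openai"},
            },
            {
                "model_name": "openai/*",
                "litellm_params": {
                    "model": "openai/*",
                    "api_key": "bad-key",
                    "tags": ["bad-model"],
                },
                "model_info": {"id": "test-openai"},
            },
        ],
        enable_tag_filtering=True,
    )

    # Tagging the request "bad-model" restricts routing to the deployment
    # with the broken key, so an auth error is the expected outcome.
    try:
        await router.acompletion(
            model="openai/gpt-4o",
            messages=[{"role": "user", "content": "hi"}],
            metadata={"tags": ["bad-model"]},
        )
    except litellm.AuthenticationError as e:
        print("routed to test-openai as expected:", e)


asyncio.run(main())
```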
litellm/router_strategy/tag_based_routing.py (+27, -4)

@@ -1,5 +1,9 @@
 """
-Use this to route requests between free and paid tiers
+Use this to route requests between teams
+
+- If the tags in the request are a subset of the tags on a deployment, return that deployment
+- If deployments are set with default tags, return all default deployments
+- If no default deployments are set, return all deployments
 """
 
 from typing import TYPE_CHECKING, Any, Dict, List, Literal, Optional, TypedDict, Union
@@ -25,14 +29,14 @@ async def get_deployments_for_tag(
 
     if request_kwargs is None:
         verbose_logger.debug(
-            "get_deployments_for_tier: request_kwargs is None returning healthy_deployments: %s",
+            "get_deployments_for_tag: request_kwargs is None returning healthy_deployments: %s",
             healthy_deployments,
         )
         return healthy_deployments
 
     if healthy_deployments is None:
         verbose_logger.debug(
-            "get_deployments_for_tier: healthy_deployments is None returning healthy_deployments"
+            "get_deployments_for_tag: healthy_deployments is None returning healthy_deployments"
         )
         return healthy_deployments
 
@@ -43,7 +47,9 @@ async def get_deployments_for_tag(
 
     new_healthy_deployments = []
     if request_tags:
-        verbose_logger.debug("parameter routing: router_keys: %s", request_tags)
+        verbose_logger.debug(
+            "get_deployments_for_tag routing: router_keys: %s", request_tags
+        )
         # example: this can be router_keys=["free", "custom"]
         # get all deployments that have a superset of these router keys
         for deployment in healthy_deployments:
@@ -66,9 +72,26 @@ async def get_deployments_for_tag(
                     request_tags,
                 )
                 new_healthy_deployments.append(deployment)
+            elif "default" in deployment_tags:
+                verbose_logger.debug(
+                    "adding default deployment with tags: %s, request tags: %s",
+                    deployment_tags,
+                    request_tags,
+                )
+                new_healthy_deployments.append(deployment)
 
         return new_healthy_deployments
 
+    # for untagged requests, use default deployments if set
+    _default_deployments_with_tags = []
+    for deployment in healthy_deployments:
+        if "default" in deployment.get("litellm_params", {}).get("tags", []):
+            _default_deployments_with_tags.append(deployment)
+
+    if len(_default_deployments_with_tags) > 0:
+        return _default_deployments_with_tags
+
+    # if no default deployment is found, return healthy_deployments
     verbose_logger.debug(
         "no tier found in metadata, returning healthy_deployments: %s",
         healthy_deployments,

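For reviewers, the control flow this file now implements can be condensed into a synchronous sketch (illustrative only; the real `get_deployments_for_tag` is async and also carries the `request_kwargs`/`healthy_deployments` None guards shown above):

```python
from typing import Any, Dict, List


def select_deployments(
    request_tags: List[str],
    healthy_deployments: List[Dict[str, Any]],
) -> List[Dict[str, Any]]:
    """Condensed version of the tag-routing rules added in this PR."""
    if request_tags:
        selected = []
        for deployment in healthy_deployments:
            deployment_tags = deployment.get("litellm_params", {}).get("tags", [])
            if set(request_tags).issubset(deployment_tags):
                # request tags are a subset of the deployment's tags
                selected.append(deployment)
            elif "default" in deployment_tags:
                # new in this PR: "default" deployments also serve tagged requests
                selected.append(deployment)
        return selected

    # new in this PR: untagged requests prefer "default"-tagged deployments
    defaults = [
        d
        for d in healthy_deployments
        if "default" in d.get("litellm_params", {}).get("tags", [])
    ]
    # fall back to all healthy deployments if nothing is tagged "default"
    return defaults if defaults else healthy_deployments
```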
litellm/tests/test_router_tag_routing.py (+69)

@@ -91,3 +91,72 @@ async def test_router_free_paid_tier():
     print("response_extra_info: ", response_extra_info)
 
     assert response_extra_info["model_id"] == "very-expensive-model"
+
+
+@pytest.mark.asyncio()
+async def test_default_tagged_deployments():
+    """
+    - only use default deployments for untagged requests
+    - if a request has the tag "default", use the default deployment
+    """
+
+    router = litellm.Router(
+        model_list=[
+            {
+                "model_name": "gpt-4",
+                "litellm_params": {
+                    "model": "gpt-4o",
+                    "api_base": "https://exampleopenaiendpoint-production.up.railway.app/",
+                    "tags": ["default"],
+                },
+                "model_info": {"id": "default-model"},
+            },
+            {
+                "model_name": "gpt-4",
+                "litellm_params": {
+                    "model": "gpt-4o",
+                    "api_base": "https://exampleopenaiendpoint-production.up.railway.app/",
+                },
+                "model_info": {"id": "default-model-2"},
+            },
+            {
+                "model_name": "gpt-4",
+                "litellm_params": {
+                    "model": "gpt-4o-mini",
+                    "api_base": "https://exampleopenaiendpoint-production.up.railway.app/",
+                    "tags": ["teamA"],
+                },
+                "model_info": {"id": "very-expensive-model"},
+            },
+        ],
+        enable_tag_filtering=True,
+    )
+
+    for _ in range(5):
+        # untagged request; this should pick the model with id == "default-model"
+        response = await router.acompletion(
+            model="gpt-4",
+            messages=[{"role": "user", "content": "Tell me a joke."}],
+        )
+
+        print("Response: ", response)
+
+        response_extra_info = response._hidden_params
+        print("response_extra_info: ", response_extra_info)
+
+        assert response_extra_info["model_id"] == "default-model"
+
+    for _ in range(5):
+        # request tagged "default"; this should also pick the model with id == "default-model"
+        response = await router.acompletion(
+            model="gpt-4",
+            messages=[{"role": "user", "content": "Tell me a joke."}],
+            metadata={"tags": ["default"]},
+        )
+
+        print("Response: ", response)
+
+        response_extra_info = response._hidden_params
+        print("response_extra_info: ", response_extra_info)
+
+        assert response_extra_info["model_id"] == "default-model"

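One behavior worth flagging for reviewers: because of the new `elif "default" in deployment_tags` branch, a *tagged* request can still land on a default deployment. A hypothetical third loop for the test above (continuing the same `router`) would therefore assert membership rather than a single id:

```python
    for _ in range(5):
        # request tagged "teamA": the "teamA" deployment matches by subset,
        # but the "default"-tagged deployment stays eligible via the new
        # elif branch, so either model id is a valid outcome.
        response = await router.acompletion(
            model="gpt-4",
            messages=[{"role": "user", "content": "Tell me a joke."}],
            metadata={"tags": ["teamA"]},
        )

        assert response._hidden_params["model_id"] in {
            "very-expensive-model",
            "default-model",
        }
```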