Support x-litellm-api-key header param + allow key at max budget to call non-llm api endpoints (#10392)

krrishdholakia · web-flow · commit 839878f4f550 · 2025-04-29T18:57:57.000-07:00
* fix(user_api_key_auth.py): fix passing `x-litellm-api-key` to user api key auth

Use this header when it is provided; otherwise fall back to the bearer token.

Fixes issue with auth on vertex passthrough

* test(test_user_api_key_auth.py): use new fastapi.security check

* fix(user_api_key_auth.py): allow key at budget, to still call non-llm api endpoints

Fixes an issue where a key that had reached its budget couldn't call `/key/info`
diff --git a/litellm/proxy/_experimental/out/onboarding.html b/litellm/proxy/_experimental/out/onboarding.html
diff --git a/litellm/proxy/_types.py b/litellm/proxy/_types.py
@@ -326,6 +326,7 @@ class LiteLLMRoutes(enum.Enum):
         "/v1/messages",
     ]
 
+    llm_api_routes = openai_routes + anthropic_routes + mapped_pass_through_routes
     info_routes = [
         "/key/info",
         "/key/health",
@@ -654,9 +655,9 @@ class GenerateRequestBase(LiteLLMPydanticObjectBase):
     allowed_cache_controls: Optional[list] = []
     config: Optional[dict] = {}
     permissions: Optional[dict] = {}
-    model_max_budget: Optional[dict] = (
-        {}
-    )  # {"gpt-4": 5.0, "gpt-3.5-turbo": 5.0}, defaults to {}
+    model_max_budget: Optional[
+        dict
+    ] = {}  # {"gpt-4": 5.0, "gpt-3.5-turbo": 5.0}, defaults to {}
 
     model_config = ConfigDict(protected_namespaces=())
     model_rpm_limit: Optional[dict] = None
@@ -918,12 +919,12 @@ class NewCustomerRequest(BudgetNewRequest):
     alias: Optional[str] = None  # human-friendly alias
     blocked: bool = False  # allow/disallow requests for this end-user
     budget_id: Optional[str] = None  # give either a budget_id or max_budget
-    allowed_model_region: Optional[AllowedModelRegion] = (
-        None  # require all user requests to use models in this specific region
-    )
-    default_model: Optional[str] = (
-        None  # if no equivalent model in allowed region - default all requests to this model
-    )
+    allowed_model_region: Optional[
+        AllowedModelRegion
+    ] = None  # require all user requests to use models in this specific region
+    default_model: Optional[
+        str
+    ] = None  # if no equivalent model in allowed region - default all requests to this model
 
     @model_validator(mode="before")
     @classmethod
@@ -945,12 +946,12 @@ class UpdateCustomerRequest(LiteLLMPydanticObjectBase):
     blocked: bool = False  # allow/disallow requests for this end-user
     max_budget: Optional[float] = None
     budget_id: Optional[str] = None  # give either a budget_id or max_budget
-    allowed_model_region: Optional[AllowedModelRegion] = (
-        None  # require all user requests to use models in this specific region
-    )
-    default_model: Optional[str] = (
-        None  # if no equivalent model in allowed region - default all requests to this model
-    )
+    allowed_model_region: Optional[
+        AllowedModelRegion
+    ] = None  # require all user requests to use models in this specific region
+    default_model: Optional[
+        str
+    ] = None  # if no equivalent model in allowed region - default all requests to this model
 
 
 class DeleteCustomerRequest(LiteLLMPydanticObjectBase):
@@ -1086,9 +1087,9 @@ class BlockKeyRequest(LiteLLMPydanticObjectBase):
 
 class AddTeamCallback(LiteLLMPydanticObjectBase):
     callback_name: str
-    callback_type: Optional[Literal["success", "failure", "success_and_failure"]] = (
-        "success_and_failure"
-    )
+    callback_type: Optional[
+        Literal["success", "failure", "success_and_failure"]
+    ] = "success_and_failure"
     callback_vars: Dict[str, str]
 
     @model_validator(mode="before")
@@ -1346,9 +1347,9 @@ class ConfigList(LiteLLMPydanticObjectBase):
     stored_in_db: Optional[bool]
     field_default_value: Any
     premium_field: bool = False
-    nested_fields: Optional[List[FieldDetail]] = (
-        None  # For nested dictionary or Pydantic fields
-    )
+    nested_fields: Optional[
+        List[FieldDetail]
+    ] = None  # For nested dictionary or Pydantic fields
 
 
 class ConfigGeneralSettings(LiteLLMPydanticObjectBase):
@@ -1616,9 +1617,9 @@ class LiteLLM_OrganizationMembershipTable(LiteLLMPydanticObjectBase):
     budget_id: Optional[str] = None
     created_at: datetime
     updated_at: datetime
-    user: Optional[Any] = (
-        None  # You might want to replace 'Any' with a more specific type if available
-    )
+    user: Optional[
+        Any
+    ] = None  # You might want to replace 'Any' with a more specific type if available
     litellm_budget_table: Optional[LiteLLM_BudgetTable] = None
 
     model_config = ConfigDict(protected_namespaces=())
@@ -2368,9 +2369,9 @@ class TeamModelDeleteRequest(BaseModel):
 # Organization Member Requests
 class OrganizationMemberAddRequest(OrgMemberAddRequest):
     organization_id: str
-    max_budget_in_organization: Optional[float] = (
-        None  # Users max budget within the organization
-    )
+    max_budget_in_organization: Optional[
+        float
+    ] = None  # Users max budget within the organization
 
 
 class OrganizationMemberDeleteRequest(MemberDeleteRequest):
@@ -2451,6 +2452,7 @@ class SpecialHeaders(enum.Enum):
     anthropic_authorization = "x-api-key"
     google_ai_studio_authorization = "x-goog-api-key"
     azure_apim_authorization = "Ocp-Apim-Subscription-Key"
+    custom_litellm_api_key = "x-litellm-api-key"
 
 
 class LitellmDataForBackendLLMCall(TypedDict, total=False):
@@ -2559,9 +2561,9 @@ class ProviderBudgetResponse(LiteLLMPydanticObjectBase):
     Maps provider names to their budget configs.
     """
 
-    providers: Dict[str, ProviderBudgetResponseObject] = (
-        {}
-    )  # Dictionary mapping provider names to their budget configurations
+    providers: Dict[
+        str, ProviderBudgetResponseObject
+    ] = {}  # Dictionary mapping provider names to their budget configurations
 
 
 class ProxyStateVariables(TypedDict):
@@ -2689,9 +2691,9 @@ class LiteLLM_JWTAuth(LiteLLMPydanticObjectBase):
     enforce_rbac: bool = False
     roles_jwt_field: Optional[str] = None  # v2 on role mappings
     role_mappings: Optional[List[RoleMapping]] = None
-    object_id_jwt_field: Optional[str] = (
-        None  # can be either user / team, inferred from the role mapping
-    )
+    object_id_jwt_field: Optional[
+        str
+    ] = None  # can be either user / team, inferred from the role mapping
     scope_mappings: Optional[List[ScopeMapping]] = None
     enforce_scope_based_access: bool = False
     enforce_team_based_model_access: bool = False
diff --git a/litellm/proxy/auth/user_api_key_auth.py b/litellm/proxy/auth/user_api_key_auth.py
@@ -57,7 +57,11 @@
 
 user_api_key_service_logger_obj = ServiceLogging()  # used for tracking latency on OTEL
 
-
+custom_litellm_key_header = APIKeyHeader(
+    name=SpecialHeaders.custom_litellm_api_key.value,
+    auto_error=False,
+    description="Bearer token",
+)
 api_key_header = APIKeyHeader(
     name=SpecialHeaders.openai_authorization.value,
     auto_error=False,
@@ -228,6 +232,7 @@ async def _user_api_key_auth_builder(  # noqa: PLR0915
     google_ai_studio_api_key_header: Optional[str],
     azure_apim_header: Optional[str],
     request_data: dict,
+    custom_litellm_key_header: Optional[str] = None,
 ) -> UserAPIKeyAuth:
     from litellm.proxy.proxy_server import (
         general_settings,
@@ -261,7 +266,10 @@ async def _user_api_key_auth_builder(  # noqa: PLR0915
             "pass_through_endpoints", None
         )
         passed_in_key: Optional[str] = None
-        if isinstance(api_key, str):
+        ## CHECK IF X-LITELLM-API-KEY IS PASSED IN - supersedes Authorization header
+        if isinstance(custom_litellm_key_header, str):
+            api_key = custom_litellm_key_header
+        elif isinstance(api_key, str):
             passed_in_key = api_key
             api_key = _get_bearer_token(api_key=api_key)
         elif isinstance(azure_api_key_header, str):
@@ -867,11 +875,12 @@ async def _user_api_key_auth_builder(  # noqa: PLR0915
                     )
 
             # Check 4. Token Spend is under budget
-            await _virtual_key_max_budget_check(
-                valid_token=valid_token,
-                proxy_logging_obj=proxy_logging_obj,
-                user_obj=user_obj,
-            )
+            if route in LiteLLMRoutes.llm_api_routes.value:
+                await _virtual_key_max_budget_check(
+                    valid_token=valid_token,
+                    proxy_logging_obj=proxy_logging_obj,
+                    user_obj=user_obj,
+                )
 
             # Check 5. Soft Budget Check
             await _virtual_key_soft_budget_check(
@@ -1025,6 +1034,9 @@ async def user_api_key_auth(
         google_ai_studio_api_key_header
     ),
     azure_apim_header: Optional[str] = fastapi.Security(azure_apim_header),
+    custom_litellm_key_header: Optional[str] = fastapi.Security(
+        custom_litellm_key_header
+    ),
 ) -> UserAPIKeyAuth:
     """
     Parent function to authenticate user api key / jwt token.
@@ -1041,6 +1053,7 @@ async def user_api_key_auth(
         google_ai_studio_api_key_header=google_ai_studio_api_key_header,
         azure_apim_header=azure_apim_header,
         request_data=request_data,
+        custom_litellm_key_header=custom_litellm_key_header,
     )
 
     end_user_id = get_end_user_id_from_request_body(request_data)
diff --git a/tests/proxy_unit_tests/test_user_api_key_auth.py b/tests/proxy_unit_tests/test_user_api_key_auth.py
@@ -999,3 +999,40 @@ async def test_jwt_non_admin_team_route_access(monkeypatch):
         except ProxyException as e:
             print("e", e)
             assert "Only proxy admin can be used to generate" in str(e.message)
+
+
+@pytest.mark.asyncio
+async def test_x_litellm_api_key():
+    """
+    Check if auth can pick up x-litellm-api-key header, even if Bearer token is provided
+    """
+    from fastapi import Request
+    from starlette.datastructures import URL
+
+    from litellm.proxy._types import (
+        LiteLLM_TeamTable,
+        LiteLLM_TeamTableCachedObj,
+        UserAPIKeyAuth,
+    )
+    from litellm.proxy.auth.user_api_key_auth import user_api_key_auth
+    from litellm.proxy.proxy_server import hash_token, user_api_key_cache
+
+    master_key = "sk-1234"
+
+    setattr(litellm.proxy.proxy_server, "user_api_key_cache", user_api_key_cache)
+    setattr(litellm.proxy.proxy_server, "master_key", master_key)
+    setattr(litellm.proxy.proxy_server, "prisma_client", "hello-world")
+
+    ignored_key = "aj12445"
+
+    # Create a bare HTTP request (no headers); both keys are passed directly to user_api_key_auth below
+    request = Request(
+        scope={
+            "type": "http"
+        }
+    )
+    request._url = URL(url="/chat/completions")
+
+    valid_token = await user_api_key_auth(request=request, api_key="Bearer " + ignored_key, custom_litellm_key_header=master_key)
+    assert valid_token.token == hash_token(master_key)
+
diff --git a/tests/test_keys.py b/tests/test_keys.py
@@ -842,3 +842,26 @@ async def test_key_user_not_in_db():
             await chat_completion(session=session, key=key)
         except Exception as e:
             pytest.fail(f"Expected this call to work - {str(e)}")
+
+
+@pytest.mark.asyncio
+async def test_key_over_budget():
+    """
+    Test if key over budget is handled as expected.
+    """
+    async with aiohttp.ClientSession() as session:
+        key_gen = await generate_key(session=session, i=0, budget=0.0000001)
+        key = key_gen["key"]
+        try:
+            await chat_completion(session=session, key=key)
+        except Exception as e:
+            pytest.fail(f"Expected this call to work - {str(e)}")
+
+        ## CALL `/key/info` - expect to work
+        model_list = await get_key_info(session=session, get_key=key, call_key=key)
+        ## CALL `/chat/completions` - expect to fail
+        try:
+            await chat_completion(session=session, key=key)
+            pytest.fail("Expected this call to fail")
+        except Exception as e:
+            assert "Budget has been exceeded!" in str(e)