
Commit 331e784

[Feat] Responses API - Add session management support for non-OpenAI models (#10321)

* add session id in spendLogs
* fix: log proxy server request as independent field
* use trace id for SpendLogs
* add _ENTERPRISE_ResponsesSessionHandler
* use _ENTERPRISE_ResponsesSessionHandler
* working session_ids
* working session management
* working session_ids
* test_async_gcs_pub_sub_v1
* test_spend_logs_payload_e2e
* working session_ids
* test_get_standard_logging_payload_trace_id
* test_get_standard_logging_payload_trace_id
* test_gcs_pub_sub.py
* fix all linting errors
* test_spend_logs_payload_with_prompts_enabled
* _ENTERPRISE_ResponsesSessionHandler
* _ENTERPRISE_ResponsesSessionHandler
* expose session id on UI
* get spend logs by session
* add sessionSpendLogsCall
* add session handling
* session logs
* UI session details
* fix on rowExpandDetails
* UI working sessions
1 parent c66c821 commit 331e784
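Usage sketch (not part of the diff): with this change, a Responses API request that passes previous_response_id to a non-OpenAI model should have its earlier turns rebuilt from the proxy's spend logs. A minimal illustration, assuming a proxy running at http://localhost:4000 with the anthropic/* config added in this commit; the model name and API key below are placeholders:

from openai import OpenAI

# Point the OpenAI SDK at the LiteLLM proxy (base URL and key are placeholders).
client = OpenAI(base_url="http://localhost:4000", api_key="sk-1234")

# First turn: the proxy logs this request/response in LiteLLM_SpendLogs.
first = client.responses.create(
    model="anthropic/claude-3-5-sonnet-20240620",
    input="Remember that my favorite color is blue.",
)

# Second turn: previous_response_id lets the proxy reload the stored session
# history before calling the non-OpenAI model.
second = client.responses.create(
    model="anthropic/claude-3-5-sonnet-20240620",
    previous_response_id=first.id,
    input="What is my favorite color?",
)
print(second.output_text)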

File tree

22 files changed (+878 / -469 lines)

New file (+136 lines)

from litellm.proxy._types import SpendLogsPayload
from litellm.integrations.custom_logger import CustomLogger
from litellm._logging import verbose_proxy_logger
from typing import Optional, List, Union
import json
from litellm.types.utils import ModelResponse, Message
from litellm.types.llms.openai import (
    AllMessageValues,
    ChatCompletionResponseMessage,
    GenericChatCompletionMessage,
    ResponseInputParam,
)
from litellm.types.utils import ChatCompletionMessageToolCall

from litellm.responses.utils import ResponsesAPIRequestUtils
from typing import TypedDict


class ChatCompletionSession(TypedDict, total=False):
    messages: List[Union[AllMessageValues, GenericChatCompletionMessage, ChatCompletionMessageToolCall, ChatCompletionResponseMessage, Message]]
    litellm_session_id: Optional[str]


class _ENTERPRISE_ResponsesSessionHandler:
    @staticmethod
    async def get_chat_completion_message_history_for_previous_response_id(
        previous_response_id: str,
    ) -> ChatCompletionSession:
        """
        Return the chat completion message history for a previous response id
        """
        from litellm.responses.litellm_completion_transformation.transformation import LiteLLMCompletionResponsesConfig
        all_spend_logs: List[SpendLogsPayload] = await _ENTERPRISE_ResponsesSessionHandler.get_all_spend_logs_for_previous_response_id(previous_response_id)

        litellm_session_id: Optional[str] = None
        if len(all_spend_logs) > 0:
            litellm_session_id = all_spend_logs[0].get("session_id")

        chat_completion_message_history: List[
            Union[
                AllMessageValues,
                GenericChatCompletionMessage,
                ChatCompletionMessageToolCall,
                ChatCompletionResponseMessage,
                Message,
            ]
        ] = []
        for spend_log in all_spend_logs:
            proxy_server_request: Union[str, dict] = spend_log.get("proxy_server_request") or "{}"
            proxy_server_request_dict: Optional[dict] = None
            response_input_param: Optional[Union[str, ResponseInputParam]] = None
            if isinstance(proxy_server_request, dict):
                proxy_server_request_dict = proxy_server_request
            else:
                proxy_server_request_dict = json.loads(proxy_server_request)

            ############################################################
            # Add Input messages for this Spend Log
            ############################################################
            if proxy_server_request_dict:
                _response_input_param = proxy_server_request_dict.get("input", None)
                if isinstance(_response_input_param, str):
                    response_input_param = _response_input_param
                elif isinstance(_response_input_param, dict):
                    response_input_param = ResponseInputParam(**_response_input_param)

            if response_input_param:
                chat_completion_messages = LiteLLMCompletionResponsesConfig.transform_responses_api_input_to_messages(
                    input=response_input_param,
                    responses_api_request=proxy_server_request_dict or {}
                )
                chat_completion_message_history.extend(chat_completion_messages)

            ############################################################
            # Add Output messages for this Spend Log
            ############################################################
            _response_output = spend_log.get("response", "{}")
            if isinstance(_response_output, dict):
                # transform `ChatCompletion Response` to `ResponsesAPIResponse`
                model_response = ModelResponse(**_response_output)
                for choice in model_response.choices:
                    if hasattr(choice, "message"):
                        chat_completion_message_history.append(choice.message)

        verbose_proxy_logger.debug("chat_completion_message_history %s", json.dumps(chat_completion_message_history, indent=4, default=str))
        return ChatCompletionSession(
            messages=chat_completion_message_history,
            litellm_session_id=litellm_session_id
        )

    @staticmethod
    async def get_all_spend_logs_for_previous_response_id(
        previous_response_id: str
    ) -> List[SpendLogsPayload]:
        """
        Get all spend logs for a previous response id

        Roughly the following SQL:

        SELECT session_id FROM spend_logs WHERE response_id = previous_response_id;
        SELECT * FROM spend_logs WHERE session_id = session_id;
        """
        from litellm.proxy.proxy_server import prisma_client
        decoded_response_id = ResponsesAPIRequestUtils._decode_responses_api_response_id(previous_response_id)
        previous_response_id = decoded_response_id.get("response_id", previous_response_id)
        if prisma_client is None:
            return []

        query = """
        WITH matching_session AS (
            SELECT session_id
            FROM "LiteLLM_SpendLogs"
            WHERE request_id = $1
        )
        SELECT *
        FROM "LiteLLM_SpendLogs"
        WHERE session_id IN (SELECT session_id FROM matching_session)
        ORDER BY "endTime" ASC;
        """

        spend_logs = await prisma_client.db.query_raw(
            query,
            previous_response_id
        )

        verbose_proxy_logger.debug(
            "Found the following spend logs for previous response id %s: %s",
            previous_response_id,
            json.dumps(spend_logs, indent=4, default=str)
        )

        return spend_logs
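For orientation, a sketch of how the handler above might be driven (it assumes a configured prisma_client on the proxy and a response id previously issued by the Responses API; the id below is a placeholder):

import asyncio

async def rebuild_history(previous_response_id: str) -> None:
    # Look up the stored session for a response id and print the replayed turns.
    session = await _ENTERPRISE_ResponsesSessionHandler.get_chat_completion_message_history_for_previous_response_id(
        previous_response_id=previous_response_id
    )
    print("session id:", session.get("litellm_session_id"))
    for message in session.get("messages", []):
        print(message)

# "resp_abc123" is a placeholder; real ids come back from /v1/responses.
asyncio.run(rebuild_history("resp_abc123"))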
New file (+4 lines)

-- AlterTable
ALTER TABLE "LiteLLM_SpendLogs" ADD COLUMN "proxy_server_request" JSONB DEFAULT '{}',
ADD COLUMN "session_id" TEXT;

litellm-proxy-extras/litellm_proxy_extras/schema.prisma (+2)

@@ -226,6 +226,8 @@ model LiteLLM_SpendLogs {
   requester_ip_address String?
   messages Json? @default("{}")
   response Json? @default("{}")
+  session_id String?
+  proxy_server_request Json? @default("{}")
   @@index([startTime])
   @@index([end_user])
 }

litellm/litellm_core_utils/litellm_logging.py (+20 / -3)

@@ -28,7 +28,6 @@
 from litellm.batches.batch_utils import _handle_completed_batch
 from litellm.caching.caching import DualCache, InMemoryCache
 from litellm.caching.caching_handler import LLMCachingHandler
-
 from litellm.constants import (
     DEFAULT_MOCK_RESPONSE_COMPLETION_TOKEN_COUNT,
     DEFAULT_MOCK_RESPONSE_PROMPT_TOKEN_COUNT,
@@ -249,7 +248,7 @@ def __init__(
         self.start_time = start_time  # log the call start time
         self.call_type = call_type
         self.litellm_call_id = litellm_call_id
-        self.litellm_trace_id = litellm_trace_id
+        self.litellm_trace_id: str = litellm_trace_id or str(uuid.uuid4())
         self.function_id = function_id
         self.streaming_chunks: List[Any] = []  # for generating complete stream response
         self.sync_streaming_chunks: List[Any] = (
@@ -3500,6 +3499,21 @@ def get_response_time(
         else:
             return end_time_float - start_time_float
 
+    @staticmethod
+    def _get_standard_logging_payload_trace_id(
+        logging_obj: Logging,
+        litellm_params: dict,
+    ) -> str:
+        """
+        Returns the `litellm_trace_id` for this request
+
+        This helps link sessions when multiple requests are made in a single session
+        """
+        dynamic_trace_id = litellm_params.get("litellm_trace_id")
+        if dynamic_trace_id:
+            return str(dynamic_trace_id)
+        return logging_obj.litellm_trace_id
+
 
 def get_standard_logging_object_payload(
     kwargs: Optional[dict],
@@ -3652,7 +3666,10 @@ def get_standard_logging_object_payload(
 
     payload: StandardLoggingPayload = StandardLoggingPayload(
         id=str(id),
-        trace_id=kwargs.get("litellm_trace_id"),  # type: ignore
+        trace_id=StandardLoggingPayloadSetup._get_standard_logging_payload_trace_id(
+            logging_obj=logging_obj,
+            litellm_params=litellm_params,
+        ),
         call_type=call_type or "",
         cache_hit=cache_hit,
         stream=stream,
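The trace-id change above is what groups a multi-turn session: every Logging object now gets a litellm_trace_id (generated if none is supplied), and an explicitly passed trace id takes precedence in the standard logging payload. A self-contained sketch of that precedence rule, using a stand-in class rather than litellm's real Logging object:

import uuid
from typing import Optional

class FakeLogging:
    # Stand-in for litellm's Logging object: trace id falls back to a fresh UUID.
    def __init__(self, litellm_trace_id: Optional[str] = None):
        self.litellm_trace_id: str = litellm_trace_id or str(uuid.uuid4())

def resolve_trace_id(logging_obj: FakeLogging, litellm_params: dict) -> str:
    # Mirrors _get_standard_logging_payload_trace_id: a dynamic trace id wins.
    dynamic_trace_id = litellm_params.get("litellm_trace_id")
    if dynamic_trace_id:
        return str(dynamic_trace_id)
    return logging_obj.litellm_trace_id

assert resolve_trace_id(FakeLogging(), {"litellm_trace_id": "session-123"}) == "session-123"
print(resolve_trace_id(FakeLogging(), {}))  # falls back to the auto-generated UUID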

litellm/proxy/_types.py (+35 / -33)

@@ -654,9 +654,9 @@ class GenerateRequestBase(LiteLLMPydanticObjectBase):
     allowed_cache_controls: Optional[list] = []
     config: Optional[dict] = {}
     permissions: Optional[dict] = {}
-    model_max_budget: Optional[
-        dict
-    ] = {}  # {"gpt-4": 5.0, "gpt-3.5-turbo": 5.0}, defaults to {}
+    model_max_budget: Optional[dict] = (
+        {}
+    )  # {"gpt-4": 5.0, "gpt-3.5-turbo": 5.0}, defaults to {}
 
     model_config = ConfigDict(protected_namespaces=())
     model_rpm_limit: Optional[dict] = None
@@ -918,12 +918,12 @@ class NewCustomerRequest(BudgetNewRequest):
     alias: Optional[str] = None  # human-friendly alias
     blocked: bool = False  # allow/disallow requests for this end-user
     budget_id: Optional[str] = None  # give either a budget_id or max_budget
-    allowed_model_region: Optional[
-        AllowedModelRegion
-    ] = None  # require all user requests to use models in this specific region
-    default_model: Optional[
-        str
-    ] = None  # if no equivalent model in allowed region - default all requests to this model
+    allowed_model_region: Optional[AllowedModelRegion] = (
+        None  # require all user requests to use models in this specific region
+    )
+    default_model: Optional[str] = (
+        None  # if no equivalent model in allowed region - default all requests to this model
+    )
 
     @model_validator(mode="before")
     @classmethod
@@ -945,12 +945,12 @@ class UpdateCustomerRequest(LiteLLMPydanticObjectBase):
     blocked: bool = False  # allow/disallow requests for this end-user
     max_budget: Optional[float] = None
     budget_id: Optional[str] = None  # give either a budget_id or max_budget
-    allowed_model_region: Optional[
-        AllowedModelRegion
-    ] = None  # require all user requests to use models in this specific region
-    default_model: Optional[
-        str
-    ] = None  # if no equivalent model in allowed region - default all requests to this model
+    allowed_model_region: Optional[AllowedModelRegion] = (
+        None  # require all user requests to use models in this specific region
+    )
+    default_model: Optional[str] = (
+        None  # if no equivalent model in allowed region - default all requests to this model
+    )
 
 
 class DeleteCustomerRequest(LiteLLMPydanticObjectBase):
@@ -1086,9 +1086,9 @@ class BlockKeyRequest(LiteLLMPydanticObjectBase):
 
 class AddTeamCallback(LiteLLMPydanticObjectBase):
     callback_name: str
-    callback_type: Optional[
-        Literal["success", "failure", "success_and_failure"]
-    ] = "success_and_failure"
+    callback_type: Optional[Literal["success", "failure", "success_and_failure"]] = (
+        "success_and_failure"
+    )
     callback_vars: Dict[str, str]
 
     @model_validator(mode="before")
@@ -1346,9 +1346,9 @@ class ConfigList(LiteLLMPydanticObjectBase):
     stored_in_db: Optional[bool]
     field_default_value: Any
     premium_field: bool = False
-    nested_fields: Optional[
-        List[FieldDetail]
-    ] = None  # For nested dictionary or Pydantic fields
+    nested_fields: Optional[List[FieldDetail]] = (
+        None  # For nested dictionary or Pydantic fields
+    )
 
 
 class ConfigGeneralSettings(LiteLLMPydanticObjectBase):
@@ -1616,9 +1616,9 @@ class LiteLLM_OrganizationMembershipTable(LiteLLMPydanticObjectBase):
     budget_id: Optional[str] = None
     created_at: datetime
     updated_at: datetime
-    user: Optional[
-        Any
-    ] = None  # You might want to replace 'Any' with a more specific type if available
+    user: Optional[Any] = (
+        None  # You might want to replace 'Any' with a more specific type if available
+    )
     litellm_budget_table: Optional[LiteLLM_BudgetTable] = None
 
     model_config = ConfigDict(protected_namespaces=())
@@ -2015,6 +2015,8 @@ class SpendLogsPayload(TypedDict):
     custom_llm_provider: Optional[str]
     messages: Optional[Union[str, list, dict]]
     response: Optional[Union[str, list, dict]]
+    proxy_server_request: Optional[str]
+    session_id: Optional[str]
 
 
 class SpanAttributes(str, enum.Enum):
@@ -2366,9 +2368,9 @@ class TeamModelDeleteRequest(BaseModel):
 # Organization Member Requests
 class OrganizationMemberAddRequest(OrgMemberAddRequest):
     organization_id: str
-    max_budget_in_organization: Optional[
-        float
-    ] = None  # Users max budget within the organization
+    max_budget_in_organization: Optional[float] = (
+        None  # Users max budget within the organization
+    )
 
 
 class OrganizationMemberDeleteRequest(MemberDeleteRequest):
@@ -2557,9 +2559,9 @@ class ProviderBudgetResponse(LiteLLMPydanticObjectBase):
     Maps provider names to their budget configs.
     """
 
-    providers: Dict[
-        str, ProviderBudgetResponseObject
-    ] = {}  # Dictionary mapping provider names to their budget configurations
+    providers: Dict[str, ProviderBudgetResponseObject] = (
+        {}
+    )  # Dictionary mapping provider names to their budget configurations
 
 
 class ProxyStateVariables(TypedDict):
@@ -2687,9 +2689,9 @@ class LiteLLM_JWTAuth(LiteLLMPydanticObjectBase):
     enforce_rbac: bool = False
     roles_jwt_field: Optional[str] = None  # v2 on role mappings
     role_mappings: Optional[List[RoleMapping]] = None
-    object_id_jwt_field: Optional[
-        str
-    ] = None  # can be either user / team, inferred from the role mapping
+    object_id_jwt_field: Optional[str] = (
+        None  # can be either user / team, inferred from the role mapping
+    )
     scope_mappings: Optional[List[ScopeMapping]] = None
     enforce_scope_based_access: bool = False
     enforce_team_based_model_access: bool = False
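The two new SpendLogsPayload fields above are what the session handler reads back later: proxy_server_request carries the original Responses API body as a JSON string, and session_id groups the turns of one conversation. An illustrative entry with made-up values (the proxy fills these in when writing a spend log):

import json

# Illustrative values only; ids, model, and content are placeholders.
spend_log_entry = {
    "request_id": "resp_abc123",
    "session_id": "3f6a2c90-example-session-uuid",
    "proxy_server_request": json.dumps(
        {
            "model": "anthropic/claude-3-5-sonnet-20240620",
            "input": "What is my favorite color?",
        }
    ),
    "response": {"choices": [{"message": {"role": "assistant", "content": "Blue."}}]},
}
# The handler reads "input" back out of proxy_server_request and the assistant
# message out of "response" to rebuild the chat history.
print(json.loads(spend_log_entry["proxy_server_request"])["input"])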

litellm/proxy/proxy_config.yaml (+5 / -6)

@@ -1,8 +1,7 @@
 model_list:
-  - model_name: openai/*
+  - model_name: anthropic/*
     litellm_params:
-      model: openai/*
-      api_key: os.environ/OPENAI_API_KEY
-
-router_settings:
-  optional_pre_call_checks: ["responses_api_deployment_check"]
+      model: anthropic/*
+      api_key: os.environ/ANTHROPIC_API_KEY
+general_settings:
+  store_prompts_in_spend_logs: true

litellm/proxy/schema.prisma (+2)

@@ -226,6 +226,8 @@ model LiteLLM_SpendLogs {
   requester_ip_address String?
   messages Json? @default("{}")
   response Json? @default("{}")
+  session_id String?
+  proxy_server_request Json? @default("{}")
   @@index([startTime])
   @@index([end_user])
 }
