
Commit 72e961a

LiteLLM Minor Fixes and Improvements (08/06/2024) (#5567)
* fix(utils.py): return citations for perplexity streaming (Fixes #5535)

* fix(anthropic/chat.py): support fallbacks for anthropic streaming (#5542)
  * fix(anthropic/chat.py): support fallbacks for anthropic streaming (Fixes #5512)
  * fix(anthropic/chat.py): use module level http client if none given (prevents early client closure)
  * fix: fix linting errors
  * fix(http_handler.py): fix raise_for_status error handling
  * test: retry flaky test
  * fix otel type
  * fix(bedrock/embed): fix error raising
  * test(test_openai_batches_and_files.py): skip azure batches test (for now), quota exceeded
  * fix(test_router.py): skip azure batch route test (for now), hit batch quota limits
  * Co-authored-by: Ishaan Jaff <[email protected]>

* All `model_group_alias` should show up in `/models`, `/model/info`, `/model_group/info` (#5539)
  * fix(router.py): support returning model_alias model names in `/v1/models`
  * fix(proxy_server.py): support returning model aliases on `/model/info`
  * feat(router.py): support returning model group alias for `/model_group/info`
  * fix(proxy_server.py): fix linting errors
  * fix(proxy_server.py): fix linting errors

* build(model_prices_and_context_window.json): add amazon titan text premier pricing information (Closes #5560)

* feat(litellm_logging.py): log standard logging response object for pass through endpoints. Allows bedrock /invoke agent calls to be correctly logged to langfuse + s3

* fix(success_handler.py): fix linting error

* fix(success_handler.py): fix linting errors

* fix(team_endpoints.py): allow admin to update team member budgets

Co-authored-by: Ishaan Jaff <[email protected]>
1 parent e4dcd6f commit 72e961a

25 files changed (+508, -98 lines)

.devcontainer/devcontainer.json

+2-1
@@ -22,7 +22,8 @@
         "ms-python.python",
         "ms-python.vscode-pylance",
         "GitHub.copilot",
-        "GitHub.copilot-chat"
+        "GitHub.copilot-chat",
+        "ms-python.autopep8"
       ]
     }
   },

.pre-commit-config.yaml

+6-6
@@ -1,12 +1,12 @@
 repos:
 - repo: local
   hooks:
-    # - id: mypy
-    #   name: mypy
-    #   entry: python3 -m mypy --ignore-missing-imports
-    #   language: system
-    #   types: [python]
-    #   files: ^litellm/
+    - id: mypy
+      name: mypy
+      entry: python3 -m mypy --ignore-missing-imports
+      language: system
+      types: [python]
+      files: ^litellm/
     - id: isort
       name: isort
       entry: isort

litellm/integrations/langfuse.py

+8
@@ -208,6 +208,14 @@ def log_event(
         ):
             input = prompt
             output = response_obj["text"]
+        elif (
+            kwargs.get("call_type") is not None
+            and kwargs.get("call_type") == "pass_through_endpoint"
+            and response_obj is not None
+            and isinstance(response_obj, dict)
+        ):
+            input = prompt
+            output = response_obj.get("response", "")
         print_verbose(f"OUTPUT IN LANGFUSE: {output}; original: {response_obj}")
         trace_id = None
         generation_id = None
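
The new branch chooses the Langfuse output for pass-through endpoint calls, where `response_obj` is a plain dict rather than a model response object. A minimal standalone sketch of that selection logic (the helper below is illustrative only and not part of the library):

```python
from typing import Any, Optional


def pick_langfuse_output(kwargs: dict, response_obj: Any) -> Optional[Any]:
    """Illustrative restatement of the branch added above: pass-through
    endpoint responses are plain dicts, so the logged output comes from
    their "response" key instead of response_obj["text"]."""
    if kwargs.get("call_type") == "pass_through_endpoint" and isinstance(response_obj, dict):
        return response_obj.get("response", "")
    if isinstance(response_obj, dict) and "text" in response_obj:
        return response_obj["text"]
    return None


# Example: a Bedrock /invoke-agent style pass-through payload
print(pick_langfuse_output(
    kwargs={"call_type": "pass_through_endpoint"},
    response_obj={"response": "agent answer"},
))  # -> "agent answer"
```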

litellm/integrations/s3.py

+1-7
@@ -101,12 +101,6 @@ def log_event(self, kwargs, response_obj, start_time, end_time, print_verbose):
             metadata = (
                 litellm_params.get("metadata", {}) or {}
             )  # if litellm_params['metadata'] == None
-            messages = kwargs.get("messages")
-            optional_params = kwargs.get("optional_params", {})
-            call_type = kwargs.get("call_type", "litellm.completion")
-            cache_hit = kwargs.get("cache_hit", False)
-            usage = response_obj["usage"]
-            id = response_obj.get("id", str(uuid.uuid4()))

             # Clean Metadata before logging - never log raw metadata
             # the raw metadata can contain circular references which leads to infinite recursion
@@ -171,5 +165,5 @@ def log_event(self, kwargs, response_obj, start_time, end_time, print_verbose):
             print_verbose(f"s3 Layer Logging - final response object: {response_obj}")
             return response
         except Exception as e:
-            verbose_logger.debug(f"s3 Layer Error - {str(e)}\n{traceback.format_exc()}")
+            verbose_logger.exception(f"s3 Layer Error - {str(e)}")
             pass
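
The logging change relies on the standard-library behavior that `logger.exception(...)`, when called inside an `except` block, appends the active traceback automatically (and logs at ERROR level), so the manual `traceback.format_exc()` interpolation is no longer needed. A tiny sketch with a plain stdlib logger standing in for LiteLLM's `verbose_logger`:

```python
import logging

logging.basicConfig(level=logging.DEBUG)
logger = logging.getLogger("s3_example")

try:
    raise ValueError("boom")
except Exception as e:
    # logger.exception records the current traceback for us, replacing the
    # old f"{e}\n{traceback.format_exc()}" pattern removed in the diff above.
    logger.exception(f"s3 Layer Error - {str(e)}")
```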

litellm/litellm_core_utils/litellm_logging.py

+17-1
@@ -41,6 +41,7 @@
     StandardLoggingMetadata,
     StandardLoggingModelInformation,
     StandardLoggingPayload,
+    StandardPassThroughResponseObject,
     TextCompletionResponse,
     TranscriptionResponse,
 )
@@ -534,7 +535,9 @@ def _response_cost_calculator(
         """
         ## RESPONSE COST ##
         custom_pricing = use_custom_pricing_for_model(
-            litellm_params=self.litellm_params
+            litellm_params=(
+                self.litellm_params if hasattr(self, "litellm_params") else None
+            )
         )

         if cache_hit is None:
@@ -611,6 +614,17 @@ def _success_handler_helper_fn(
                 ] = result._hidden_params
                 ## STANDARDIZED LOGGING PAYLOAD

+                self.model_call_details["standard_logging_object"] = (
+                    get_standard_logging_object_payload(
+                        kwargs=self.model_call_details,
+                        init_response_obj=result,
+                        start_time=start_time,
+                        end_time=end_time,
+                        logging_obj=self,
+                    )
+                )
+            elif isinstance(result, dict):  # pass-through endpoints
+                ## STANDARDIZED LOGGING PAYLOAD
                 self.model_call_details["standard_logging_object"] = (
                     get_standard_logging_object_payload(
                         kwargs=self.model_call_details,
@@ -2271,6 +2285,8 @@ def get_standard_logging_object_payload(
     elif isinstance(init_response_obj, BaseModel):
         response_obj = init_response_obj.model_dump()
         hidden_params = getattr(init_response_obj, "_hidden_params", None)
+    elif isinstance(init_response_obj, dict):
+        response_obj = init_response_obj
     else:
         response_obj = {}
     # standardize this function to be used across, s3, dynamoDB, langfuse logging
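
Pass-through endpoints hand the logging pipeline a plain dict instead of a Pydantic response object, so the payload builder now needs a dict branch. A small standalone sketch of that dispatch (function and class names here are hypothetical stand-ins, not LiteLLM internals):

```python
from typing import Any

from pydantic import BaseModel


def normalize_response_obj(init_response_obj: Any) -> dict:
    """Illustrative version of the dispatch added above: Pydantic model
    responses are dumped to a dict, pass-through responses are already
    dicts and are used as-is, anything else falls back to an empty dict."""
    if isinstance(init_response_obj, BaseModel):
        return init_response_obj.model_dump()
    if isinstance(init_response_obj, dict):  # pass-through endpoints
        return init_response_obj
    return {}


class FakeModelResponse(BaseModel):
    id: str = "chatcmpl-123"


print(normalize_response_obj(FakeModelResponse()))           # {'id': 'chatcmpl-123'}
print(normalize_response_obj({"response": "agent answer"}))  # {'response': 'agent answer'}
print(normalize_response_obj(None))                          # {}
```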

litellm/llms/anthropic/chat.py

+34-35
@@ -674,7 +674,7 @@ async def make_call(
     timeout: Optional[Union[float, httpx.Timeout]],
 ):
     if client is None:
-        client = _get_async_httpx_client()  # Create a new client if none provided
+        client = litellm.module_level_aclient

     try:
         response = await client.post(
@@ -690,11 +690,6 @@ async def make_call(
                 raise e
         raise AnthropicError(status_code=500, message=str(e))

-    if response.status_code != 200:
-        raise AnthropicError(
-            status_code=response.status_code, message=await response.aread()
-        )
-
     completion_stream = ModelResponseIterator(
         streaming_response=response.aiter_lines(), sync_stream=False
     )
@@ -721,7 +716,7 @@ def make_sync_call(
     timeout: Optional[Union[float, httpx.Timeout]],
 ):
     if client is None:
-        client = HTTPHandler()  # Create a new client if none provided
+        client = litellm.module_level_client  # re-use a module level client

     try:
         response = client.post(
@@ -869,6 +864,7 @@ async def acompletion_stream_function(
         model_response: ModelResponse,
         print_verbose: Callable,
         timeout: Union[float, httpx.Timeout],
+        client: Optional[AsyncHTTPHandler],
         encoding,
         api_key,
         logging_obj,
@@ -882,19 +878,18 @@ async def acompletion_stream_function(
     ):
         data["stream"] = True

+        completion_stream = await make_call(
+            client=client,
+            api_base=api_base,
+            headers=headers,
+            data=json.dumps(data),
+            model=model,
+            messages=messages,
+            logging_obj=logging_obj,
+            timeout=timeout,
+        )
         streamwrapper = CustomStreamWrapper(
-            completion_stream=None,
-            make_call=partial(
-                make_call,
-                client=None,
-                api_base=api_base,
-                headers=headers,
-                data=json.dumps(data),
-                model=model,
-                messages=messages,
-                logging_obj=logging_obj,
-                timeout=timeout,
-            ),
+            completion_stream=completion_stream,
             model=model,
             custom_llm_provider="anthropic",
             logging_obj=logging_obj,
@@ -1080,6 +1075,11 @@ def completion(
                 logger_fn=logger_fn,
                 headers=headers,
                 timeout=timeout,
+                client=(
+                    client
+                    if client is not None and isinstance(client, AsyncHTTPHandler)
+                    else None
+                ),
             )
         else:
             return self.acompletion_function(
@@ -1105,33 +1105,32 @@ def completion(
             )
         else:
             ## COMPLETION CALL
-            if client is None or not isinstance(client, HTTPHandler):
-                client = HTTPHandler(timeout=timeout)  # type: ignore
-            else:
-                client = client
             if (
                 stream is True
             ):  # if function call - fake the streaming (need complete blocks for output parsing in openai format)
                 data["stream"] = stream
+                completion_stream = make_sync_call(
+                    client=client,
+                    api_base=api_base,
+                    headers=headers,  # type: ignore
+                    data=json.dumps(data),
+                    model=model,
+                    messages=messages,
+                    logging_obj=logging_obj,
+                    timeout=timeout,
+                )
                 return CustomStreamWrapper(
-                    completion_stream=None,
-                    make_call=partial(
-                        make_sync_call,
-                        client=None,
-                        api_base=api_base,
-                        headers=headers,  # type: ignore
-                        data=json.dumps(data),
-                        model=model,
-                        messages=messages,
-                        logging_obj=logging_obj,
-                        timeout=timeout,
-                    ),
+                    completion_stream=completion_stream,
                     model=model,
                     custom_llm_provider="anthropic",
                     logging_obj=logging_obj,
                 )

             else:
+                if client is None or not isinstance(client, HTTPHandler):
+                    client = HTTPHandler(timeout=timeout)  # type: ignore
+                else:
+                    client = client
                 response = client.post(
                     api_base, headers=headers, data=json.dumps(data), timeout=timeout
                 )
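
Two things change here for Anthropic streaming: the handlers reuse module-level HTTP clients instead of creating (and potentially prematurely closing) a client per call, and the stream is now opened eagerly via `make_call` / `make_sync_call` before the `CustomStreamWrapper` is returned. Opening the stream eagerly means a provider error surfaces while the caller (for example, router fallback logic) is still on the stack, instead of only when the first chunk is iterated. A generic sketch of that eager-vs-lazy difference, using hypothetical names rather than LiteLLM internals:

```python
def open_stream():
    # Stand-in for make_call(): pretend the provider returns HTTP 529.
    raise RuntimeError("overloaded (529)")


def lazy_wrapper(make_call):
    # Old shape: the request happens only when iteration starts,
    # so the error escapes after completion() has already returned.
    def gen():
        yield from make_call()
    return gen()


def eager_wrapper(make_call):
    # New shape: the request is made up front, so the error is raised
    # here, where fallback / retry logic can still catch it.
    return iter(make_call())


try:
    eager_wrapper(open_stream)
except RuntimeError as e:
    print(f"caught at call time, fallback can run: {e}")

gen = lazy_wrapper(open_stream)  # no error yet
try:
    next(gen)
except RuntimeError as e:
    print(f"caught only at first chunk: {e}")
```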

litellm/llms/bedrock/embed/embedding.py

+1-1
@@ -110,7 +110,7 @@ def _make_sync_call(
         response.raise_for_status()
     except httpx.HTTPStatusError as err:
         error_code = err.response.status_code
-        raise BedrockError(status_code=error_code, message=response.text)
+        raise BedrockError(status_code=error_code, message=err.response.text)
     except httpx.TimeoutException:
         raise BedrockError(status_code=408, message="Timeout error occurred.")

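
The fix reads the error body from `err.response`, the response object attached to the raised `httpx.HTTPStatusError`, rather than from a separate `response` variable. A minimal httpx sketch of the same pattern (the endpoint and error type are illustrative, not Bedrock specifics):

```python
import httpx


def embed_call(url: str) -> dict:
    try:
        response = httpx.post(url, json={"inputText": "hello"})
        # raise_for_status raises httpx.HTTPStatusError for 4xx/5xx responses
        response.raise_for_status()
        return response.json()
    except httpx.HTTPStatusError as err:
        # Status code and body come from the response bound to the error itself.
        raise RuntimeError(
            f"provider error {err.response.status_code}: {err.response.text}"
        ) from err
```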

litellm/llms/custom_httpx/http_handler.py

+13-18
@@ -37,16 +37,10 @@ def create_client(

     # SSL certificates (a.k.a CA bundle) used to verify the identity of requested hosts.
     # /path/to/certificate.pem
-    ssl_verify = os.getenv(
-        "SSL_VERIFY",
-        litellm.ssl_verify
-    )
+    ssl_verify = os.getenv("SSL_VERIFY", litellm.ssl_verify)
     # An SSL certificate used by the requested host to authenticate the client.
     # /path/to/client.pem
-    cert = os.getenv(
-        "SSL_CERTIFICATE",
-        litellm.ssl_certificate
-    )
+    cert = os.getenv("SSL_CERTIFICATE", litellm.ssl_certificate)

     if timeout is None:
         timeout = _DEFAULT_TIMEOUT
@@ -277,16 +271,10 @@ def __init__(

         # SSL certificates (a.k.a CA bundle) used to verify the identity of requested hosts.
         # /path/to/certificate.pem
-        ssl_verify = os.getenv(
-            "SSL_VERIFY",
-            litellm.ssl_verify
-        )
+        ssl_verify = os.getenv("SSL_VERIFY", litellm.ssl_verify)
         # An SSL certificate used by the requested host to authenticate the client.
         # /path/to/client.pem
-        cert = os.getenv(
-            "SSL_CERTIFICATE",
-            litellm.ssl_certificate
-        )
+        cert = os.getenv("SSL_CERTIFICATE", litellm.ssl_certificate)

         if client is None:
             # Create a client with a connection pool
@@ -334,13 +322,21 @@ def post(
                 "POST", url, data=data, json=json, params=params, headers=headers  # type: ignore
             )
             response = self.client.send(req, stream=stream)
+            response.raise_for_status()
             return response
         except httpx.TimeoutException:
             raise litellm.Timeout(
                 message=f"Connection timed out after {timeout} seconds.",
                 model="default-model-name",
                 llm_provider="litellm-httpx-handler",
             )
+        except httpx.HTTPStatusError as e:
+            setattr(e, "status_code", e.response.status_code)
+            if stream is True:
+                setattr(e, "message", e.response.read())
+            else:
+                setattr(e, "message", e.response.text)
+            raise e
         except Exception as e:
             raise e

@@ -375,7 +371,6 @@ def put(
         except Exception as e:
             raise e

-
     def __del__(self) -> None:
         try:
             self.close()
@@ -437,4 +432,4 @@ def _get_httpx_client(params: Optional[dict] = None) -> HTTPHandler:
     _new_client = HTTPHandler(timeout=httpx.Timeout(timeout=600.0, connect=5.0))

     litellm.in_memory_llm_clients_cache[_cache_key_name] = _new_client
-    return _new_client
+    return _new_client
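
With this change, the sync `post()` no longer hands a failed response back to the caller: `raise_for_status()` converts non-2xx responses into an `httpx.HTTPStatusError`, annotated with `status_code` and `message` attributes (the body is read for streaming responses, `.text` otherwise). A sketch of the same error-shaping pattern in plain httpx, assuming an example URL:

```python
import httpx


def post_with_status_error(url: str, payload: dict) -> httpx.Response:
    """Sketch of the behavior added above: raise on non-2xx responses and
    attach status_code / message attributes to the httpx error."""
    try:
        response = httpx.post(url, json=payload)
        response.raise_for_status()
        return response
    except httpx.HTTPStatusError as e:
        setattr(e, "status_code", e.response.status_code)
        setattr(e, "message", e.response.text)
        raise e


# Example usage against a throwaway test endpoint (assumed reachable):
try:
    post_with_status_error("https://httpbin.org/status/503", {})
except httpx.HTTPStatusError as e:
    print(e.status_code)  # 503, available to callers that map errors to provider exceptions
```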

litellm/main.py

+11
@@ -534,6 +534,15 @@ def mock_completion(
             model=model,  # type: ignore
             request=httpx.Request(method="POST", url="https://api.openai.com/v1/"),
         )
+    elif isinstance(mock_response, str) and mock_response.startswith(
+        "Exception: mock_streaming_error"
+    ):
+        mock_response = litellm.MockException(
+            message="This is a mock error raised mid-stream",
+            llm_provider="anthropic",
+            model=model,
+            status_code=529,
+        )
     time_delay = kwargs.get("mock_delay", None)
     if time_delay is not None:
         time.sleep(time_delay)
@@ -561,6 +570,8 @@ def mock_completion(
                 custom_llm_provider="openai",
                 logging_obj=logging,
             )
+        if isinstance(mock_response, litellm.MockException):
+            raise mock_response
         if n is None:
             model_response.choices[0].message.content = mock_response  # type: ignore
         else:
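
This lets tests exercise a streaming failure (and the fallback path) without calling a real provider: a `mock_response` string beginning with `"Exception: mock_streaming_error"` is converted into a `litellm.MockException` with status code 529. A hedged usage sketch, assuming `mock_response` is forwarded from `completion()` into `mock_completion()` as usual:

```python
import litellm

try:
    response = litellm.completion(
        model="anthropic/claude-3-5-sonnet-20240620",
        messages=[{"role": "user", "content": "hi"}],
        mock_response="Exception: mock_streaming_error",
        stream=True,
    )
    # Consume the stream so an error raised mid-stream also surfaces here.
    for _chunk in response:
        pass
except Exception as e:
    # Expected: a litellm.MockException carrying status_code 529, useful for
    # testing retry / fallback behavior without a real Anthropic call.
    print(type(e).__name__, getattr(e, "status_code", None))
```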

litellm/model_prices_and_context_window_backup.json

+9
@@ -3408,6 +3408,15 @@
         "litellm_provider": "bedrock",
         "mode": "chat"
     },
+    "amazon.titan-text-premier-v1:0": {
+        "max_tokens": 32000,
+        "max_input_tokens": 42000,
+        "max_output_tokens": 32000,
+        "input_cost_per_token": 0.0000005,
+        "output_cost_per_token": 0.0000015,
+        "litellm_provider": "bedrock",
+        "mode": "chat"
+    },
     "amazon.titan-embed-text-v1": {
         "max_tokens": 8192,
         "max_input_tokens": 8192,

litellm/proxy/_new_secret_config.yaml

+11-2
@@ -1,7 +1,16 @@
 model_list:
-  - model_name: "*"
+  - model_name: "anthropic/claude-3-5-sonnet-20240620"
+    litellm_params:
+      model: anthropic/claude-3-5-sonnet-20240620
+      # api_base: http://0.0.0.0:9000
+  - model_name: gpt-3.5-turbo
     litellm_params:
       model: openai/*

 litellm_settings:
-  default_max_internal_user_budget: 2
+  success_callback: ["s3"]
+  s3_callback_params:
+    s3_bucket_name: litellm-logs  # AWS Bucket Name for S3
+    s3_region_name: us-west-2  # AWS Region Name for S3
+    s3_aws_access_key_id: os.environ/AWS_ACCESS_KEY_ID  # use os.environ/<variable name> to pass environment variables. This is the AWS Access Key ID for S3
+    s3_aws_secret_access_key: os.environ/AWS_SECRET_ACCESS_KEY  # AWS Secret Access Key for S3
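
The updated example config routes successful calls through the S3 success callback. A rough SDK-side sketch of the same idea is below; it assumes the Python SDK reads `litellm.s3_callback_params` the same way the proxy reads `s3_callback_params`, so treat the exact attribute names as illustrative:

```python
import litellm

# Enable the S3 success callback from Python (parameter names assumed to
# mirror the proxy's s3_callback_params block shown above).
litellm.success_callback = ["s3"]
litellm.s3_callback_params = {
    "s3_bucket_name": "litellm-logs",
    "s3_region_name": "us-west-2",
    "s3_aws_access_key_id": "os.environ/AWS_ACCESS_KEY_ID",
    "s3_aws_secret_access_key": "os.environ/AWS_SECRET_ACCESS_KEY",
}
```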
