Conversation
Instrument `ChatCompletionsClient.complete()` and `EmbeddingsClient.embed()` from the `azure-ai-inference` SDK with custom monkey-patching. Supports sync and async clients, streaming, tool calls, and both v1/latest semconv versions.
Deploying logfire-docs with
|
| Latest commit: |
f175bd4
|
| Status: | ✅ Deploy successful! |
| Preview URL: | https://0937dda8.logfire-docs.pages.dev |
| Branch Preview URL: | https://add-azure-ai-inference-integ.logfire-docs.pages.dev |
| def _record_chunk(self, chunk: Any) -> None: | ||
| for choice in getattr(chunk, 'choices', []): | ||
| delta = getattr(choice, 'delta', None) | ||
| if delta: | ||
| content = getattr(delta, 'content', None) | ||
| if content: | ||
| self._chunks.append(content) | ||
|
|
||
| def _get_stream_attributes(self) -> dict[str, Any]: | ||
| result = dict(**self._span_data) | ||
| combined = ''.join(self._chunks) | ||
| if 1 in self._versions: | ||
| result['response_data'] = { | ||
| 'combined_chunk_content': combined, | ||
| 'chunk_count': len(self._chunks), | ||
| } | ||
| if 'latest' in self._versions and self._chunks: | ||
| result[OUTPUT_MESSAGES] = [ | ||
| OutputMessage( | ||
| role='assistant', | ||
| parts=[TextPart(type='text', content=combined)], | ||
| ) | ||
| ] | ||
| return result |
There was a problem hiding this comment.
🚩 Streaming info span missing response metadata (model, id, token usage)
The _record_chunk method at logfire/_internal/integrations/llm_providers/azure_ai_inference.py:642-648 only extracts text content from streaming chunks. Each StreamingChatCompletionsUpdate chunk also carries model, id, and (on the final chunk) usage info. The _get_stream_attributes method returns self._span_data (request-time data) augmented with combined chunk content, but never includes gen_ai.response.model, gen_ai.response.id, or token usage from the stream.
By contrast, the OpenAI integration's OpenaiChatCompletionStreamState uses ChatCompletionStreamState to reconstruct a full completion snapshot including model, usage, and message details.
This means streaming responses will be missing response metadata that non-streaming responses include. This is a feature gap rather than a correctness bug, but it's worth noting for parity with the OpenAI integration.
Was this helpful? React with 👍 or 👎 to provide feedback.
| assert exporter.exported_spans_as_dict(parse_json_attributes=True) == snapshot( | ||
| [ | ||
| { | ||
| 'name': 'Chat completion with {request_data[model]!r}', | ||
| 'context': {'trace_id': 1, 'span_id': 1, 'is_remote': False}, | ||
| 'parent': None, | ||
| 'start_time': 1000000000, | ||
| 'end_time': 2000000000, | ||
| 'attributes': { | ||
| 'code.filepath': 'test_azure_ai_inference.py', | ||
| 'code.function': 'test_sync_chat_streaming', | ||
| 'code.lineno': 123, | ||
| 'request_data': { | ||
| 'model': 'gpt-4', | ||
| 'messages': [{'role': 'user', 'content': "[Scrubbed due to 'secret']"}], | ||
| }, | ||
| 'gen_ai.provider.name': 'azure.ai.inference', | ||
| 'gen_ai.operation.name': 'chat', | ||
| 'gen_ai.request.model': 'gpt-4', | ||
| 'gen_ai.input.messages': [ | ||
| {'role': 'user', 'parts': [{'type': 'text', 'content': 'Tell me a secret'}]} | ||
| ], | ||
| 'logfire.msg_template': 'Chat completion with {request_data[model]!r}', | ||
| 'logfire.msg': "Chat completion with 'gpt-4'", | ||
| 'logfire.json_schema': { | ||
| 'type': 'object', | ||
| 'properties': { | ||
| 'request_data': {'type': 'object'}, | ||
| 'gen_ai.provider.name': {}, | ||
| 'gen_ai.operation.name': {}, | ||
| 'gen_ai.request.model': {}, | ||
| 'gen_ai.input.messages': {'type': 'array'}, | ||
| }, | ||
| }, | ||
| 'logfire.tags': ('LLM',), | ||
| 'logfire.span_type': 'span', | ||
| 'gen_ai.response.model': 'gpt-4', | ||
| 'logfire.scrubbed': [ | ||
| { | ||
| 'path': ['attributes', 'request_data', 'messages', 0, 'content'], | ||
| 'matched_substring': 'secret', | ||
| } | ||
| ], | ||
| }, | ||
| }, | ||
| { | ||
| 'name': 'streaming response from {request_data[model]!r} took {duration:.2f}s', | ||
| 'context': {'trace_id': 2, 'span_id': 3, 'is_remote': False}, | ||
| 'parent': None, | ||
| 'start_time': 5000000000, | ||
| 'end_time': 5000000000, | ||
| 'attributes': { | ||
| 'logfire.span_type': 'log', | ||
| 'logfire.level_num': 9, | ||
| 'logfire.msg_template': 'streaming response from {request_data[model]!r} took {duration:.2f}s', | ||
| 'logfire.msg': "streaming response from 'gpt-4' took 1.00s", | ||
| 'code.filepath': 'test_azure_ai_inference.py', | ||
| 'code.function': 'test_sync_chat_streaming', | ||
| 'code.lineno': 123, | ||
| 'duration': 1.0, | ||
| 'request_data': { | ||
| 'model': 'gpt-4', | ||
| 'messages': [{'role': 'user', 'content': "[Scrubbed due to 'secret']"}], | ||
| }, | ||
| 'gen_ai.provider.name': 'azure.ai.inference', | ||
| 'gen_ai.operation.name': 'chat', | ||
| 'gen_ai.request.model': 'gpt-4', | ||
| 'gen_ai.input.messages': [ | ||
| {'role': 'user', 'parts': [{'type': 'text', 'content': 'Tell me a secret'}]} | ||
| ], | ||
| 'response_data': {'combined_chunk_content': "[Scrubbed due to 'secret']", 'chunk_count': 2}, | ||
| 'gen_ai.output.messages': [ | ||
| {'role': 'assistant', 'parts': [{'type': 'text', 'content': 'The answer is secret'}]} | ||
| ], | ||
| 'logfire.json_schema': { | ||
| 'type': 'object', | ||
| 'properties': { | ||
| 'duration': {}, | ||
| 'request_data': {'type': 'object'}, | ||
| 'gen_ai.provider.name': {}, | ||
| 'gen_ai.operation.name': {}, | ||
| 'gen_ai.request.model': {}, | ||
| 'gen_ai.input.messages': {'type': 'array'}, | ||
| 'response_data': {'type': 'object'}, | ||
| 'gen_ai.output.messages': {'type': 'array'}, | ||
| }, | ||
| }, | ||
| 'logfire.tags': ('LLM',), | ||
| 'gen_ai.response.model': 'gpt-4', | ||
| 'logfire.scrubbed': [ | ||
| { | ||
| 'path': ['attributes', 'request_data', 'messages', 0, 'content'], | ||
| 'matched_substring': 'secret', | ||
| }, | ||
| { | ||
| 'path': ['attributes', 'response_data', 'combined_chunk_content'], | ||
| 'matched_substring': 'secret', | ||
| }, | ||
| ], | ||
| }, | ||
| }, | ||
| ] | ||
| ) |
There was a problem hiding this comment.
🚩 Test snapshots not actively validated due to inline-snapshot disable default
The project's pyproject.toml sets default-flags = ["disable"] for inline-snapshot, meaning snapshot() comparisons are effectively skipped during normal pytest runs. This means the test snapshots in test_azure_ai_inference.py are not being validated unless explicitly run with --inline-snapshot=update or --inline-snapshot=review.
Notably, the streaming test snapshot at line 331 shows 'gen_ai.response.model': 'gpt-4' on the first (request) span, but analyzing the code path for streaming at logfire/_internal/integrations/llm_providers/azure_ai_inference.py:193-205, the span exits via return _SyncStreamWrapper(...) without ever calling _on_chat_response which is what sets RESPONSE_MODEL. The same applies to the streaming info span at line 383 — _get_stream_attributes copies _span_data which only contains REQUEST_MODEL, not RESPONSE_MODEL. These snapshot values may be stale or auto-generated from an earlier code revision.
Was this helpful? React with 👍 or 👎 to provide feedback.
There was a problem hiding this comment.
Comparisons to snapshots aren't disabled; they just won't update by default.
A span processor automatically fills in the response model from the request model as a fallback, which explains this.
|
@Kludex we don't want the old 'semconv' |
Only use the latest OTel GenAI semantic convention attributes (gen_ai.input.messages, gen_ai.output.messages, etc.) instead of the legacy request_data/response_data format. Also backfill model from response when the request model is None (Azure OpenAI deployments).
| def instrument_azure_ai_inference( | ||
| self, | ||
| azure_ai_inference_client: Any = None, | ||
| *, | ||
| suppress_other_instrumentation: bool = True, | ||
| ) -> AbstractContextManager[None]: | ||
| """Instrument an Azure AI Inference client so that spans are automatically created for each request. | ||
|
|
||
| Supports both the sync and async clients from the | ||
| [`azure-ai-inference`](https://pypi.org/project/azure-ai-inference/) package: | ||
|
|
||
| - [`ChatCompletionsClient.complete`](https://learn.microsoft.com/python/api/azure-ai-inference/azure.ai.inference.chatcompletionsclient) - with and without `stream=True` | ||
| - [`EmbeddingsClient.embed`](https://learn.microsoft.com/python/api/azure-ai-inference/azure.ai.inference.embeddingsclient) | ||
|
|
||
| Example usage: | ||
|
|
||
| ```python skip-run="true" skip-reason="external-connection" | ||
| from azure.ai.inference import ChatCompletionsClient | ||
| from azure.core.credentials import AzureKeyCredential | ||
|
|
||
| import logfire | ||
|
|
||
| client = ChatCompletionsClient( | ||
| endpoint='https://my-endpoint.inference.ai.azure.com', | ||
| credential=AzureKeyCredential('my-api-key'), | ||
| ) | ||
|
|
||
| logfire.configure() | ||
| logfire.instrument_azure_ai_inference(client) | ||
|
|
||
| response = client.complete( | ||
| model='gpt-4', | ||
| messages=[{'role': 'user', 'content': 'What is four plus five?'}], | ||
| ) | ||
| print(response.choices[0].message.content) | ||
| ``` | ||
|
|
||
| Args: | ||
| azure_ai_inference_client: The Azure AI Inference client or class to instrument: | ||
|
|
||
| - `None` (the default) to instrument all Azure AI Inference client classes. | ||
| - A `ChatCompletionsClient` or `EmbeddingsClient` class or instance (sync or async). | ||
|
|
||
| suppress_other_instrumentation: If True, suppress any other OTEL instrumentation that may be otherwise | ||
| enabled. In reality, this means the Azure Core tracing instrumentation, which could otherwise be | ||
| called since the Azure SDK uses its own pipeline to make HTTP requests. | ||
|
|
||
| Returns: | ||
| A context manager that will revert the instrumentation when exited. | ||
| Use of this context manager is optional. | ||
| """ | ||
| try: | ||
| from azure.ai.inference import ChatCompletionsClient, EmbeddingsClient | ||
| except ImportError: | ||
| raise RuntimeError( | ||
| 'The `logfire.instrument_azure_ai_inference()` method ' | ||
| 'requires the `azure-ai-inference` package.\n' | ||
| 'You can install this with:\n' | ||
| " pip install 'logfire[azure-ai-inference]'" | ||
| ) | ||
|
|
||
| from .integrations.llm_providers.azure_ai_inference import instrument_azure_ai_inference | ||
|
|
||
| self._warn_if_not_initialized_for_instrumentation() | ||
|
|
||
| if azure_ai_inference_client is None: | ||
| clients_to_instrument: list[Any] = [ChatCompletionsClient, EmbeddingsClient] | ||
| try: | ||
| from azure.ai.inference.aio import ( | ||
| ChatCompletionsClient as AsyncChatCompletionsClient, | ||
| EmbeddingsClient as AsyncEmbeddingsClient, | ||
| ) | ||
|
|
||
| clients_to_instrument.extend([AsyncChatCompletionsClient, AsyncEmbeddingsClient]) | ||
| except ImportError: # pragma: no cover | ||
| pass | ||
| azure_ai_inference_client = clients_to_instrument | ||
|
|
||
| return instrument_azure_ai_inference( | ||
| self, | ||
| azure_ai_inference_client, | ||
| suppress_other_instrumentation, | ||
| ) |
There was a problem hiding this comment.
🚩 No semconv version parameter unlike OpenAI/Anthropic integrations
The OpenAI (logfire/_internal/main.py:1237) and Anthropic (logfire/_internal/main.py:1308) instrument_* methods accept a version: SemconvVersion parameter that controls whether to emit legacy request_data/response_data attributes, new OTel GenAI semconv attributes, or both. The Azure integration always emits the new semconv format alongside a minimal request_data (for message template compatibility), with no version toggle.
This means users cannot get the full legacy response_data blob that other integrations provide in version 1 mode. If this is intentional simplification (since this is a new integration with no backward compatibility concerns), it's fine. But it's an inconsistency with the existing integration API surface that users migrating between providers might notice.
Was this helpful? React with 👍 or 👎 to provide feedback.
Use conditional message templates so spans show "Chat completion" instead of "Chat completion with None" when model isn't in request params (Azure OpenAI deployments). Backfill updates the span message once the response arrives. Add 15 new tests covering model backfill, suppress=False, list instrumentation, all request parameters, body-style params, multimodal content items, and stream context managers. Coverage: 73% -> 89%.
Add comprehensive tests for all code paths including: - Positional arg extraction in _extract_params - Tool objects with as_dict() method - Model backfill when response has no model - Minimal/empty responses (no model, id, usage, finish_reason) - Choice without message in response conversion - Stream context manager delegation (__enter__/__exit__, __aenter__/__aexit__) - Streaming with empty chunks and no-model chunks - System messages with non-string content - Response tool calls without function attribute Add pragma: no cover to defensive paths (is_instrumentation_suppressed, except Exception, ImportError) matching patterns from other integrations.
| log_internal_error() | ||
| return original(*args, **kwargs) | ||
|
|
||
| is_streaming = kwargs.get('stream', False) |
There was a problem hiding this comment.
🟡 Streaming flag not detected when stream is passed inside the body dict parameter
When a user calls client.complete(body={'messages': [...], 'model': 'gpt-4', 'stream': True}), the streaming detection at kwargs.get('stream', False) returns False because stream is nested inside the body dict, not a top-level kwarg. The Azure AI Inference SDK supports this body-style calling pattern, and the code explicitly handles it in _extract_params (logfire/_internal/integrations/llm_providers/azure_ai_inference.py:310-311), but the streaming check doesn't consult the extracted params.
Root Cause and Impact
The _extract_params function correctly extracts parameters from the body dict:
if 'body' in kwargs and isinstance(kwargs['body'], dict):
        return kwargs['body']
But the streaming check on lines 159 and 187 only looks at top-level kwargs:
is_streaming = kwargs.get('stream', False)
When `stream=True` is inside `body`, `is_streaming` is incorrectly `False`. As a result:
- The streaming response is not wrapped in `_SyncStreamWrapper`/`_AsyncStreamWrapper`
- `_on_chat_response` is called on the streaming iterator (which lacks `.choices`, `.usage`, etc.), producing empty/missing telemetry
- Streaming instrumentation metadata (output messages, duration) is silently lost
This affects both the sync path (line 187) and async path (line 159).
Prompt for agents
In logfire/_internal/integrations/llm_providers/azure_ai_inference.py, fix the streaming detection to also check the extracted params dict. In both _make_instrumented_complete functions (sync at line 187 and async at line 159), the line:
is_streaming = kwargs.get('stream', False)
should also check the body dict. One approach: after building span_data (which calls _extract_params), also extract the stream flag from params. For example, move the _extract_params call earlier and reuse its result:
params = _extract_params(args, kwargs)
is_streaming = kwargs.get('stream', False) or params.get('stream', False)
Alternatively, extract is_streaming inside _build_chat_span_data and return it alongside span_data. Apply this fix in both the sync instrumented_complete_sync (around line 187) and async instrumented_complete (around line 159).
Was this helpful? React with 👍 or 👎 to provide feedback.
|
Regarding streaming, we also don't want to continue the old pattern with a separate log. Copy from #1586:
|
logfire/_internal/main.py
Outdated
|
|
||
| def instrument_azure_ai_inference( | ||
| self, | ||
| azure_ai_inference_client: Any = None, |
There was a problem hiding this comment.
This should use the proper type, as the other methods - should be in the TYPE_CHECKING above.
We should also have this noted in the CLAUDE.md.
logfire/_internal/main.py
Outdated
| try: | ||
| from azure.ai.inference import ChatCompletionsClient, EmbeddingsClient | ||
| except ImportError: # pragma: no cover | ||
| raise RuntimeError( | ||
| 'The `logfire.instrument_azure_ai_inference()` method ' | ||
| 'requires the `azure-ai-inference` package.\n' | ||
| 'You can install this with:\n' | ||
| " pip install 'logfire[azure-ai-inference]'" | ||
| ) | ||
|
|
||
| from .integrations.llm_providers.azure_ai_inference import instrument_azure_ai_inference | ||
|
|
||
| self._warn_if_not_initialized_for_instrumentation() | ||
|
|
||
| if azure_ai_inference_client is None: | ||
| clients_to_instrument: list[Any] = [ChatCompletionsClient, EmbeddingsClient] | ||
| try: | ||
| from azure.ai.inference.aio import ( | ||
| ChatCompletionsClient as AsyncChatCompletionsClient, | ||
| EmbeddingsClient as AsyncEmbeddingsClient, | ||
| ) | ||
|
|
||
| clients_to_instrument.extend([AsyncChatCompletionsClient, AsyncEmbeddingsClient]) | ||
| except ImportError: # pragma: no cover | ||
| pass | ||
| azure_ai_inference_client = clients_to_instrument |
There was a problem hiding this comment.
This import check should be handled the same way as the other instrumentations: in the modules of the instrumentions.
|
|
||
| ```python skip-run="true" skip-reason="external-connection" | ||
| from azure.ai.inference import ChatCompletionsClient | ||
| from azure.core.credentials import AzureKeyCredential | ||
|
|
||
| import logfire | ||
|
|
||
| client = ChatCompletionsClient( | ||
| endpoint='https://my-endpoint.inference.ai.azure.com', | ||
| credential=AzureKeyCredential('my-api-key'), | ||
| ) | ||
|
|
||
| logfire.configure() | ||
| logfire.instrument_azure_ai_inference(client) | ||
|
|
||
| response = client.complete( | ||
| model='gpt-4', | ||
| messages=[{'role': 'user', 'content': 'What is four plus five?'}], | ||
| ) | ||
| print(response.choices[0].message.content) |
There was a problem hiding this comment.
This should be padded 4 characters >>.
…ion module Address @Kludex's PR review: use TYPE_CHECKING imports with proper union type for the azure_ai_inference_client parameter (matching OpenAI/Anthropic pattern), and move the ImportError check + client list building from main.py to the integration module.
Summary
Adds `instrument_azure_ai_inference()` to instrument `ChatCompletionsClient.complete()` and `EmbeddingsClient.embed()` from the `azure-ai-inference` SDK (following the `httpx.request()` pattern).
- `uv run pytest tests/otel_integrations/test_azure_ai_inference.py` — 11/11 passing
- `uv run pytest tests/test_logfire_api.py` — logfire-api stub verified
- `make lint` — clean
- `make typecheck` — zero errors
- `uv run mkdocs build --no-strict` — docs build successfully