Skip to content

Commit 3a97b15

Browse files
authored Mar 17, 2025
Stronger tracing tests with inline-snapshot (openai#25)
`assert len(spans) == 12` is a very weak assertion. This PR asserts the exported traces and spans more precisely in a readable tree format. And when the format of an exported trace/span changes (e.g. a new key is added to every span), you can use `pytest --inline-snapshot=fix` to update all relevant tests automatically. See https://15r10nk.github.io/inline-snapshot/latest/ for more info.
2 parents d0a7b00 + 7eb2bce commit 3a97b15

5 files changed

+840
-4
lines changed
 

‎tests/test_agent_tracing.py

+114-1
Original file line number | Diff line number | Diff line change
@@ -3,12 +3,13 @@
33
import asyncio
44

55
import pytest
6+
from inline_snapshot import snapshot
67

78
from agents import Agent, RunConfig, Runner, trace
89

910
from .fake_model import FakeModel
1011
from .test_responses import get_text_message
11-
from .testing_processor import fetch_ordered_spans, fetch_traces
12+
from .testing_processor import fetch_normalized_spans, fetch_ordered_spans, fetch_traces
1213

1314

1415
@pytest.mark.asyncio
@@ -25,6 +26,25 @@ async def test_single_run_is_single_trace():
2526
traces = fetch_traces()
2627
assert len(traces) == 1, f"Expected 1 trace, got {len(traces)}"
2728

29+
assert fetch_normalized_spans() == snapshot(
30+
[
31+
{
32+
"workflow_name": "Agent workflow",
33+
"children": [
34+
{
35+
"type": "agent",
36+
"data": {
37+
"name": "test_agent",
38+
"handoffs": [],
39+
"tools": [],
40+
"output_type": "str",
41+
},
42+
}
43+
],
44+
}
45+
]
46+
)
47+
2848
spans = fetch_ordered_spans()
2949
assert len(spans) == 1, (
3050
f"Got {len(spans)}, but expected 1: the agent span. data:"
@@ -52,6 +72,39 @@ async def test_multiple_runs_are_multiple_traces():
5272
traces = fetch_traces()
5373
assert len(traces) == 2, f"Expected 2 traces, got {len(traces)}"
5474

75+
assert fetch_normalized_spans() == snapshot(
76+
[
77+
{
78+
"workflow_name": "Agent workflow",
79+
"children": [
80+
{
81+
"type": "agent",
82+
"data": {
83+
"name": "test_agent_1",
84+
"handoffs": [],
85+
"tools": [],
86+
"output_type": "str",
87+
},
88+
}
89+
],
90+
},
91+
{
92+
"workflow_name": "Agent workflow",
93+
"children": [
94+
{
95+
"type": "agent",
96+
"data": {
97+
"name": "test_agent_1",
98+
"handoffs": [],
99+
"tools": [],
100+
"output_type": "str",
101+
},
102+
}
103+
],
104+
},
105+
]
106+
)
107+
55108
spans = fetch_ordered_spans()
56109
assert len(spans) == 2, f"Got {len(spans)}, but expected 2: agent span per run"
57110

@@ -79,6 +132,43 @@ async def test_wrapped_trace_is_single_trace():
79132
traces = fetch_traces()
80133
assert len(traces) == 1, f"Expected 1 trace, got {len(traces)}"
81134

135+
assert fetch_normalized_spans() == snapshot(
136+
[
137+
{
138+
"workflow_name": "test_workflow",
139+
"children": [
140+
{
141+
"type": "agent",
142+
"data": {
143+
"name": "test_agent_1",
144+
"handoffs": [],
145+
"tools": [],
146+
"output_type": "str",
147+
},
148+
},
149+
{
150+
"type": "agent",
151+
"data": {
152+
"name": "test_agent_1",
153+
"handoffs": [],
154+
"tools": [],
155+
"output_type": "str",
156+
},
157+
},
158+
{
159+
"type": "agent",
160+
"data": {
161+
"name": "test_agent_1",
162+
"handoffs": [],
163+
"tools": [],
164+
"output_type": "str",
165+
},
166+
},
167+
],
168+
}
169+
]
170+
)
171+
82172
spans = fetch_ordered_spans()
83173
assert len(spans) == 3, f"Got {len(spans)}, but expected 3: the agent span per run"
84174

@@ -97,6 +187,8 @@ async def test_parent_disabled_trace_disabled_agent_trace():
97187

98188
traces = fetch_traces()
99189
assert len(traces) == 0, f"Expected 0 traces, got {len(traces)}"
190+
assert fetch_normalized_spans() == snapshot([])
191+
100192
spans = fetch_ordered_spans()
101193
assert len(spans) == 0, (
102194
f"Expected no spans, got {len(spans)}, with {[x.span_data for x in spans]}"
@@ -116,6 +208,8 @@ async def test_manual_disabling_works():
116208

117209
traces = fetch_traces()
118210
assert len(traces) == 0, f"Expected 0 traces, got {len(traces)}"
211+
assert fetch_normalized_spans() == snapshot([])
212+
119213
spans = fetch_ordered_spans()
120214
assert len(spans) == 0, f"Got {len(spans)}, but expected no spans"
121215

@@ -164,6 +258,25 @@ async def test_not_starting_streaming_creates_trace():
164258
traces = fetch_traces()
165259
assert len(traces) == 1, f"Expected 1 trace, got {len(traces)}"
166260

261+
assert fetch_normalized_spans() == snapshot(
262+
[
263+
{
264+
"workflow_name": "Agent workflow",
265+
"children": [
266+
{
267+
"type": "agent",
268+
"data": {
269+
"name": "test_agent",
270+
"handoffs": [],
271+
"tools": [],
272+
"output_type": "str",
273+
},
274+
}
275+
],
276+
}
277+
]
278+
)
279+
167280
spans = fetch_ordered_spans()
168281
assert len(spans) == 1, f"Got {len(spans)}, but expected 1: the agent span"
169282

‎tests/test_responses_tracing.py

+32-1
Original file line number | Diff line number | Diff line change
@@ -1,12 +1,13 @@
11
import pytest
2+
from inline_snapshot import snapshot
23
from openai import AsyncOpenAI
34
from openai.types.responses import ResponseCompletedEvent
45

56
from agents import ModelSettings, ModelTracing, OpenAIResponsesModel, trace
67
from agents.tracing.span_data import ResponseSpanData
78
from tests import fake_model
89

9-
from .testing_processor import fetch_ordered_spans
10+
from .testing_processor import fetch_normalized_spans, fetch_ordered_spans
1011

1112

1213
class DummyTracing:
@@ -54,6 +55,15 @@ async def dummy_fetch_response(
5455
"instr", "input", ModelSettings(), [], None, [], ModelTracing.ENABLED
5556
)
5657

58+
assert fetch_normalized_spans() == snapshot(
59+
[
60+
{
61+
"workflow_name": "test",
62+
"children": [{"type": "response", "data": {"response_id": "dummy-id"}}],
63+
}
64+
]
65+
)
66+
5767
spans = fetch_ordered_spans()
5868
assert len(spans) == 1
5969

@@ -82,6 +92,10 @@ async def dummy_fetch_response(
8292
"instr", "input", ModelSettings(), [], None, [], ModelTracing.ENABLED_WITHOUT_DATA
8393
)
8494

95+
assert fetch_normalized_spans() == snapshot(
96+
[{"workflow_name": "test", "children": [{"type": "response"}]}]
97+
)
98+
8599
spans = fetch_ordered_spans()
86100
assert len(spans) == 1
87101
assert spans[0].span_data.response is None
@@ -107,6 +121,8 @@ async def dummy_fetch_response(
107121
"instr", "input", ModelSettings(), [], None, [], ModelTracing.DISABLED
108122
)
109123

124+
assert fetch_normalized_spans() == snapshot([{"workflow_name": "test"}])
125+
110126
spans = fetch_ordered_spans()
111127
assert len(spans) == 0
112128

@@ -139,6 +155,15 @@ async def __aiter__(self):
139155
):
140156
pass
141157

158+
assert fetch_normalized_spans() == snapshot(
159+
[
160+
{
161+
"workflow_name": "test",
162+
"children": [{"type": "response", "data": {"response_id": "dummy-id-123"}}],
163+
}
164+
]
165+
)
166+
142167
spans = fetch_ordered_spans()
143168
assert len(spans) == 1
144169
assert isinstance(spans[0].span_data, ResponseSpanData)
@@ -174,6 +199,10 @@ async def __aiter__(self):
174199
):
175200
pass
176201

202+
assert fetch_normalized_spans() == snapshot(
203+
[{"workflow_name": "test", "children": [{"type": "response"}]}]
204+
)
205+
177206
spans = fetch_ordered_spans()
178207
assert len(spans) == 1
179208
assert isinstance(spans[0].span_data, ResponseSpanData)
@@ -208,5 +237,7 @@ async def __aiter__(self):
208237
):
209238
pass
210239

240+
assert fetch_normalized_spans() == snapshot([{"workflow_name": "test"}])
241+
211242
spans = fetch_ordered_spans()
212243
assert len(spans) == 0

0 commit comments

Comments
 (0)