From 6f67df0c0fd8476d8e0ae0d3204b1bb03760a8a9 Mon Sep 17 00:00:00 2001 From: Alexander Alderman Webb Date: Fri, 22 May 2026 11:59:00 +0200 Subject: [PATCH 01/13] fix(openai-agents): Remove redundant hosted MCP tool spans --- .../openai_agents/spans/ai_client.py | 2 - .../integrations/openai_agents/utils.py | 24 +- .../openai_agents/test_openai_agents.py | 523 ------------------ 3 files changed, 1 insertion(+), 548 deletions(-) diff --git a/sentry_sdk/integrations/openai_agents/spans/ai_client.py b/sentry_sdk/integrations/openai_agents/spans/ai_client.py index b060c29aaf..564d325416 100644 --- a/sentry_sdk/integrations/openai_agents/spans/ai_client.py +++ b/sentry_sdk/integrations/openai_agents/spans/ai_client.py @@ -5,7 +5,6 @@ from ..consts import SPAN_ORIGIN from ..utils import ( - _create_mcp_execute_tool_spans, _set_agent_data, _set_input_data, _set_output_data, @@ -55,7 +54,6 @@ def update_ai_client_span( if hasattr(response, "output") and response.output: _set_output_data(span, response) - _create_mcp_execute_tool_spans(span, response) if response_model is not None: span.set_data(SPANDATA.GEN_AI_RESPONSE_MODEL, response_model) diff --git a/sentry_sdk/integrations/openai_agents/utils.py b/sentry_sdk/integrations/openai_agents/utils.py index 5ffdb915ba..78f0a90f65 100644 --- a/sentry_sdk/integrations/openai_agents/utils.py +++ b/sentry_sdk/integrations/openai_agents/utils.py @@ -14,7 +14,7 @@ set_data_normalized, truncate_and_annotate_messages, ) -from sentry_sdk.consts import OP, SPANDATA, SPANSTATUS +from sentry_sdk.consts import SPANDATA from sentry_sdk.integrations import DidNotEnable from sentry_sdk.scope import should_send_default_pii from sentry_sdk.utils import event_from_exception, safe_serialize @@ -215,25 +215,3 @@ def _set_output_data(span: "sentry_sdk.tracing.Span", result: "Any") -> None: set_data_normalized( span, SPANDATA.GEN_AI_RESPONSE_TEXT, output_messages["response"] ) - - -def _create_mcp_execute_tool_spans( - span: 
"sentry_sdk.tracing.Span", result: "agents.Result" -) -> None: - for output in result.output: - if output.__class__.__name__ == "McpCall": - with sentry_sdk.start_span( - op=OP.GEN_AI_EXECUTE_TOOL, - name=f"execute_tool {output.name}", - start_timestamp=span.start_timestamp, - ) as execute_tool_span: - execute_tool_span.set_data(SPANDATA.GEN_AI_TOOL_NAME, output.name) - if should_send_default_pii(): - execute_tool_span.set_data( - SPANDATA.GEN_AI_TOOL_INPUT, output.arguments - ) - execute_tool_span.set_data( - SPANDATA.GEN_AI_TOOL_OUTPUT, output.output - ) - if output.error: - execute_tool_span.set_status(SPANSTATUS.INTERNAL_ERROR) diff --git a/tests/integrations/openai_agents/test_openai_agents.py b/tests/integrations/openai_agents/test_openai_agents.py index 6e49b2b08e..2cc33d6fd7 100644 --- a/tests/integrations/openai_agents/test_openai_agents.py +++ b/tests/integrations/openai_agents/test_openai_agents.py @@ -16,7 +16,6 @@ ) from agents.exceptions import MaxTurnsExceeded, ModelBehaviorError from agents.items import ( - McpCall, ResponseFunctionToolCall, ResponseOutputMessage, ResponseOutputText, @@ -3123,528 +3122,6 @@ async def test_span_status_error( assert transaction["contexts"]["trace"]["status"] == "internal_error" -@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) -@pytest.mark.asyncio -async def test_mcp_tool_execution_spans( - sentry_init, - capture_events, - capture_items, - test_agent, - get_model_response, - stream_gen_ai_spans, -): - """ - Test that MCP (Model Context Protocol) tool calls create execute_tool spans. 
- """ - client = AsyncOpenAI(api_key="test-key") - model = OpenAIResponsesModel(model="gpt-4", openai_client=client) - agent = test_agent.clone(model=model) - - mcp_response = get_model_response( - Response( - id="resp_mcp_123", - output=[ - McpCall( - id="mcp_call_123", - name="test_mcp_tool", - arguments='{"query": "search term"}', - output="MCP tool executed successfully", - error=None, - type="mcp_call", - server_label="test_server", - ) - ], - parallel_tool_calls=False, - tool_choice="none", - tools=[], - created_at=10000000, - model="gpt-4.1-2025-04-14", - object="response", - usage=ResponseUsage( - input_tokens=10, - input_tokens_details=InputTokensDetails( - cached_tokens=0, - ), - output_tokens=5, - output_tokens_details=OutputTokensDetails( - reasoning_tokens=0, - ), - total_tokens=15, - ), - ), - serialize_pydantic=True, - ) - - final_response = get_model_response( - Response( - id="resp_final_123", - output=[ - ResponseOutputMessage( - id="msg_final", - type="message", - status="completed", - content=[ - ResponseOutputText( - text="Task completed using MCP tool", - type="output_text", - annotations=[], - ) - ], - role="assistant", - ) - ], - parallel_tool_calls=False, - tool_choice="none", - tools=[], - created_at=10000000, - model="gpt-4.1-2025-04-14", - object="response", - usage=ResponseUsage( - input_tokens=15, - input_tokens_details=InputTokensDetails( - cached_tokens=0, - ), - output_tokens=10, - output_tokens_details=OutputTokensDetails( - reasoning_tokens=0, - ), - total_tokens=25, - ), - ), - serialize_pydantic=True, - ) - - if stream_gen_ai_spans: - with patch.object( - agent.model._client._client, - "send", - side_effect=[mcp_response, final_response], - ) as _: - sentry_init( - integrations=[OpenAIAgentsIntegration()], - traces_sample_rate=1.0, - send_default_pii=True, - stream_gen_ai_spans=stream_gen_ai_spans, - ) - - items = capture_items("span", "transaction") - - await agents.Runner.run( - agent, - "Please use MCP tool", - 
run_config=test_run_config, - ) - - spans = [item.payload for item in items if item.type == "span"] - - # Find the MCP execute_tool span - mcp_tool_span = None - for span in spans: - if span.get("name") == "execute_tool test_mcp_tool": - mcp_tool_span = span - break - - # Verify the MCP tool span was created - assert mcp_tool_span is not None, "MCP execute_tool span was not created" - assert mcp_tool_span["name"] == "execute_tool test_mcp_tool" - assert mcp_tool_span["attributes"]["gen_ai.tool.name"] == "test_mcp_tool" - assert ( - mcp_tool_span["attributes"]["gen_ai.tool.input"] - == '{"query": "search term"}' - ) - assert ( - mcp_tool_span["attributes"]["gen_ai.tool.output"] - == "MCP tool executed successfully" - ) - - # Verify no error status since error was None - assert mcp_tool_span.get("status") != "error" - assert mcp_tool_span.get("tags", {}).get("status") != "error" - else: - with patch.object( - agent.model._client._client, - "send", - side_effect=[mcp_response, final_response], - ) as _: - sentry_init( - integrations=[OpenAIAgentsIntegration()], - traces_sample_rate=1.0, - send_default_pii=True, - stream_gen_ai_spans=stream_gen_ai_spans, - ) - events = capture_events() - - await agents.Runner.run( - agent, - "Please use MCP tool", - run_config=test_run_config, - ) - - (transaction,) = events - spans = transaction["spans"] - - # Find the MCP execute_tool span - mcp_tool_span = None - for span in spans: - if span.get("description") == "execute_tool test_mcp_tool": - mcp_tool_span = span - break - - # Verify the MCP tool span was created - assert mcp_tool_span is not None, "MCP execute_tool span was not created" - assert mcp_tool_span["description"] == "execute_tool test_mcp_tool" - assert mcp_tool_span["data"]["gen_ai.tool.name"] == "test_mcp_tool" - assert mcp_tool_span["data"]["gen_ai.tool.input"] == '{"query": "search term"}' - assert ( - mcp_tool_span["data"]["gen_ai.tool.output"] - == "MCP tool executed successfully" - ) - - # Verify no error status 
since error was None - assert mcp_tool_span.get("status") != "internal_error" - assert mcp_tool_span.get("tags", {}).get("status") != "internal_error" - - -@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) -@pytest.mark.asyncio -async def test_mcp_tool_execution_with_error( - sentry_init, - capture_events, - capture_items, - test_agent, - get_model_response, - stream_gen_ai_spans, -): - """ - Test that MCP tool calls with errors are tracked with error status. - """ - client = AsyncOpenAI(api_key="test-key") - model = OpenAIResponsesModel(model="gpt-4", openai_client=client) - agent = test_agent.clone(model=model) - - mcp_response = get_model_response( - Response( - id="resp_mcp_123", - output=[ - McpCall( - id="mcp_call_error_123", - name="failing_mcp_tool", - arguments='{"query": "test"}', - output=None, - error="MCP tool execution failed", - type="mcp_call", - server_label="test_server", - ) - ], - parallel_tool_calls=False, - tool_choice="none", - tools=[], - created_at=10000000, - model="gpt-4.1-2025-04-14", - object="response", - usage=ResponseUsage( - input_tokens=10, - input_tokens_details=InputTokensDetails( - cached_tokens=0, - ), - output_tokens=5, - output_tokens_details=OutputTokensDetails( - reasoning_tokens=0, - ), - total_tokens=15, - ), - ), - serialize_pydantic=True, - ) - - final_response = get_model_response( - Response( - id="resp_final_123", - output=[ - ResponseOutputMessage( - id="msg_final", - type="message", - status="completed", - content=[ - ResponseOutputText( - text="Task completed using MCP tool", - type="output_text", - annotations=[], - ) - ], - role="assistant", - ) - ], - parallel_tool_calls=False, - tool_choice="none", - tools=[], - created_at=10000000, - model="gpt-4.1-2025-04-14", - object="response", - usage=ResponseUsage( - input_tokens=15, - input_tokens_details=InputTokensDetails( - cached_tokens=0, - ), - output_tokens=10, - output_tokens_details=OutputTokensDetails( - reasoning_tokens=0, - ), - 
total_tokens=25, - ), - ), - serialize_pydantic=True, - ) - - if stream_gen_ai_spans: - with patch.object( - agent.model._client._client, - "send", - side_effect=[mcp_response, final_response], - ) as _: - sentry_init( - integrations=[OpenAIAgentsIntegration()], - traces_sample_rate=1.0, - send_default_pii=True, - stream_gen_ai_spans=stream_gen_ai_spans, - ) - - items = capture_items("span", "transaction") - - await agents.Runner.run( - agent, - "Please use failing MCP tool", - run_config=test_run_config, - ) - - spans = [item.payload for item in items if item.type == "span"] - - # Find the MCP execute_tool span with error - mcp_tool_span = None - for span in spans: - if span.get("name") == "execute_tool failing_mcp_tool": - mcp_tool_span = span - break - - # Verify the MCP tool span was created with error status - assert mcp_tool_span is not None, "MCP execute_tool span was not created" - assert mcp_tool_span["name"] == "execute_tool failing_mcp_tool" - assert mcp_tool_span["attributes"]["gen_ai.tool.name"] == "failing_mcp_tool" - assert mcp_tool_span["attributes"]["gen_ai.tool.input"] == '{"query": "test"}' - - # Verify error status was set - assert mcp_tool_span["status"] == "error" - else: - with patch.object( - agent.model._client._client, - "send", - side_effect=[mcp_response, final_response], - ) as _: - sentry_init( - integrations=[OpenAIAgentsIntegration()], - traces_sample_rate=1.0, - send_default_pii=True, - stream_gen_ai_spans=stream_gen_ai_spans, - ) - events = capture_events() - - await agents.Runner.run( - agent, - "Please use failing MCP tool", - run_config=test_run_config, - ) - - (transaction,) = events - spans = transaction["spans"] - - # Find the MCP execute_tool span with error - mcp_tool_span = None - for span in spans: - if span.get("description") == "execute_tool failing_mcp_tool": - mcp_tool_span = span - break - - # Verify the MCP tool span was created with error status - assert mcp_tool_span is not None, "MCP execute_tool span was not 
created" - assert mcp_tool_span["description"] == "execute_tool failing_mcp_tool" - assert mcp_tool_span["data"]["gen_ai.tool.name"] == "failing_mcp_tool" - assert mcp_tool_span["data"]["gen_ai.tool.input"] == '{"query": "test"}' - assert mcp_tool_span["data"]["gen_ai.tool.output"] is None - - # Verify error status was set - assert mcp_tool_span["status"] == "internal_error" - assert mcp_tool_span["tags"]["status"] == "internal_error" - - -@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) -@pytest.mark.asyncio -async def test_mcp_tool_execution_without_pii( - sentry_init, - capture_events, - capture_items, - test_agent, - get_model_response, - stream_gen_ai_spans, -): - """ - Test that MCP tool input/output are not included when send_default_pii is False. - """ - client = AsyncOpenAI(api_key="test-key") - model = OpenAIResponsesModel(model="gpt-4", openai_client=client) - agent = test_agent.clone(model=model) - - mcp_response = get_model_response( - Response( - id="resp_mcp_123", - output=[ - McpCall( - id="mcp_call_pii_123", - name="test_mcp_tool", - arguments='{"query": "sensitive data"}', - output="Result with sensitive info", - error=None, - type="mcp_call", - server_label="test_server", - ) - ], - parallel_tool_calls=False, - tool_choice="none", - tools=[], - created_at=10000000, - model="gpt-4.1-2025-04-14", - object="response", - usage=ResponseUsage( - input_tokens=10, - input_tokens_details=InputTokensDetails( - cached_tokens=0, - ), - output_tokens=5, - output_tokens_details=OutputTokensDetails( - reasoning_tokens=0, - ), - total_tokens=15, - ), - ), - serialize_pydantic=True, - ) - - final_response = get_model_response( - Response( - id="resp_final_123", - output=[ - ResponseOutputMessage( - id="msg_final", - type="message", - status="completed", - content=[ - ResponseOutputText( - text="Task completed", - type="output_text", - annotations=[], - ) - ], - role="assistant", - ) - ], - parallel_tool_calls=False, - tool_choice="none", - tools=[], 
- created_at=10000000, - model="gpt-4.1-2025-04-14", - object="response", - usage=ResponseUsage( - input_tokens=15, - input_tokens_details=InputTokensDetails( - cached_tokens=0, - ), - output_tokens=10, - output_tokens_details=OutputTokensDetails( - reasoning_tokens=5, - ), - total_tokens=25, - ), - ), - serialize_pydantic=True, - ) - - if stream_gen_ai_spans: - with patch.object( - agent.model._client._client, - "send", - side_effect=[mcp_response, final_response], - ) as _: - sentry_init( - integrations=[OpenAIAgentsIntegration()], - traces_sample_rate=1.0, - send_default_pii=False, # PII disabled - stream_gen_ai_spans=stream_gen_ai_spans, - ) - - items = capture_items("span", "transaction") - - await agents.Runner.run( - agent, - "Please use MCP tool", - run_config=test_run_config, - ) - - spans = [item.payload for item in items if item.type == "span"] - - # Find the MCP execute_tool span - mcp_tool_span = None - for span in spans: - if span.get("name") == "execute_tool test_mcp_tool": - mcp_tool_span = span - break - - # Verify the MCP tool span was created but without input/output - assert mcp_tool_span is not None, "MCP execute_tool span was not created" - assert mcp_tool_span["name"] == "execute_tool test_mcp_tool" - assert mcp_tool_span["attributes"]["gen_ai.tool.name"] == "test_mcp_tool" - - # Verify input and output are not included when send_default_pii is False - assert "gen_ai.tool.input" not in mcp_tool_span["attributes"] - assert "gen_ai.tool.output" not in mcp_tool_span["attributes"] - else: - with patch.object( - agent.model._client._client, - "send", - side_effect=[mcp_response, final_response], - ) as _: - sentry_init( - integrations=[OpenAIAgentsIntegration()], - traces_sample_rate=1.0, - send_default_pii=False, # PII disabled - stream_gen_ai_spans=stream_gen_ai_spans, - ) - events = capture_events() - - await agents.Runner.run( - agent, - "Please use MCP tool", - run_config=test_run_config, - ) - - (transaction,) = events - spans = 
transaction["spans"] - - # Find the MCP execute_tool span - mcp_tool_span = None - for span in spans: - if span.get("description") == "execute_tool test_mcp_tool": - mcp_tool_span = span - break - - # Verify the MCP tool span was created but without input/output - assert mcp_tool_span is not None, "MCP execute_tool span was not created" - assert mcp_tool_span["description"] == "execute_tool test_mcp_tool" - assert mcp_tool_span["data"]["gen_ai.tool.name"] == "test_mcp_tool" - - # Verify input and output are not included when send_default_pii is False - assert "gen_ai.tool.input" not in mcp_tool_span["data"] - assert "gen_ai.tool.output" not in mcp_tool_span["data"] - - @pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.asyncio async def test_multiple_agents_asyncio( From 0f53ac0f13b9a60dab11c91e72bdaa60add9c0a5 Mon Sep 17 00:00:00 2001 From: Alexander Alderman Webb Date: Tue, 26 May 2026 08:09:28 +0200 Subject: [PATCH 02/13] feat(openai-agents): Support span streaming --- .../openai_agents/patches/agent_run.py | 17 +- .../openai_agents/patches/models.py | 17 +- .../openai_agents/patches/runner.py | 13 +- .../openai_agents/spans/agent_workflow.py | 9 + .../openai_agents/spans/ai_client.py | 41 +- .../openai_agents/spans/execute_tool.py | 54 +- .../openai_agents/spans/handoff.py | 36 +- .../openai_agents/spans/invoke_agent.py | 40 +- .../integrations/openai_agents/utils.py | 89 +- .../openai_agents/test_openai_agents.py | 1749 +++++++++++++++-- 10 files changed, 1807 insertions(+), 258 deletions(-) diff --git a/sentry_sdk/integrations/openai_agents/patches/agent_run.py b/sentry_sdk/integrations/openai_agents/patches/agent_run.py index 6e7f0f2820..01e4798eda 100644 --- a/sentry_sdk/integrations/openai_agents/patches/agent_run.py +++ b/sentry_sdk/integrations/openai_agents/patches/agent_run.py @@ -3,6 +3,7 @@ from sentry_sdk.consts import SPANDATA from sentry_sdk.integrations import DidNotEnable +from sentry_sdk.traces import StreamedSpan from 
sentry_sdk.utils import capture_internal_exceptions, reraise from ..spans import ( @@ -108,7 +109,13 @@ async def _run_single_turn( context_wrapper, agent, should_run_agent_start_hooks, kwargs ) - if span is None or span.timestamp is not None: + if ( + span is None + or isinstance(span, StreamedSpan) + and span.end_timestamp is not None + or not isinstance(span, StreamedSpan) + and span.timestamp is not None + ): return await original_run_single_turn(*args, **kwargs) try: @@ -188,7 +195,13 @@ async def _run_single_turn_streamed( is_streaming=True, ) - if span is None or span.timestamp is not None: + if ( + span is None + or isinstance(span, StreamedSpan) + and span.end_timestamp is not None + or not isinstance(span, StreamedSpan) + and span.timestamp is not None + ): return await original_run_single_turn_streamed(*args, **kwargs) try: diff --git a/sentry_sdk/integrations/openai_agents/patches/models.py b/sentry_sdk/integrations/openai_agents/patches/models.py index 1f684a6d60..c8fe2b28f8 100644 --- a/sentry_sdk/integrations/openai_agents/patches/models.py +++ b/sentry_sdk/integrations/openai_agents/patches/models.py @@ -6,6 +6,7 @@ import sentry_sdk from sentry_sdk.consts import SPANDATA from sentry_sdk.integrations import DidNotEnable +from sentry_sdk.traces import StreamedSpan from sentry_sdk.tracing import BAGGAGE_HEADER_NAME from sentry_sdk.tracing_utils import ( add_sentry_baggage_to_headers, @@ -34,7 +35,10 @@ def _set_response_model_on_agent_span( if response_model: agent_span = getattr(agent, "_sentry_agent_span", None) if agent_span: - agent_span.set_data(SPANDATA.GEN_AI_RESPONSE_MODEL, response_model) + if isinstance(agent_span, StreamedSpan): + agent_span.set_attribute(SPANDATA.GEN_AI_RESPONSE_MODEL, response_model) + else: + agent_span.set_data(SPANDATA.GEN_AI_RESPONSE_MODEL, response_model) def _inject_trace_propagation_headers( @@ -151,7 +155,12 @@ async def wrapped_stream_response(*args: "Any", **kwargs: "Any") -> "Any": for hosted_tool in 
hosted_tools: _inject_trace_propagation_headers(hosted_tool, span=span) - span.set_data(SPANDATA.GEN_AI_RESPONSE_STREAMING, True) + set_on_span = ( + span.set_attribute + if isinstance(span, StreamedSpan) + else span.set_data + ) + set_on_span(SPANDATA.GEN_AI_RESPONSE_STREAMING, True) streaming_response = None ttft_recorded = False @@ -162,9 +171,7 @@ async def wrapped_stream_response(*args: "Any", **kwargs: "Any") -> "Any": # Detect first content token (text delta event) if not ttft_recorded and hasattr(event, "delta"): ttft = time.perf_counter() - start_time - span.set_data( - SPANDATA.GEN_AI_RESPONSE_TIME_TO_FIRST_TOKEN, ttft - ) + set_on_span(SPANDATA.GEN_AI_RESPONSE_TIME_TO_FIRST_TOKEN, ttft) ttft_recorded = True # Capture the full response from ResponseCompletedEvent diff --git a/sentry_sdk/integrations/openai_agents/patches/runner.py b/sentry_sdk/integrations/openai_agents/patches/runner.py index 6828ab4855..568839dbda 100644 --- a/sentry_sdk/integrations/openai_agents/patches/runner.py +++ b/sentry_sdk/integrations/openai_agents/patches/runner.py @@ -4,6 +4,7 @@ import sentry_sdk from sentry_sdk.consts import SPANDATA from sentry_sdk.integrations import DidNotEnable +from sentry_sdk.traces import StreamedSpan from sentry_sdk.utils import capture_internal_exceptions, reraise from ..spans import agent_workflow_span, update_invoke_agent_span @@ -43,9 +44,15 @@ async def wrapper(*args: "Any", **kwargs: "Any") -> "Any": conversation_id = kwargs.get("conversation_id") if conversation_id: agent._sentry_conversation_id = conversation_id - workflow_span.set_data( - SPANDATA.GEN_AI_CONVERSATION_ID, conversation_id - ) + + if isinstance(workflow_span, StreamedSpan): + workflow_span.set_attribute( + SPANDATA.GEN_AI_CONVERSATION_ID, conversation_id + ) + else: + workflow_span.set_data( + SPANDATA.GEN_AI_CONVERSATION_ID, conversation_id + ) args = (agent, *args[1:]) try: diff --git a/sentry_sdk/integrations/openai_agents/spans/agent_workflow.py 
b/sentry_sdk/integrations/openai_agents/spans/agent_workflow.py index 7874ad2483..d89443c44a 100644 --- a/sentry_sdk/integrations/openai_agents/spans/agent_workflow.py +++ b/sentry_sdk/integrations/openai_agents/spans/agent_workflow.py @@ -2,6 +2,7 @@ import sentry_sdk from sentry_sdk.ai.utils import get_start_span_function +from sentry_sdk.tracing_utils import has_span_streaming_enabled from ..consts import SPAN_ORIGIN @@ -11,6 +12,14 @@ def agent_workflow_span(agent: "agents.Agent") -> "sentry_sdk.tracing.Span": # Create a transaction or a span if an transaction is already active + span_streaming = has_span_streaming_enabled(sentry_sdk.get_client().options) + if span_streaming: + span = sentry_sdk.traces.start_span( + name=f"{agent.name} workflow", attributes={"sentry.origin": SPAN_ORIGIN} + ) + + return span + span = get_start_span_function()( name=f"{agent.name} workflow", origin=SPAN_ORIGIN, diff --git a/sentry_sdk/integrations/openai_agents/spans/ai_client.py b/sentry_sdk/integrations/openai_agents/spans/ai_client.py index 564d325416..3e53f7681c 100644 --- a/sentry_sdk/integrations/openai_agents/spans/ai_client.py +++ b/sentry_sdk/integrations/openai_agents/spans/ai_client.py @@ -2,6 +2,8 @@ import sentry_sdk from sentry_sdk.consts import OP, SPANDATA +from sentry_sdk.traces import StreamedSpan +from sentry_sdk.tracing_utils import has_span_streaming_enabled from ..consts import SPAN_ORIGIN from ..utils import ( @@ -12,7 +14,7 @@ ) if TYPE_CHECKING: - from typing import Any, Optional + from typing import Any, Optional, Union from agents import Agent @@ -28,13 +30,24 @@ def ai_client_span( elif hasattr(agent, "_sentry_request_model"): model_name = agent._sentry_request_model - span = sentry_sdk.start_span( - op=OP.GEN_AI_CHAT, - name=f"chat {model_name}", - origin=SPAN_ORIGIN, - ) - # TODO-anton: remove hardcoded stuff and replace something that also works for embedding and so on - span.set_data(SPANDATA.GEN_AI_OPERATION_NAME, "chat") + span_streaming = 
has_span_streaming_enabled(sentry_sdk.get_client().options) + if span_streaming: + span = sentry_sdk.traces.start_span( + name=f"chat {model_name}", + attributes={ + "sentry.op": OP.GEN_AI_CHAT, + "sentry.origin": SPAN_ORIGIN, + SPANDATA.GEN_AI_OPERATION_NAME: "chat", + }, + ) + else: + span = sentry_sdk.start_span( + op=OP.GEN_AI_CHAT, + name=f"chat {model_name}", + origin=SPAN_ORIGIN, + ) + # TODO-anton: remove hardcoded stuff and replace something that also works for embedding and so on + span.set_data(SPANDATA.GEN_AI_OPERATION_NAME, "chat") _set_agent_data(span, agent) _set_input_data(span, get_response_kwargs) @@ -43,7 +56,7 @@ def ai_client_span( def update_ai_client_span( - span: "sentry_sdk.tracing.Span", + span: "Union[sentry_sdk.tracing.Span, StreamedSpan]", response: "Any", response_model: "Optional[str]" = None, agent: "Optional[Agent]" = None, @@ -55,13 +68,17 @@ def update_ai_client_span( if hasattr(response, "output") and response.output: _set_output_data(span, response) + set_on_span = ( + span.set_attribute if isinstance(span, StreamedSpan) else span.set_data + ) + if response_model is not None: - span.set_data(SPANDATA.GEN_AI_RESPONSE_MODEL, response_model) + set_on_span(SPANDATA.GEN_AI_RESPONSE_MODEL, response_model) elif hasattr(response, "model") and response.model: - span.set_data(SPANDATA.GEN_AI_RESPONSE_MODEL, str(response.model)) + set_on_span(SPANDATA.GEN_AI_RESPONSE_MODEL, str(response.model)) # Set conversation ID from agent if available if agent: conv_id = getattr(agent, "_sentry_conversation_id", None) if conv_id: - span.set_data(SPANDATA.GEN_AI_CONVERSATION_ID, conv_id) + set_on_span(SPANDATA.GEN_AI_CONVERSATION_ID, conv_id) diff --git a/sentry_sdk/integrations/openai_agents/spans/execute_tool.py b/sentry_sdk/integrations/openai_agents/spans/execute_tool.py index 6e690d59bb..ba9ce3a9b3 100644 --- a/sentry_sdk/integrations/openai_agents/spans/execute_tool.py +++ b/sentry_sdk/integrations/openai_agents/spans/execute_tool.py @@ -3,12 
+3,14 @@ import sentry_sdk from sentry_sdk.consts import OP, SPANDATA, SPANSTATUS from sentry_sdk.scope import should_send_default_pii +from sentry_sdk.traces import SpanStatus, StreamedSpan +from sentry_sdk.tracing_utils import has_span_streaming_enabled from ..consts import SPAN_ORIGIN from ..utils import _set_agent_data if TYPE_CHECKING: - from typing import Any + from typing import Any, Union import agents @@ -16,26 +18,43 @@ def execute_tool_span( tool: "agents.Tool", *args: "Any", **kwargs: "Any" ) -> "sentry_sdk.tracing.Span": - span = sentry_sdk.start_span( - op=OP.GEN_AI_EXECUTE_TOOL, - name=f"execute_tool {tool.name}", - origin=SPAN_ORIGIN, - ) + span_streaming = has_span_streaming_enabled(sentry_sdk.get_client().options) + if span_streaming: + span = sentry_sdk.traces.start_span( + name=f"execute_tool {tool.name}", + attributes={ + "sentry.op": OP.GEN_AI_EXECUTE_TOOL, + "sentry.origin": SPAN_ORIGIN, + SPANDATA.GEN_AI_OPERATION_NAME: "execute_tool", + SPANDATA.GEN_AI_TOOL_NAME: tool.name, + SPANDATA.GEN_AI_TOOL_DESCRIPTION: tool.description, + }, + ) + + set_on_span = span.set_attribute + else: + span = sentry_sdk.start_span( + op=OP.GEN_AI_EXECUTE_TOOL, + name=f"execute_tool {tool.name}", + origin=SPAN_ORIGIN, + ) + + span.set_data(SPANDATA.GEN_AI_OPERATION_NAME, "execute_tool") - span.set_data(SPANDATA.GEN_AI_OPERATION_NAME, "execute_tool") + span.set_data(SPANDATA.GEN_AI_TOOL_NAME, tool.name) + span.set_data(SPANDATA.GEN_AI_TOOL_DESCRIPTION, tool.description) - span.set_data(SPANDATA.GEN_AI_TOOL_NAME, tool.name) - span.set_data(SPANDATA.GEN_AI_TOOL_DESCRIPTION, tool.description) + set_on_span = span.set_data if should_send_default_pii(): input = args[1] - span.set_data(SPANDATA.GEN_AI_TOOL_INPUT, input) + set_on_span(SPANDATA.GEN_AI_TOOL_INPUT, input) return span def update_execute_tool_span( - span: "sentry_sdk.tracing.Span", + span: "Union[sentry_sdk.tracing.Span, StreamedSpan]", agent: "agents.Agent", tool: "agents.Tool", result: "Any", @@ -45,12 
+64,19 @@ def update_execute_tool_span( if isinstance(result, str) and result.startswith( "An error occurred while running the tool" ): - span.set_status(SPANSTATUS.INTERNAL_ERROR) + if isinstance(span, StreamedSpan): + span.status = SpanStatus.ERROR + else: + span.set_status(SPANSTATUS.INTERNAL_ERROR) + + set_on_span = ( + span.set_attribute if isinstance(span, StreamedSpan) else span.set_data + ) if should_send_default_pii(): - span.set_data(SPANDATA.GEN_AI_TOOL_OUTPUT, result) + set_on_span(SPANDATA.GEN_AI_TOOL_OUTPUT, result) # Add conversation ID from agent conv_id = getattr(agent, "_sentry_conversation_id", None) if conv_id: - span.set_data(SPANDATA.GEN_AI_CONVERSATION_ID, conv_id) + set_on_span(SPANDATA.GEN_AI_CONVERSATION_ID, conv_id) diff --git a/sentry_sdk/integrations/openai_agents/spans/handoff.py b/sentry_sdk/integrations/openai_agents/spans/handoff.py index 979a4fb7df..734eed0908 100644 --- a/sentry_sdk/integrations/openai_agents/spans/handoff.py +++ b/sentry_sdk/integrations/openai_agents/spans/handoff.py @@ -2,6 +2,7 @@ import sentry_sdk from sentry_sdk.consts import OP, SPANDATA +from sentry_sdk.tracing_utils import has_span_streaming_enabled from ..consts import SPAN_ORIGIN @@ -12,14 +13,29 @@ def handoff_span( context: "agents.RunContextWrapper", from_agent: "agents.Agent", to_agent_name: str ) -> None: - with sentry_sdk.start_span( - op=OP.GEN_AI_HANDOFF, - name=f"handoff from {from_agent.name} to {to_agent_name}", - origin=SPAN_ORIGIN, - ) as span: - span.set_data(SPANDATA.GEN_AI_OPERATION_NAME, "handoff") + span_streaming = has_span_streaming_enabled(sentry_sdk.get_client().options) + if span_streaming: + with sentry_sdk.traces.start_span( + name=f"handoff from {from_agent.name} to {to_agent_name}", + attributes={ + "sentry.op": OP.GEN_AI_HANDOFF, + "sentry.origin": SPAN_ORIGIN, + SPANDATA.GEN_AI_OPERATION_NAME: "handoff", + }, + ) as span: + # Add conversation ID from agent + conv_id = getattr(from_agent, "_sentry_conversation_id", None) + if 
conv_id: + span.set_attribute(SPANDATA.GEN_AI_CONVERSATION_ID, conv_id) + else: + with sentry_sdk.start_span( + op=OP.GEN_AI_HANDOFF, + name=f"handoff from {from_agent.name} to {to_agent_name}", + origin=SPAN_ORIGIN, + ) as span: + span.set_data(SPANDATA.GEN_AI_OPERATION_NAME, "handoff") - # Add conversation ID from agent - conv_id = getattr(from_agent, "_sentry_conversation_id", None) - if conv_id: - span.set_data(SPANDATA.GEN_AI_CONVERSATION_ID, conv_id) + # Add conversation ID from agent + conv_id = getattr(from_agent, "_sentry_conversation_id", None) + if conv_id: + span.set_data(SPANDATA.GEN_AI_CONVERSATION_ID, conv_id) diff --git a/sentry_sdk/integrations/openai_agents/spans/invoke_agent.py b/sentry_sdk/integrations/openai_agents/spans/invoke_agent.py index 6f7dda3982..2f31346450 100644 --- a/sentry_sdk/integrations/openai_agents/spans/invoke_agent.py +++ b/sentry_sdk/integrations/openai_agents/spans/invoke_agent.py @@ -9,13 +9,15 @@ ) from sentry_sdk.consts import OP, SPANDATA from sentry_sdk.scope import should_send_default_pii +from sentry_sdk.traces import StreamedSpan +from sentry_sdk.tracing_utils import has_span_streaming_enabled from sentry_sdk.utils import safe_serialize from ..consts import SPAN_ORIGIN from ..utils import _set_agent_data, _set_usage_data if TYPE_CHECKING: - from typing import Any + from typing import Any, Union import agents @@ -23,15 +25,26 @@ def invoke_agent_span( context: "agents.RunContextWrapper", agent: "agents.Agent", kwargs: "dict[str, Any]" ) -> "sentry_sdk.tracing.Span": - start_span_function = get_start_span_function() - span = start_span_function( - op=OP.GEN_AI_INVOKE_AGENT, - name=f"invoke_agent {agent.name}", - origin=SPAN_ORIGIN, - ) - span.__enter__() - - span.set_data(SPANDATA.GEN_AI_OPERATION_NAME, "invoke_agent") + span_streaming = has_span_streaming_enabled(sentry_sdk.get_client().options) + if span_streaming: + span = sentry_sdk.traces.start_span( + name=f"invoke_agent {agent.name}", + attributes={ + "sentry.op": 
OP.GEN_AI_INVOKE_AGENT, + "sentry.origin": SPAN_ORIGIN, + SPANDATA.GEN_AI_OPERATION_NAME: "invoke_agent", + }, + ) + else: + start_span_function = get_start_span_function() + span = start_span_function( + op=OP.GEN_AI_INVOKE_AGENT, + name=f"invoke_agent {agent.name}", + origin=SPAN_ORIGIN, + ) + span.__enter__() + + span.set_data(SPANDATA.GEN_AI_OPERATION_NAME, "invoke_agent") if should_send_default_pii(): messages = [] @@ -85,7 +98,7 @@ def invoke_agent_span( def update_invoke_agent_span( - span: "sentry_sdk.tracing.Span", + span: "Union[sentry_sdk.tracing.Span, StreamedSpan]", context: "agents.RunContextWrapper", agent: "agents.Agent", output: "Any" = None, @@ -100,4 +113,7 @@ def update_invoke_agent_span( # Add conversation ID from agent conv_id = getattr(agent, "_sentry_conversation_id", None) if conv_id: - span.set_data(SPANDATA.GEN_AI_CONVERSATION_ID, conv_id) + if isinstance(span, StreamedSpan): + span.set_attribute(SPANDATA.GEN_AI_CONVERSATION_ID, conv_id) + else: + span.set_data(SPANDATA.GEN_AI_CONVERSATION_ID, conv_id) diff --git a/sentry_sdk/integrations/openai_agents/utils.py b/sentry_sdk/integrations/openai_agents/utils.py index 78f0a90f65..224a5f66ba 100644 --- a/sentry_sdk/integrations/openai_agents/utils.py +++ b/sentry_sdk/integrations/openai_agents/utils.py @@ -17,10 +17,12 @@ from sentry_sdk.consts import SPANDATA from sentry_sdk.integrations import DidNotEnable from sentry_sdk.scope import should_send_default_pii +from sentry_sdk.traces import StreamedSpan +from sentry_sdk.tracing_utils import should_truncate_gen_ai_input from sentry_sdk.utils import event_from_exception, safe_serialize if TYPE_CHECKING: - from typing import Any + from typing import Any, Union from agents import TResponseInputItem, Usage @@ -42,17 +44,21 @@ def _capture_exception(exc: "Any") -> None: sentry_sdk.capture_event(event, hint=hint) -def _set_agent_data(span: "sentry_sdk.tracing.Span", agent: "agents.Agent") -> None: - span.set_data( +def _set_agent_data( + span: 
"Union[sentry_sdk.tracing.Span, StreamedSpan]", agent: "agents.Agent" +) -> None: + set_on_span = ( + span.set_attribute if isinstance(span, StreamedSpan) else span.set_data + ) + + set_on_span( SPANDATA.GEN_AI_SYSTEM, "openai" ) # See footnote for https://opentelemetry.io/docs/specs/semconv/registry/attributes/gen-ai/#gen-ai-system for explanation why. - span.set_data(SPANDATA.GEN_AI_AGENT_NAME, agent.name) + set_on_span(SPANDATA.GEN_AI_AGENT_NAME, agent.name) if agent.model_settings.max_tokens: - span.set_data( - SPANDATA.GEN_AI_REQUEST_MAX_TOKENS, agent.model_settings.max_tokens - ) + set_on_span(SPANDATA.GEN_AI_REQUEST_MAX_TOKENS, agent.model_settings.max_tokens) # Get model name from agent.model or fall back to request model (for when agent.model is None/default) model_name = None @@ -62,51 +68,57 @@ def _set_agent_data(span: "sentry_sdk.tracing.Span", agent: "agents.Agent") -> N model_name = agent._sentry_request_model if model_name: - span.set_data(SPANDATA.GEN_AI_REQUEST_MODEL, model_name) + set_on_span(SPANDATA.GEN_AI_REQUEST_MODEL, model_name) if agent.model_settings.presence_penalty: - span.set_data( + set_on_span( SPANDATA.GEN_AI_REQUEST_PRESENCE_PENALTY, agent.model_settings.presence_penalty, ) if agent.model_settings.temperature: - span.set_data( + set_on_span( SPANDATA.GEN_AI_REQUEST_TEMPERATURE, agent.model_settings.temperature ) if agent.model_settings.top_p: - span.set_data(SPANDATA.GEN_AI_REQUEST_TOP_P, agent.model_settings.top_p) + set_on_span(SPANDATA.GEN_AI_REQUEST_TOP_P, agent.model_settings.top_p) if agent.model_settings.frequency_penalty: - span.set_data( + set_on_span( SPANDATA.GEN_AI_REQUEST_FREQUENCY_PENALTY, agent.model_settings.frequency_penalty, ) if len(agent.tools) > 0: - span.set_data( + set_on_span( SPANDATA.GEN_AI_REQUEST_AVAILABLE_TOOLS, safe_serialize([vars(tool) for tool in agent.tools]), ) -def _set_usage_data(span: "sentry_sdk.tracing.Span", usage: "Usage") -> None: - span.set_data(SPANDATA.GEN_AI_USAGE_INPUT_TOKENS, 
usage.input_tokens) - span.set_data( +def _set_usage_data( + span: "Union[sentry_sdk.tracing.Span, StreamedSpan]", usage: "Usage" +) -> None: + set_on_span = ( + span.set_attribute if isinstance(span, StreamedSpan) else span.set_data + ) + set_on_span(SPANDATA.GEN_AI_USAGE_INPUT_TOKENS, usage.input_tokens) + set_on_span( SPANDATA.GEN_AI_USAGE_INPUT_TOKENS_CACHED, usage.input_tokens_details.cached_tokens, ) - span.set_data(SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS, usage.output_tokens) - span.set_data( + set_on_span(SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS, usage.output_tokens) + set_on_span( SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS_REASONING, usage.output_tokens_details.reasoning_tokens, ) - span.set_data(SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS, usage.total_tokens) + set_on_span(SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS, usage.total_tokens) def _set_input_data( - span: "sentry_sdk.tracing.Span", get_response_kwargs: "dict[str, Any]" + span: "Union[sentry_sdk.tracing.Span, StreamedSpan]", + get_response_kwargs: "dict[str, Any]", ) -> None: if not should_send_default_pii(): return @@ -131,10 +143,16 @@ def _set_input_data( instructions_text_parts += _transform_system_instructions(system_instructions) if len(instructions_text_parts) > 0: - span.set_data( - SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS, - json.dumps(instructions_text_parts), - ) + if isinstance(span, StreamedSpan): + span.set_attribute( + SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS, + json.dumps(instructions_text_parts), + ) + else: + span.set_data( + SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS, + json.dumps(instructions_text_parts), + ) non_system_messages = [ message for message in messages if not _is_system_instruction(message) @@ -173,9 +191,9 @@ def _set_input_data( client = sentry_sdk.get_client() scope = sentry_sdk.get_current_scope() messages_data = ( - normalized_messages - if client.options.get("stream_gen_ai_spans", False) - else truncate_and_annotate_messages(normalized_messages, span, scope) + 
truncate_and_annotate_messages(normalized_messages, span, scope) + if should_truncate_gen_ai_input(client.options) + else normalized_messages ) if messages_data is not None: set_data_normalized( @@ -186,7 +204,9 @@ def _set_input_data( ) -def _set_output_data(span: "sentry_sdk.tracing.Span", result: "Any") -> None: +def _set_output_data( + span: "Union[sentry_sdk.tracing.Span, StreamedSpan]", result: "Any" +) -> None: if not should_send_default_pii(): return @@ -207,9 +227,16 @@ def _set_output_data(span: "sentry_sdk.tracing.Span", result: "Any") -> None: output_messages["response"].append(output_message.dict()) if len(output_messages["tool"]) > 0: - span.set_data( - SPANDATA.GEN_AI_RESPONSE_TOOL_CALLS, safe_serialize(output_messages["tool"]) - ) + if isinstance(span, StreamedSpan): + span.set_attribute( + SPANDATA.GEN_AI_RESPONSE_TOOL_CALLS, + safe_serialize(output_messages["tool"]), + ) + else: + span.set_data( + SPANDATA.GEN_AI_RESPONSE_TOOL_CALLS, + safe_serialize(output_messages["tool"]), + ) if len(output_messages["response"]) > 0: set_data_normalized( diff --git a/tests/integrations/openai_agents/test_openai_agents.py b/tests/integrations/openai_agents/test_openai_agents.py index 2cc33d6fd7..69113e3f71 100644 --- a/tests/integrations/openai_agents/test_openai_agents.py +++ b/tests/integrations/openai_agents/test_openai_agents.py @@ -31,6 +31,7 @@ from sentry_sdk.integrations.logging import LoggingIntegration from sentry_sdk.integrations.openai_agents import OpenAIAgentsIntegration from sentry_sdk.integrations.openai_agents.utils import _set_input_data, safe_serialize +from sentry_sdk.integrations.stdlib import StdlibIntegration from sentry_sdk.utils import package_version, parse_version OPENAI_VERSION = package_version("openai") @@ -153,6 +154,7 @@ def test_agent_custom_model(): ) +@pytest.mark.parametrize("span_streaming", [True, False]) @pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.asyncio async def 
test_agent_invocation_span_no_pii( @@ -163,6 +165,7 @@ async def test_agent_invocation_span_no_pii( nonstreaming_responses_model_response, get_model_response, stream_gen_ai_spans, + span_streaming, ): client = AsyncOpenAI(api_key="test-key") model = OpenAIResponsesModel(model="gpt-4", openai_client=client) @@ -172,7 +175,71 @@ async def test_agent_invocation_span_no_pii( nonstreaming_responses_model_response, serialize_pydantic=True ) - if stream_gen_ai_spans: + if span_streaming: + with patch.object( + agent.model._client._client, + "send", + return_value=response, + ) as _: + sentry_init( + integrations=[OpenAIAgentsIntegration()], + disabled_integrations=[StdlibIntegration], + traces_sample_rate=1.0, + send_default_pii=False, + stream_gen_ai_spans=stream_gen_ai_spans, + _experiments={"trace_lifecycle": "stream"}, + ) + + items = capture_items("span") + + result = await agents.Runner.run( + agent, "Test input", run_config=test_run_config + ) + + assert result is not None + assert result.final_output == "Hello, how can I help you?" 
+ + sentry_sdk.flush() + spans = [item.payload for item in items] + invoke_agent_span = next( + span + for span in spans + if span["attributes"]["sentry.op"] == OP.GEN_AI_INVOKE_AGENT + ) + ai_client_span = next( + span for span in spans if span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT + ) + + assert spans[2]["name"] == "test_agent workflow" + assert spans[2]["attributes"]["sentry.origin"] == "auto.ai.openai_agents" + + assert invoke_agent_span["name"] == "invoke_agent test_agent" + + assert ( + SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS not in invoke_agent_span["attributes"] + ) + assert "gen_ai.request.messages" not in invoke_agent_span["attributes"] + assert "gen_ai.response.text" not in invoke_agent_span["attributes"] + + assert ( + invoke_agent_span["attributes"]["gen_ai.operation.name"] == "invoke_agent" + ) + assert invoke_agent_span["attributes"]["gen_ai.system"] == "openai" + assert invoke_agent_span["attributes"]["gen_ai.agent.name"] == "test_agent" + assert invoke_agent_span["attributes"]["gen_ai.request.max_tokens"] == 100 + assert invoke_agent_span["attributes"]["gen_ai.request.model"] == "gpt-4" + assert invoke_agent_span["attributes"]["gen_ai.request.temperature"] == 0.7 + assert invoke_agent_span["attributes"]["gen_ai.request.top_p"] == 1.0 + + assert ai_client_span["name"] == "chat gpt-4" + assert ai_client_span["attributes"]["gen_ai.operation.name"] == "chat" + assert ai_client_span["attributes"]["gen_ai.system"] == "openai" + assert ai_client_span["attributes"]["gen_ai.agent.name"] == "test_agent" + assert ai_client_span["attributes"]["gen_ai.request.max_tokens"] == 100 + assert ai_client_span["attributes"]["gen_ai.request.model"] == "gpt-4" + assert ai_client_span["attributes"]["gen_ai.request.temperature"] == 0.7 + assert ai_client_span["attributes"]["gen_ai.request.top_p"] == 1.0 + elif span_streaming or stream_gen_ai_spans: with patch.object( agent.model._client._client, "send", @@ -290,6 +357,7 @@ async def test_agent_invocation_span_no_pii( 
assert ai_client_span["data"]["gen_ai.request.top_p"] == 1.0 +@pytest.mark.parametrize("span_streaming", [True, False]) @pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.asyncio @pytest.mark.parametrize( @@ -396,6 +464,7 @@ async def test_agent_invocation_span( request, get_model_response, stream_gen_ai_spans, + span_streaming, ): """ Test that the integration creates spans for agent invocations. @@ -408,7 +477,7 @@ async def test_agent_invocation_span( nonstreaming_responses_model_response, serialize_pydantic=True ) - if stream_gen_ai_spans: + if span_streaming: with patch.object( agent.model._client._client, "send", @@ -416,12 +485,14 @@ async def test_agent_invocation_span( ) as _: sentry_init( integrations=[OpenAIAgentsIntegration()], + disabled_integrations=[StdlibIntegration], traces_sample_rate=1.0, send_default_pii=True, stream_gen_ai_spans=stream_gen_ai_spans, + _experiments={"trace_lifecycle": "stream"}, ) - items = capture_items("span", "transaction") + items = capture_items("span") result = await agents.Runner.run( agent, @@ -432,13 +503,12 @@ async def test_agent_invocation_span( assert result is not None assert result.final_output == "Hello, how can I help you?" 
- (transaction,) = (item.payload for item in items if item.type == "transaction") - - assert transaction["transaction"] == "test_agent workflow" - assert transaction["contexts"]["trace"]["origin"] == "auto.ai.openai_agents" + sentry_sdk.flush() + spans = [item.payload for item in items] + ai_client_span, invoke_agent_span, workflow_span = spans - spans = [item.payload for item in items if item.type == "span"] - invoke_agent_span, ai_client_span = spans + assert workflow_span["name"] == "test_agent workflow" + assert workflow_span["attributes"]["sentry.origin"] == "auto.ai.openai_agents" assert invoke_agent_span["name"] == "invoke_agent test_agent" @@ -697,7 +767,7 @@ async def test_agent_invocation_span( assert ai_client_span["attributes"]["gen_ai.request.temperature"] == 0.7 assert ai_client_span["attributes"]["gen_ai.request.top_p"] == 1.0 - else: + elif span_streaming or stream_gen_ai_spans: with patch.object( agent.model._client._client, "send", @@ -709,7 +779,8 @@ async def test_agent_invocation_span( send_default_pii=True, stream_gen_ai_spans=stream_gen_ai_spans, ) - events = capture_events() + + items = capture_items("span", "transaction") result = await agents.Runner.run( agent, @@ -720,21 +791,22 @@ async def test_agent_invocation_span( assert result is not None assert result.final_output == "Hello, how can I help you?" 
- (transaction,) = events - spans = transaction["spans"] - invoke_agent_span, ai_client_span = spans + (transaction,) = (item.payload for item in items if item.type == "transaction") assert transaction["transaction"] == "test_agent workflow" assert transaction["contexts"]["trace"]["origin"] == "auto.ai.openai_agents" - assert invoke_agent_span["description"] == "invoke_agent test_agent" + spans = [item.payload for item in items if item.type == "span"] + invoke_agent_span, ai_client_span = spans + + assert invoke_agent_span["name"] == "invoke_agent test_agent" # Only first case checks "gen_ai.request.messages" until further input handling work. param_id = request.node.callspec.id if "string" in param_id and instructions is None: # type: ignore - assert "gen_ai.system_instructions" not in ai_client_span["data"] + assert "gen_ai.system_instructions" not in ai_client_span["attributes"] - assert invoke_agent_span["data"][ + assert invoke_agent_span["attributes"][ "gen_ai.request.messages" ] == safe_serialize( [ @@ -744,9 +816,8 @@ async def test_agent_invocation_span( }, ] ) - elif "string" in param_id: - assert ai_client_span["data"][ + assert ai_client_span["attributes"][ "gen_ai.system_instructions" ] == safe_serialize( [ @@ -757,15 +828,30 @@ async def test_agent_invocation_span( ] ) elif "blocks_no_type" in param_id and instructions is None: # type: ignore - assert ai_client_span["data"][ + assert ai_client_span["attributes"][ "gen_ai.system_instructions" ] == safe_serialize( [ {"type": "text", "content": "You are a helpful assistant."}, ] ) + + assert json.loads( + ai_client_span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + ) == [ + { + "role": "user", + "content": [ + { + "type": "text", + "text": "Message demonstrating the absence of truncation.", + } + ], + }, + {"role": "user", "content": [{"type": "text", "text": "Test input"}]}, + ] elif "blocks_no_type" in param_id: - assert ai_client_span["data"][ + assert ai_client_span["attributes"][ 
"gen_ai.system_instructions" ] == safe_serialize( [ @@ -776,16 +862,46 @@ async def test_agent_invocation_span( {"type": "text", "content": "You are a helpful assistant."}, ] ) + + assert json.loads( + ai_client_span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + ) == [ + { + "role": "user", + "content": [ + { + "type": "text", + "text": "Message demonstrating the absence of truncation.", + } + ], + }, + {"role": "user", "content": [{"type": "text", "text": "Test input"}]}, + ] elif "blocks" in param_id and instructions is None: # type: ignore - assert ai_client_span["data"][ + assert ai_client_span["attributes"][ "gen_ai.system_instructions" ] == safe_serialize( [ {"type": "text", "content": "You are a helpful assistant."}, ] ) + + assert json.loads( + ai_client_span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + ) == [ + { + "role": "user", + "content": [ + { + "type": "text", + "text": "Message demonstrating the absence of truncation.", + } + ], + }, + {"role": "user", "content": [{"type": "text", "text": "Test input"}]}, + ] elif "blocks" in param_id: - assert ai_client_span["data"][ + assert ai_client_span["attributes"][ "gen_ai.system_instructions" ] == safe_serialize( [ @@ -796,8 +912,23 @@ async def test_agent_invocation_span( {"type": "text", "content": "You are a helpful assistant."}, ] ) + + assert json.loads( + ai_client_span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + ) == [ + { + "role": "user", + "content": [ + { + "type": "text", + "text": "Message demonstrating the absence of truncation.", + } + ], + }, + {"role": "user", "content": [{"type": "text", "text": "Test input"}]}, + ] elif "parts_no_type" in param_id and instructions is None: - assert ai_client_span["data"][ + assert ai_client_span["attributes"][ "gen_ai.system_instructions" ] == safe_serialize( [ @@ -805,8 +936,23 @@ async def test_agent_invocation_span( {"type": "text", "content": "Be concise and clear."}, ] ) + + assert json.loads( + 
ai_client_span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + ) == [ + { + "role": "user", + "content": [ + { + "type": "text", + "text": "Message demonstrating the absence of truncation.", + } + ], + }, + {"role": "user", "content": [{"type": "text", "text": "Test input"}]}, + ] elif "parts_no_type" in param_id: - assert ai_client_span["data"][ + assert ai_client_span["attributes"][ "gen_ai.system_instructions" ] == safe_serialize( [ @@ -818,8 +964,23 @@ async def test_agent_invocation_span( {"type": "text", "content": "Be concise and clear."}, ] ) + + assert json.loads( + ai_client_span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + ) == [ + { + "role": "user", + "content": [ + { + "type": "text", + "text": "Message demonstrating the absence of truncation.", + } + ], + }, + {"role": "user", "content": [{"type": "text", "text": "Test input"}]}, + ] elif instructions is None: # type: ignore - assert ai_client_span["data"][ + assert ai_client_span["attributes"][ "gen_ai.system_instructions" ] == safe_serialize( [ @@ -827,8 +988,23 @@ async def test_agent_invocation_span( {"type": "text", "content": "Be concise and clear."}, ] ) + + assert json.loads( + ai_client_span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + ) == [ + { + "role": "user", + "content": [ + { + "type": "text", + "text": "Message demonstrating the absence of truncation.", + } + ], + }, + {"role": "user", "content": [{"type": "text", "text": "Test input"}]}, + ] else: - assert ai_client_span["data"][ + assert ai_client_span["attributes"][ "gen_ai.system_instructions" ] == safe_serialize( [ @@ -841,29 +1017,213 @@ async def test_agent_invocation_span( ] ) + assert json.loads( + ai_client_span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + ) == [ + { + "role": "user", + "content": [ + { + "type": "text", + "text": "Message demonstrating the absence of truncation.", + } + ], + }, + {"role": "user", "content": [{"type": "text", "text": "Test input"}]}, + ] + assert ( - 
invoke_agent_span["data"]["gen_ai.response.text"] + invoke_agent_span["attributes"]["gen_ai.response.text"] == "Hello, how can I help you?" ) - assert invoke_agent_span["data"]["gen_ai.operation.name"] == "invoke_agent" - assert invoke_agent_span["data"]["gen_ai.system"] == "openai" - assert invoke_agent_span["data"]["gen_ai.agent.name"] == "test_agent" - assert invoke_agent_span["data"]["gen_ai.request.max_tokens"] == 100 - assert invoke_agent_span["data"]["gen_ai.request.model"] == "gpt-4" - assert invoke_agent_span["data"]["gen_ai.request.temperature"] == 0.7 - assert invoke_agent_span["data"]["gen_ai.request.top_p"] == 1.0 - - assert ai_client_span["description"] == "chat gpt-4" - assert ai_client_span["data"]["gen_ai.operation.name"] == "chat" - assert ai_client_span["data"]["gen_ai.system"] == "openai" - assert ai_client_span["data"]["gen_ai.agent.name"] == "test_agent" - assert ai_client_span["data"]["gen_ai.request.max_tokens"] == 100 - assert ai_client_span["data"]["gen_ai.request.model"] == "gpt-4" - assert ai_client_span["data"]["gen_ai.request.temperature"] == 0.7 - assert ai_client_span["data"]["gen_ai.request.top_p"] == 1.0 + assert ( + invoke_agent_span["attributes"]["gen_ai.operation.name"] == "invoke_agent" + ) + assert invoke_agent_span["attributes"]["gen_ai.system"] == "openai" + assert invoke_agent_span["attributes"]["gen_ai.agent.name"] == "test_agent" + assert invoke_agent_span["attributes"]["gen_ai.request.max_tokens"] == 100 + assert invoke_agent_span["attributes"]["gen_ai.request.model"] == "gpt-4" + assert invoke_agent_span["attributes"]["gen_ai.request.temperature"] == 0.7 + assert invoke_agent_span["attributes"]["gen_ai.request.top_p"] == 1.0 + assert ai_client_span["name"] == "chat gpt-4" + assert ai_client_span["attributes"]["gen_ai.operation.name"] == "chat" + assert ai_client_span["attributes"]["gen_ai.system"] == "openai" + assert ai_client_span["attributes"]["gen_ai.agent.name"] == "test_agent" + assert 
ai_client_span["attributes"]["gen_ai.request.max_tokens"] == 100 + assert ai_client_span["attributes"]["gen_ai.request.model"] == "gpt-4" + assert ai_client_span["attributes"]["gen_ai.request.temperature"] == 0.7 + assert ai_client_span["attributes"]["gen_ai.request.top_p"] == 1.0 + else: + with patch.object( + agent.model._client._client, + "send", + return_value=response, + ) as _: + sentry_init( + integrations=[OpenAIAgentsIntegration()], + traces_sample_rate=1.0, + send_default_pii=True, + stream_gen_ai_spans=stream_gen_ai_spans, + ) + events = capture_events() + + result = await agents.Runner.run( + agent, + input, + run_config=test_run_config, + ) + + assert result is not None + assert result.final_output == "Hello, how can I help you?" + + (transaction,) = events + spans = transaction["spans"] + invoke_agent_span, ai_client_span = spans + + assert transaction["transaction"] == "test_agent workflow" + assert transaction["contexts"]["trace"]["origin"] == "auto.ai.openai_agents" + + assert invoke_agent_span["description"] == "invoke_agent test_agent" + + # Only first case checks "gen_ai.request.messages" until further input handling work. 
+ param_id = request.node.callspec.id + if "string" in param_id and instructions is None: # type: ignore + assert "gen_ai.system_instructions" not in ai_client_span["data"] + + assert invoke_agent_span["data"][ + "gen_ai.request.messages" + ] == safe_serialize( + [ + { + "content": [{"text": "Test input", "type": "text"}], + "role": "user", + }, + ] + ) + + elif "string" in param_id: + assert ai_client_span["data"][ + "gen_ai.system_instructions" + ] == safe_serialize( + [ + { + "type": "text", + "content": "You are a coding assistant that talks like a pirate.", + }, + ] + ) + elif "blocks_no_type" in param_id and instructions is None: # type: ignore + assert ai_client_span["data"][ + "gen_ai.system_instructions" + ] == safe_serialize( + [ + {"type": "text", "content": "You are a helpful assistant."}, + ] + ) + elif "blocks_no_type" in param_id: + assert ai_client_span["data"][ + "gen_ai.system_instructions" + ] == safe_serialize( + [ + { + "type": "text", + "content": "You are a coding assistant that talks like a pirate.", + }, + {"type": "text", "content": "You are a helpful assistant."}, + ] + ) + elif "blocks" in param_id and instructions is None: # type: ignore + assert ai_client_span["data"][ + "gen_ai.system_instructions" + ] == safe_serialize( + [ + {"type": "text", "content": "You are a helpful assistant."}, + ] + ) + elif "blocks" in param_id: + assert ai_client_span["data"][ + "gen_ai.system_instructions" + ] == safe_serialize( + [ + { + "type": "text", + "content": "You are a coding assistant that talks like a pirate.", + }, + {"type": "text", "content": "You are a helpful assistant."}, + ] + ) + elif "parts_no_type" in param_id and instructions is None: + assert ai_client_span["data"][ + "gen_ai.system_instructions" + ] == safe_serialize( + [ + {"type": "text", "content": "You are a helpful assistant."}, + {"type": "text", "content": "Be concise and clear."}, + ] + ) + elif "parts_no_type" in param_id: + assert ai_client_span["data"][ + 
"gen_ai.system_instructions" + ] == safe_serialize( + [ + { + "type": "text", + "content": "You are a coding assistant that talks like a pirate.", + }, + {"type": "text", "content": "You are a helpful assistant."}, + {"type": "text", "content": "Be concise and clear."}, + ] + ) + elif instructions is None: # type: ignore + assert ai_client_span["data"][ + "gen_ai.system_instructions" + ] == safe_serialize( + [ + {"type": "text", "content": "You are a helpful assistant."}, + {"type": "text", "content": "Be concise and clear."}, + ] + ) + else: + assert ai_client_span["data"][ + "gen_ai.system_instructions" + ] == safe_serialize( + [ + { + "type": "text", + "content": "You are a coding assistant that talks like a pirate.", + }, + {"type": "text", "content": "You are a helpful assistant."}, + {"type": "text", "content": "Be concise and clear."}, + ] + ) + + assert ( + invoke_agent_span["data"]["gen_ai.response.text"] + == "Hello, how can I help you?" + ) + + assert invoke_agent_span["data"]["gen_ai.operation.name"] == "invoke_agent" + assert invoke_agent_span["data"]["gen_ai.system"] == "openai" + assert invoke_agent_span["data"]["gen_ai.agent.name"] == "test_agent" + assert invoke_agent_span["data"]["gen_ai.request.max_tokens"] == 100 + assert invoke_agent_span["data"]["gen_ai.request.model"] == "gpt-4" + assert invoke_agent_span["data"]["gen_ai.request.temperature"] == 0.7 + assert invoke_agent_span["data"]["gen_ai.request.top_p"] == 1.0 + + assert ai_client_span["description"] == "chat gpt-4" + assert ai_client_span["data"]["gen_ai.operation.name"] == "chat" + assert ai_client_span["data"]["gen_ai.system"] == "openai" + assert ai_client_span["data"]["gen_ai.agent.name"] == "test_agent" + assert ai_client_span["data"]["gen_ai.request.max_tokens"] == 100 + assert ai_client_span["data"]["gen_ai.request.model"] == "gpt-4" + assert ai_client_span["data"]["gen_ai.request.temperature"] == 0.7 + assert ai_client_span["data"]["gen_ai.request.top_p"] == 1.0 + + 
+@pytest.mark.parametrize("span_streaming", [True, False]) @pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.asyncio async def test_client_span_custom_model( @@ -874,6 +1234,7 @@ async def test_client_span_custom_model( nonstreaming_responses_model_response, get_model_response, stream_gen_ai_spans, + span_streaming, ): """ Test that the integration uses the correct model name if a custom model is used. @@ -887,7 +1248,7 @@ async def test_client_span_custom_model( nonstreaming_responses_model_response, serialize_pydantic=True ) - if stream_gen_ai_spans: + if span_streaming or stream_gen_ai_spans: with patch.object( agent.model._client._client, "send", @@ -895,8 +1256,12 @@ async def test_client_span_custom_model( ) as _: sentry_init( integrations=[OpenAIAgentsIntegration()], + disabled_integrations=[StdlibIntegration], traces_sample_rate=1.0, stream_gen_ai_spans=stream_gen_ai_spans, + _experiments={ + "trace_lifecycle": "stream" if span_streaming else "static" + }, ) items = capture_items("span") @@ -908,6 +1273,7 @@ async def test_client_span_custom_model( assert result is not None assert result.final_output == "Hello, how can I help you?" + sentry_sdk.flush() spans = [item.payload for item in items if item.type == "span"] ai_client_span = next( span for span in spans if span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT @@ -943,6 +1309,7 @@ async def test_client_span_custom_model( assert ai_client_span["data"]["gen_ai.request.model"] == "my-custom-model" +@pytest.mark.parametrize("span_streaming", [True, False]) @pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) def test_agent_invocation_span_sync_no_pii( sentry_init, @@ -952,6 +1319,7 @@ def test_agent_invocation_span_sync_no_pii( nonstreaming_responses_model_response, get_model_response, stream_gen_ai_spans, + span_streaming, ): """ Test that the integration creates spans for agent invocations. 
@@ -964,7 +1332,69 @@ def test_agent_invocation_span_sync_no_pii( nonstreaming_responses_model_response, serialize_pydantic=True ) - if stream_gen_ai_spans: + if span_streaming: + with patch.object( + agent.model._client._client, + "send", + return_value=response, + ) as _: + sentry_init( + integrations=[OpenAIAgentsIntegration()], + disabled_integrations=[StdlibIntegration], + traces_sample_rate=1.0, + send_default_pii=False, + stream_gen_ai_spans=stream_gen_ai_spans, + _experiments={"trace_lifecycle": "stream"}, + ) + + items = capture_items("span") + + result = agents.Runner.run_sync( + agent, "Test input", run_config=test_run_config + ) + + assert result is not None + assert result.final_output == "Hello, how can I help you?" + + sentry_sdk.flush() + spans = [item.payload for item in items] + + assert spans[2]["name"] == "test_agent workflow" + assert spans[2]["attributes"]["sentry.origin"] == "auto.ai.openai_agents" + + invoke_agent_span = next( + span + for span in spans + if span["attributes"]["sentry.op"] == OP.GEN_AI_INVOKE_AGENT + ) + ai_client_span = next( + span for span in spans if span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT + ) + + assert invoke_agent_span["name"] == "invoke_agent test_agent" + assert ( + invoke_agent_span["attributes"]["gen_ai.operation.name"] == "invoke_agent" + ) + assert invoke_agent_span["attributes"]["gen_ai.system"] == "openai" + assert invoke_agent_span["attributes"]["gen_ai.agent.name"] == "test_agent" + assert invoke_agent_span["attributes"]["gen_ai.request.max_tokens"] == 100 + assert invoke_agent_span["attributes"]["gen_ai.request.model"] == "gpt-4" + assert invoke_agent_span["attributes"]["gen_ai.request.temperature"] == 0.7 + assert invoke_agent_span["attributes"]["gen_ai.request.top_p"] == 1.0 + + assert ai_client_span["name"] == "chat gpt-4" + assert ai_client_span["attributes"]["gen_ai.operation.name"] == "chat" + assert ai_client_span["attributes"]["gen_ai.system"] == "openai" + assert 
ai_client_span["attributes"]["gen_ai.agent.name"] == "test_agent" + assert ai_client_span["attributes"]["gen_ai.request.max_tokens"] == 100 + assert ai_client_span["attributes"]["gen_ai.request.model"] == "gpt-4" + assert ai_client_span["attributes"]["gen_ai.request.temperature"] == 0.7 + assert ai_client_span["attributes"]["gen_ai.request.top_p"] == 1.0 + + assert ( + SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS not in invoke_agent_span["attributes"] + ) + elif span_streaming or stream_gen_ai_spans: with patch.object( agent.model._client._client, "send", @@ -1076,6 +1506,7 @@ def test_agent_invocation_span_sync_no_pii( assert SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS not in invoke_agent_span["data"] +@pytest.mark.parametrize("span_streaming", [True, False]) @pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.parametrize( "instructions", @@ -1181,6 +1612,7 @@ def test_agent_invocation_span_sync( request, get_model_response, stream_gen_ai_spans, + span_streaming, ): """ Test that the integration creates spans for agent invocations. @@ -1193,7 +1625,278 @@ def test_agent_invocation_span_sync( nonstreaming_responses_model_response, serialize_pydantic=True ) - if stream_gen_ai_spans: + if span_streaming: + with patch.object( + agent.model._client._client, + "send", + return_value=response, + ) as _: + sentry_init( + integrations=[OpenAIAgentsIntegration()], + disabled_integrations=[StdlibIntegration], + traces_sample_rate=1.0, + send_default_pii=True, + stream_gen_ai_spans=stream_gen_ai_spans, + _experiments={"trace_lifecycle": "stream"}, + ) + + items = capture_items("span") + + result = agents.Runner.run_sync( + agent, + input, + run_config=test_run_config, + ) + + assert result is not None + assert result.final_output == "Hello, how can I help you?" 
+ + sentry_sdk.flush() + spans = [item.payload for item in items] + ai_client_span, invoke_agent_span, workflow_span = spans + + assert workflow_span["name"] == "test_agent workflow" + assert workflow_span["attributes"]["sentry.origin"] == "auto.ai.openai_agents" + + assert invoke_agent_span["name"] == "invoke_agent test_agent" + assert ( + invoke_agent_span["attributes"]["gen_ai.operation.name"] == "invoke_agent" + ) + assert invoke_agent_span["attributes"]["gen_ai.system"] == "openai" + assert invoke_agent_span["attributes"]["gen_ai.agent.name"] == "test_agent" + assert invoke_agent_span["attributes"]["gen_ai.request.max_tokens"] == 100 + assert invoke_agent_span["attributes"]["gen_ai.request.model"] == "gpt-4" + assert invoke_agent_span["attributes"]["gen_ai.request.temperature"] == 0.7 + assert invoke_agent_span["attributes"]["gen_ai.request.top_p"] == 1.0 + + assert ai_client_span["name"] == "chat gpt-4" + assert ai_client_span["attributes"]["gen_ai.operation.name"] == "chat" + assert ai_client_span["attributes"]["gen_ai.system"] == "openai" + assert ai_client_span["attributes"]["gen_ai.agent.name"] == "test_agent" + assert ai_client_span["attributes"]["gen_ai.request.max_tokens"] == 100 + assert ai_client_span["attributes"]["gen_ai.request.model"] == "gpt-4" + assert ai_client_span["attributes"]["gen_ai.request.temperature"] == 0.7 + assert ai_client_span["attributes"]["gen_ai.request.top_p"] == 1.0 + + param_id = request.node.callspec.id + if "string" in param_id and instructions is None: # type: ignore + assert "gen_ai.system_instructions" not in ai_client_span["attributes"] + elif "string" in param_id: + assert ai_client_span["attributes"][ + "gen_ai.system_instructions" + ] == safe_serialize( + [ + { + "type": "text", + "content": "You are a coding assistant that talks like a pirate.", + }, + ] + ) + elif "blocks_no_type" in param_id and instructions is None: # type: ignore + assert ai_client_span["attributes"][ + "gen_ai.system_instructions" + ] == 
safe_serialize( + [ + {"type": "text", "content": "You are a helpful assistant."}, + ] + ) + + assert json.loads( + ai_client_span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + ) == [ + { + "role": "user", + "content": [ + { + "type": "text", + "text": "Message demonstrating the absence of truncation.", + } + ], + }, + {"role": "user", "content": [{"type": "text", "text": "Test input"}]}, + ] + elif "blocks_no_type" in param_id: + assert ai_client_span["attributes"][ + "gen_ai.system_instructions" + ] == safe_serialize( + [ + { + "type": "text", + "content": "You are a coding assistant that talks like a pirate.", + }, + {"type": "text", "content": "You are a helpful assistant."}, + ] + ) + + assert json.loads( + ai_client_span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + ) == [ + { + "role": "user", + "content": [ + { + "type": "text", + "text": "Message demonstrating the absence of truncation.", + } + ], + }, + {"role": "user", "content": [{"type": "text", "text": "Test input"}]}, + ] + elif "blocks" in param_id and instructions is None: # type: ignore + assert ai_client_span["attributes"][ + "gen_ai.system_instructions" + ] == safe_serialize( + [ + {"type": "text", "content": "You are a helpful assistant."}, + ] + ) + + assert json.loads( + ai_client_span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + ) == [ + { + "role": "user", + "content": [ + { + "type": "text", + "text": "Message demonstrating the absence of truncation.", + } + ], + }, + {"role": "user", "content": [{"type": "text", "text": "Test input"}]}, + ] + elif "blocks" in param_id: + assert ai_client_span["attributes"][ + "gen_ai.system_instructions" + ] == safe_serialize( + [ + { + "type": "text", + "content": "You are a coding assistant that talks like a pirate.", + }, + {"type": "text", "content": "You are a helpful assistant."}, + ] + ) + + assert json.loads( + ai_client_span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + ) == [ + { + "role": "user", + "content": [ + { + "type": 
"text", + "text": "Message demonstrating the absence of truncation.", + } + ], + }, + {"role": "user", "content": [{"type": "text", "text": "Test input"}]}, + ] + elif "parts_no_type" in param_id and instructions is None: + assert ai_client_span["attributes"][ + "gen_ai.system_instructions" + ] == safe_serialize( + [ + {"type": "text", "content": "You are a helpful assistant."}, + {"type": "text", "content": "Be concise and clear."}, + ] + ) + + assert json.loads( + ai_client_span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + ) == [ + { + "role": "user", + "content": [ + { + "type": "text", + "text": "Message demonstrating the absence of truncation.", + } + ], + }, + {"role": "user", "content": [{"type": "text", "text": "Test input"}]}, + ] + elif "parts_no_type" in param_id: + assert ai_client_span["attributes"][ + "gen_ai.system_instructions" + ] == safe_serialize( + [ + { + "type": "text", + "content": "You are a coding assistant that talks like a pirate.", + }, + {"type": "text", "content": "You are a helpful assistant."}, + {"type": "text", "content": "Be concise and clear."}, + ] + ) + + assert json.loads( + ai_client_span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + ) == [ + { + "role": "user", + "content": [ + { + "type": "text", + "text": "Message demonstrating the absence of truncation.", + } + ], + }, + {"role": "user", "content": [{"type": "text", "text": "Test input"}]}, + ] + elif instructions is None: # type: ignore + assert ai_client_span["attributes"][ + "gen_ai.system_instructions" + ] == safe_serialize( + [ + {"type": "text", "content": "You are a helpful assistant."}, + {"type": "text", "content": "Be concise and clear."}, + ] + ) + + assert json.loads( + ai_client_span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + ) == [ + { + "role": "user", + "content": [ + { + "type": "text", + "text": "Message demonstrating the absence of truncation.", + } + ], + }, + {"role": "user", "content": [{"type": "text", "text": "Test input"}]}, + ] + 
else: + assert ai_client_span["attributes"][ + "gen_ai.system_instructions" + ] == safe_serialize( + [ + { + "type": "text", + "content": "You are a coding assistant that talks like a pirate.", + }, + {"type": "text", "content": "You are a helpful assistant."}, + {"type": "text", "content": "Be concise and clear."}, + ] + ) + + assert json.loads( + ai_client_span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + ) == [ + { + "role": "user", + "content": [ + { + "type": "text", + "text": "Message demonstrating the absence of truncation.", + } + ], + }, + {"role": "user", "content": [{"type": "text", "text": "Test input"}]}, + ] + elif span_streaming or stream_gen_ai_spans: with patch.object( agent.model._client._client, "send", @@ -1611,6 +2314,7 @@ def test_agent_invocation_span_sync( ) +@pytest.mark.parametrize("span_streaming", [True, False]) @pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.asyncio async def test_handoff_span( @@ -1619,6 +2323,7 @@ async def test_handoff_span( capture_items, get_model_response, stream_gen_ai_spans, + span_streaming, ): """ Test that handoff spans are created when agents hand off to other agents. 
@@ -1712,7 +2417,7 @@ async def test_handoff_span( serialize_pydantic=True, ) - if stream_gen_ai_spans: + if span_streaming or stream_gen_ai_spans: with patch.object( primary_agent.model._client._client, "send", @@ -1720,8 +2425,12 @@ async def test_handoff_span( ) as _: sentry_init( integrations=[OpenAIAgentsIntegration()], + disabled_integrations=[StdlibIntegration], traces_sample_rate=1.0, stream_gen_ai_spans=stream_gen_ai_spans, + _experiments={ + "trace_lifecycle": "stream" if span_streaming else "static" + }, ) items = capture_items("transaction", "span") @@ -1734,6 +2443,7 @@ async def test_handoff_span( assert result is not None + sentry_sdk.flush() spans = [item.payload for item in items if item.type == "span"] handoff_span = next( span @@ -1781,6 +2491,7 @@ async def test_handoff_span( assert handoff_span["data"]["gen_ai.operation.name"] == "handoff" +@pytest.mark.parametrize("span_streaming", [True, False]) @pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.asyncio async def test_max_turns_before_handoff_span( @@ -1789,6 +2500,7 @@ async def test_max_turns_before_handoff_span( capture_items, get_model_response, stream_gen_ai_spans, + span_streaming, ): """ Example raising agents.exceptions.AgentsException after the agent invocation span is complete. 
@@ -1882,7 +2594,7 @@ async def test_max_turns_before_handoff_span( serialize_pydantic=True, ) - if stream_gen_ai_spans: + if span_streaming or stream_gen_ai_spans: with patch.object( primary_agent.model._client._client, "send", @@ -1890,8 +2602,12 @@ async def test_max_turns_before_handoff_span( ) as _: sentry_init( integrations=[OpenAIAgentsIntegration()], + disabled_integrations=[StdlibIntegration], traces_sample_rate=1.0, stream_gen_ai_spans=stream_gen_ai_spans, + _experiments={ + "trace_lifecycle": "stream" if span_streaming else "static" + }, ) items = capture_items("transaction", "span") @@ -1904,121 +2620,361 @@ async def test_max_turns_before_handoff_span( max_turns=1, ) - spans = [item.payload for item in items if item.type == "span"] - handoff_span = next( - span - for span in spans - if span["attributes"].get("sentry.op") == OP.GEN_AI_HANDOFF - ) + sentry_sdk.flush() + spans = [item.payload for item in items if item.type == "span"] + handoff_span = next( + span + for span in spans + if span["attributes"].get("sentry.op") == OP.GEN_AI_HANDOFF + ) + + # Verify handoff span was created + assert handoff_span is not None + assert handoff_span["name"] == "handoff from primary_agent to secondary_agent" + assert handoff_span["attributes"]["gen_ai.operation.name"] == "handoff" + else: + with patch.object( + primary_agent.model._client._client, + "send", + side_effect=[handoff_response, final_response], + ) as _: + sentry_init( + integrations=[OpenAIAgentsIntegration()], + traces_sample_rate=1.0, + stream_gen_ai_spans=stream_gen_ai_spans, + ) + events = capture_events() + + with pytest.raises(MaxTurnsExceeded): + await agents.Runner.run( + primary_agent, + "Please hand off to secondary agent", + run_config=test_run_config, + max_turns=1, + ) + + (error, transaction) = events + spans = transaction["spans"] + handoff_span = next( + span for span in spans if span.get("op") == OP.GEN_AI_HANDOFF + ) + + # Verify handoff span was created + assert handoff_span is not 
None + assert ( + handoff_span["description"] + == "handoff from primary_agent to secondary_agent" + ) + assert handoff_span["data"]["gen_ai.operation.name"] == "handoff" + + +@pytest.mark.parametrize("span_streaming", [True, False]) +@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) +@pytest.mark.asyncio +async def test_tool_execution_span( + sentry_init, + capture_events, + capture_items, + test_agent, + get_model_response, + responses_tool_call_model_responses, + stream_gen_ai_spans, + span_streaming, +): + """ + Test tool execution span creation. + """ + + @agents.function_tool + def simple_test_tool(message: str) -> str: + """A simple tool""" + return f"Tool executed with: {message}" + + # Create agent with the tool + client = AsyncOpenAI(api_key="test-key") + model = OpenAIResponsesModel(model="gpt-4", openai_client=client) + agent_with_tool = test_agent.clone(tools=[simple_test_tool], model=model) + + responses = responses_tool_call_model_responses( + tool_name="simple_test_tool", + arguments='{"message": "hello"}', + response_model="gpt-4", + response_text="Task completed using the tool", + response_ids=iter(["resp_tool_123", "resp_final_123"]), + usages=iter( + [ + ResponseUsage( + input_tokens=10, + input_tokens_details=InputTokensDetails( + cached_tokens=0, + ), + output_tokens=5, + output_tokens_details=OutputTokensDetails( + reasoning_tokens=0, + ), + total_tokens=15, + ), + ResponseUsage( + input_tokens=15, + input_tokens_details=InputTokensDetails( + cached_tokens=0, + ), + output_tokens=10, + output_tokens_details=OutputTokensDetails( + reasoning_tokens=0, + ), + total_tokens=25, + ), + ] + ), + ) + tool_response = get_model_response( + next(responses), + serialize_pydantic=True, + ) + final_response = get_model_response( + next(responses), + serialize_pydantic=True, + ) + + if span_streaming: + with patch.object( + agent_with_tool.model._client._client, + "send", + side_effect=[tool_response, final_response], + ) as _: + sentry_init( + 
integrations=[OpenAIAgentsIntegration()], + disabled_integrations=[StdlibIntegration], + traces_sample_rate=1.0, + send_default_pii=True, + stream_gen_ai_spans=stream_gen_ai_spans, + _experiments={"trace_lifecycle": "stream"}, + ) + + items = capture_items("span") + + await agents.Runner.run( + agent_with_tool, + "Please use the simple test tool", + run_config=test_run_config, + ) + + sentry_sdk.flush() + spans = [item.payload for item in items] + + assert spans[4]["name"] == "test_agent workflow" + assert spans[4]["attributes"]["sentry.origin"] == "auto.ai.openai_agents" + + agent_span = next( + span + for span in spans + if span["attributes"]["sentry.op"] == OP.GEN_AI_INVOKE_AGENT + ) + ai_client_span1, ai_client_span2 = ( + span + for span in spans + if span["attributes"].get("sentry.op") == OP.GEN_AI_CHAT + ) + tool_span = next( + span + for span in spans + if span["attributes"]["sentry.op"] == OP.GEN_AI_EXECUTE_TOOL + ) + + available_tool = { + "name": "simple_test_tool", + "description": "A simple tool", + "params_json_schema": { + "properties": {"message": {"title": "Message", "type": "string"}}, + "required": ["message"], + "title": "simple_test_tool_args", + "type": "object", + "additionalProperties": False, + }, + "on_invoke_tool": mock.ANY, + "strict_json_schema": True, + "is_enabled": True, + } + + if parse_version(OPENAI_AGENTS_VERSION) >= (0, 3, 3): + available_tool.update( + {"tool_input_guardrails": None, "tool_output_guardrails": None} + ) + + if parse_version(OPENAI_AGENTS_VERSION) >= ( + 0, + 8, + ): + available_tool["needs_approval"] = False + if parse_version(OPENAI_AGENTS_VERSION) >= ( + 0, + 9, + 0, + ): + available_tool.update( + { + "timeout_seconds": None, + "timeout_behavior": "error_as_result", + "timeout_error_function": None, + } + ) + + assert agent_span["name"] == "invoke_agent test_agent" + assert agent_span["attributes"]["sentry.origin"] == "auto.ai.openai_agents" + assert agent_span["attributes"]["gen_ai.agent.name"] == 
"test_agent" + assert agent_span["attributes"]["gen_ai.operation.name"] == "invoke_agent" + + agent_span_available_tool = json.loads( + agent_span["attributes"]["gen_ai.request.available_tools"] + )[0] + + assert all(agent_span_available_tool[k] == v for k, v in available_tool.items()) + + assert agent_span["attributes"]["gen_ai.request.max_tokens"] == 100 + assert agent_span["attributes"]["gen_ai.request.model"] == "gpt-4" + assert agent_span["attributes"]["gen_ai.request.temperature"] == 0.7 + assert agent_span["attributes"]["gen_ai.request.top_p"] == 1.0 + assert agent_span["attributes"]["gen_ai.system"] == "openai" + + assert ai_client_span1["name"] == "chat gpt-4" + assert ai_client_span1["attributes"]["gen_ai.operation.name"] == "chat" + assert ai_client_span1["attributes"]["gen_ai.system"] == "openai" + assert ai_client_span1["attributes"]["gen_ai.agent.name"] == "test_agent" + + ai_client_span1_available_tool = json.loads( + ai_client_span1["attributes"]["gen_ai.request.available_tools"] + )[0] + + assert all( + ai_client_span1_available_tool[k] == v for k, v in available_tool.items() + ) + + assert ai_client_span1["attributes"]["gen_ai.request.max_tokens"] == 100 + assert ai_client_span1["attributes"][ + "gen_ai.request.messages" + ] == safe_serialize( + [ + { + "role": "user", + "content": [ + {"type": "text", "text": "Please use the simple test tool"} + ], + }, + ] + ) + assert ai_client_span1["attributes"]["gen_ai.request.model"] == "gpt-4" + assert ai_client_span1["attributes"]["gen_ai.request.temperature"] == 0.7 + assert ai_client_span1["attributes"]["gen_ai.request.top_p"] == 1.0 + assert ai_client_span1["attributes"]["gen_ai.usage.input_tokens"] == 10 + assert ai_client_span1["attributes"]["gen_ai.usage.input_tokens.cached"] == 0 + assert ai_client_span1["attributes"]["gen_ai.usage.output_tokens"] == 5 + assert ( + ai_client_span1["attributes"]["gen_ai.usage.output_tokens.reasoning"] == 0 + ) + assert 
ai_client_span1["attributes"]["gen_ai.usage.total_tokens"] == 15 + + tool_call = { + "arguments": '{"message": "hello"}', + "call_id": "call_123", + "name": "simple_test_tool", + "type": "function_call", + "id": "call_123", + "status": None, + } + + if OPENAI_VERSION >= (2, 25, 0): + tool_call["namespace"] = None + + assert json.loads( + ai_client_span1["attributes"]["gen_ai.response.tool_calls"] + ) == [tool_call] - # Verify handoff span was created - assert handoff_span is not None - assert handoff_span["name"] == "handoff from primary_agent to secondary_agent" - assert handoff_span["attributes"]["gen_ai.operation.name"] == "handoff" - else: - with patch.object( - primary_agent.model._client._client, - "send", - side_effect=[handoff_response, final_response], - ) as _: - sentry_init( - integrations=[OpenAIAgentsIntegration()], - traces_sample_rate=1.0, - stream_gen_ai_spans=stream_gen_ai_spans, - ) - events = capture_events() + assert tool_span["name"] == "execute_tool simple_test_tool" + assert tool_span["attributes"]["gen_ai.agent.name"] == "test_agent" + assert tool_span["attributes"]["gen_ai.operation.name"] == "execute_tool" - with pytest.raises(MaxTurnsExceeded): - await agents.Runner.run( - primary_agent, - "Please hand off to secondary agent", - run_config=test_run_config, - max_turns=1, - ) + tool_span_available_tool = json.loads( + tool_span["attributes"]["gen_ai.request.available_tools"] + )[0] - (error, transaction) = events - spans = transaction["spans"] - handoff_span = next( - span for span in spans if span.get("op") == OP.GEN_AI_HANDOFF - ) + assert all(tool_span_available_tool[k] == v for k, v in available_tool.items()) - # Verify handoff span was created - assert handoff_span is not None + assert tool_span["attributes"]["gen_ai.request.max_tokens"] == 100 + assert tool_span["attributes"]["gen_ai.request.model"] == "gpt-4" + assert tool_span["attributes"]["gen_ai.request.temperature"] == 0.7 + assert 
tool_span["attributes"]["gen_ai.request.top_p"] == 1.0 + assert tool_span["attributes"]["gen_ai.system"] == "openai" + assert tool_span["attributes"]["gen_ai.tool.description"] == "A simple tool" + assert tool_span["attributes"]["gen_ai.tool.input"] == '{"message": "hello"}' + assert tool_span["attributes"]["gen_ai.tool.name"] == "simple_test_tool" assert ( - handoff_span["description"] - == "handoff from primary_agent to secondary_agent" + tool_span["attributes"]["gen_ai.tool.output"] == "Tool executed with: hello" ) - assert handoff_span["data"]["gen_ai.operation.name"] == "handoff" - - -@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) -@pytest.mark.asyncio -async def test_tool_execution_span( - sentry_init, - capture_events, - capture_items, - test_agent, - get_model_response, - responses_tool_call_model_responses, - stream_gen_ai_spans, -): - """ - Test tool execution span creation. - """ + assert ai_client_span2["name"] == "chat gpt-4" + assert ai_client_span2["attributes"]["gen_ai.agent.name"] == "test_agent" + assert ai_client_span2["attributes"]["gen_ai.operation.name"] == "chat" - @agents.function_tool - def simple_test_tool(message: str) -> str: - """A simple tool""" - return f"Tool executed with: {message}" + ai_client_span2_available_tool = json.loads( + ai_client_span2["attributes"]["gen_ai.request.available_tools"] + )[0] - # Create agent with the tool - client = AsyncOpenAI(api_key="test-key") - model = OpenAIResponsesModel(model="gpt-4", openai_client=client) - agent_with_tool = test_agent.clone(tools=[simple_test_tool], model=model) + assert all( + ai_client_span2_available_tool[k] == v for k, v in available_tool.items() + ) - responses = responses_tool_call_model_responses( - tool_name="simple_test_tool", - arguments='{"message": "hello"}', - response_model="gpt-4", - response_text="Task completed using the tool", - response_ids=iter(["resp_tool_123", "resp_final_123"]), - usages=iter( + assert 
ai_client_span2["attributes"]["gen_ai.request.max_tokens"] == 100 + assert ai_client_span2["attributes"][ + "gen_ai.request.messages" + ] == safe_serialize( [ - ResponseUsage( - input_tokens=10, - input_tokens_details=InputTokensDetails( - cached_tokens=0, - ), - output_tokens=5, - output_tokens_details=OutputTokensDetails( - reasoning_tokens=0, - ), - total_tokens=15, - ), - ResponseUsage( - input_tokens=15, - input_tokens_details=InputTokensDetails( - cached_tokens=0, - ), - output_tokens=10, - output_tokens_details=OutputTokensDetails( - reasoning_tokens=0, - ), - total_tokens=25, - ), + { + "role": "user", + "content": [ + {"type": "text", "text": "Please use the simple test tool"} + ], + }, + { + "role": "assistant", + "content": [ + { + "arguments": '{"message": "hello"}', + "call_id": "call_123", + "name": "simple_test_tool", + "type": "function_call", + "id": "call_123", + } + ], + }, + { + "role": "tool", + "content": [ + { + "call_id": "call_123", + "output": "Tool executed with: hello", + "type": "function_call_output", + } + ], + }, ] - ), - ) - tool_response = get_model_response( - next(responses), - serialize_pydantic=True, - ) - final_response = get_model_response( - next(responses), - serialize_pydantic=True, - ) + ) + assert ai_client_span2["attributes"]["gen_ai.request.model"] == "gpt-4" + assert ai_client_span2["attributes"]["gen_ai.request.temperature"] == 0.7 + assert ai_client_span2["attributes"]["gen_ai.request.top_p"] == 1.0 + assert ( + ai_client_span2["attributes"]["gen_ai.response.text"] + == "Task completed using the tool" + ) + assert ai_client_span2["attributes"]["gen_ai.system"] == "openai" + assert ai_client_span2["attributes"]["gen_ai.usage.input_tokens.cached"] == 0 + assert ai_client_span2["attributes"]["gen_ai.usage.input_tokens"] == 15 + assert ( + ai_client_span2["attributes"]["gen_ai.usage.output_tokens.reasoning"] == 0 + ) + assert ai_client_span2["attributes"]["gen_ai.usage.output_tokens"] == 10 + assert 
ai_client_span2["attributes"]["gen_ai.usage.total_tokens"] == 25 - if stream_gen_ai_spans: + elif span_streaming or stream_gen_ai_spans: with patch.object( agent_with_tool.model._client._client, "send", @@ -2699,6 +3655,7 @@ async def test_hosted_mcp_tool_propagation_headers( assert hosted_mcp_tool["headers"]["baggage"] == expected_outgoing_baggage +@pytest.mark.parametrize("span_streaming", [True, False]) @pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.asyncio async def test_model_behavior_error( @@ -2707,6 +3664,7 @@ async def test_model_behavior_error( capture_items, test_agent, stream_gen_ai_spans, + span_streaming, ): """ Example raising agents.exceptions.AgentsException before the agent invocation span is complete. @@ -2721,7 +3679,64 @@ def simple_test_tool(message: str) -> str: # Create agent with the tool agent_with_tool = test_agent.clone(tools=[simple_test_tool]) - if stream_gen_ai_spans: + if span_streaming: + with patch.dict(os.environ, {"OPENAI_API_KEY": "test-key"}), patch( + "agents.models.openai_responses.OpenAIResponsesModel.get_response" + ) as mock_get_response: + # Create a mock response that includes tool calls + tool_call = ResponseFunctionToolCall( + id="call_123", + call_id="call_123", + name="wrong_tool", + type="function_call", + arguments='{"message": "hello"}', + ) + + tool_response = ModelResponse( + output=[tool_call], + usage=Usage( + requests=1, input_tokens=10, output_tokens=5, total_tokens=15 + ), + response_id="resp_tool_123", + ) + + mock_get_response.side_effect = [tool_response] + + sentry_init( + integrations=[OpenAIAgentsIntegration()], + disabled_integrations=[StdlibIntegration], + traces_sample_rate=1.0, + send_default_pii=True, + stream_gen_ai_spans=stream_gen_ai_spans, + _experiments={"trace_lifecycle": "stream"}, + ) + + items = capture_items("span") + + with pytest.raises(ModelBehaviorError): + await agents.Runner.run( + agent_with_tool, + "Please use the simple test tool", + 
run_config=test_run_config, + ) + + sentry_sdk.flush() + spans = [item.payload for item in items] + + ( + ai_client_span1, + agent_span, + workflow_span, + ) = spans + assert workflow_span["name"] == "test_agent workflow" + assert workflow_span["attributes"]["sentry.origin"] == "auto.ai.openai_agents" + + assert agent_span["name"] == "invoke_agent test_agent" + assert agent_span["attributes"]["sentry.origin"] == "auto.ai.openai_agents" + + # Error due to unrecognized tool in model response. + assert agent_span["status"] == "error" + elif span_streaming or stream_gen_ai_spans: with patch.dict(os.environ, {"OPENAI_API_KEY": "test-key"}), patch( "agents.models.openai_responses.OpenAIResponsesModel.get_response" ) as mock_get_response: @@ -2831,6 +3846,7 @@ def simple_test_tool(message: str) -> str: assert agent_span["tags"]["status"] == "internal_error" +@pytest.mark.parametrize("span_streaming", [True, False]) @pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.asyncio async def test_error_handling( @@ -2839,12 +3855,61 @@ async def test_error_handling( capture_items, test_agent, stream_gen_ai_spans, + span_streaming, ): """ Test error handling in agent execution. 
""" - if stream_gen_ai_spans: + if span_streaming: + with patch.dict(os.environ, {"OPENAI_API_KEY": "test-key"}), patch( + "agents.models.openai_responses.OpenAIResponsesModel.get_response" + ) as mock_get_response: + mock_get_response.side_effect = Exception("Model Error") + + sentry_init( + integrations=[ + OpenAIAgentsIntegration(), + LoggingIntegration(event_level=logging.CRITICAL), + ], + disabled_integrations=[StdlibIntegration], + traces_sample_rate=1.0, + stream_gen_ai_spans=stream_gen_ai_spans, + _experiments={"trace_lifecycle": "stream"}, + ) + + items = capture_items("event", "span") + + with pytest.raises(Exception, match="Model Error"): + await agents.Runner.run( + test_agent, "Test input", run_config=test_run_config + ) + + (error_event,) = (item.payload for item in items if item.type == "event") + + assert error_event["exception"]["values"][0]["type"] == "Exception" + assert error_event["exception"]["values"][0]["value"] == "Model Error" + assert ( + error_event["exception"]["values"][0]["mechanism"]["type"] + == "openai_agents" + ) + + sentry_sdk.flush() + spans = [item.payload for item in items if item.type == "span"] + (ai_client_span, invoke_agent_span, workflow_span) = spans + + assert workflow_span["name"] == "test_agent workflow" + assert workflow_span["attributes"]["sentry.origin"] == "auto.ai.openai_agents" + + assert invoke_agent_span["name"] == "invoke_agent test_agent" + assert ( + invoke_agent_span["attributes"]["sentry.origin"] == "auto.ai.openai_agents" + ) + + assert ai_client_span["name"] == "chat gpt-4" + assert ai_client_span["attributes"]["sentry.origin"] == "auto.ai.openai_agents" + assert ai_client_span["status"] == "error" + elif span_streaming or stream_gen_ai_spans: with patch.dict(os.environ, {"OPENAI_API_KEY": "test-key"}), patch( "agents.models.openai_responses.OpenAIResponsesModel.get_response" ) as mock_get_response: @@ -2939,6 +4004,7 @@ async def test_error_handling( assert ai_client_span["tags"]["status"] == 
"internal_error" +@pytest.mark.parametrize("span_streaming", [True, False]) @pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.asyncio async def test_error_captures_input_data( @@ -2947,6 +4013,7 @@ async def test_error_captures_input_data( capture_items, test_agent, stream_gen_ai_spans, + span_streaming, ): """ Test that input data is captured even when the API call raises an exception. @@ -2966,7 +4033,7 @@ async def test_error_captures_input_data( request=model_request, ) - if stream_gen_ai_spans: + if span_streaming or stream_gen_ai_spans: with patch.object( agent.model._client._client, "send", @@ -2977,9 +4044,13 @@ async def test_error_captures_input_data( OpenAIAgentsIntegration(), LoggingIntegration(event_level=logging.CRITICAL), ], + disabled_integrations=[StdlibIntegration], traces_sample_rate=1.0, send_default_pii=True, stream_gen_ai_spans=stream_gen_ai_spans, + _experiments={ + "trace_lifecycle": "stream" if span_streaming else "static" + }, ) items = capture_items("event", "span") @@ -2992,6 +4063,7 @@ async def test_error_captures_input_data( assert error_event["exception"]["values"][0]["type"] == "InternalServerError" assert error_event["exception"]["values"][0]["value"] == "Error code: 500" + sentry_sdk.flush() spans = [item.payload for item in items if item.type == "span"] ai_client_span = [ s for s in spans if s["attributes"].get("sentry.op", "") == "gen_ai.chat" @@ -3054,6 +4126,7 @@ async def test_error_captures_input_data( assert ai_client_span["data"]["gen_ai.request.messages"] == request_messages +@pytest.mark.parametrize("span_streaming", [True, False]) @pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.asyncio async def test_span_status_error( @@ -3062,8 +4135,42 @@ async def test_span_status_error( capture_items, test_agent, stream_gen_ai_spans, + span_streaming, ): - if stream_gen_ai_spans: + if span_streaming: + with patch.dict(os.environ, {"OPENAI_API_KEY": "test-key"}), patch( + 
"agents.models.openai_responses.OpenAIResponsesModel.get_response" + ) as mock_get_response: + mock_get_response.side_effect = ValueError("Model Error") + + sentry_init( + integrations=[ + OpenAIAgentsIntegration(), + LoggingIntegration(event_level=logging.CRITICAL), + ], + disabled_integrations=[StdlibIntegration], + traces_sample_rate=1.0, + stream_gen_ai_spans=stream_gen_ai_spans, + _experiments={"trace_lifecycle": "stream"}, + ) + + items = capture_items("event", "span") + + with pytest.raises(ValueError, match="Model Error"): + await agents.Runner.run( + test_agent, "Test input", run_config=test_run_config + ) + + (error,) = (item.payload for item in items if item.type == "event") + assert error["level"] == "error" + + sentry_sdk.flush() + spans = [item.payload for item in items if item.type == "span"] + assert spans[0]["status"] == "error" + + assert spans[2]["is_segment"] is True + assert spans[2]["status"] == "error" + elif span_streaming or stream_gen_ai_spans: with patch.dict(os.environ, {"OPENAI_API_KEY": "test-key"}), patch( "agents.models.openai_responses.OpenAIResponsesModel.get_response" ) as mock_get_response: @@ -3074,6 +4181,7 @@ async def test_span_status_error( OpenAIAgentsIntegration(), LoggingIntegration(event_level=logging.CRITICAL), ], + disabled_integrations=[StdlibIntegration], traces_sample_rate=1.0, stream_gen_ai_spans=stream_gen_ai_spans, ) @@ -3092,6 +4200,7 @@ async def test_span_status_error( assert spans[0]["status"] == "error" (transaction,) = (item.payload for item in items if item.type == "transaction") + assert transaction["contexts"]["trace"]["status"] == "internal_error" else: with patch.dict(os.environ, {"OPENAI_API_KEY": "test-key"}), patch( "agents.models.openai_responses.OpenAIResponsesModel.get_response" @@ -3118,10 +4227,10 @@ async def test_span_status_error( assert error["level"] == "error" assert transaction["spans"][0]["status"] == "internal_error" assert transaction["spans"][0]["tags"]["status"] == "internal_error" 
- - assert transaction["contexts"]["trace"]["status"] == "internal_error" + assert transaction["contexts"]["trace"]["status"] == "internal_error" +@pytest.mark.parametrize("span_streaming", [True, False]) @pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.asyncio async def test_multiple_agents_asyncio( @@ -3132,6 +4241,7 @@ async def test_multiple_agents_asyncio( nonstreaming_responses_model_response, get_model_response, stream_gen_ai_spans, + span_streaming, ): """ Test that multiple agents can be run at the same time in asyncio tasks @@ -3145,7 +4255,38 @@ async def test_multiple_agents_asyncio( nonstreaming_responses_model_response, serialize_pydantic=True ) - if stream_gen_ai_spans: + if span_streaming: + with patch.object( + agent.model._client._client, + "send", + return_value=response, + ) as _: + sentry_init( + integrations=[OpenAIAgentsIntegration()], + disabled_integrations=[StdlibIntegration], + traces_sample_rate=1.0, + stream_gen_ai_spans=stream_gen_ai_spans, + _experiments={"trace_lifecycle": "stream"}, + ) + + items = capture_items("span") + + async def run(): + await agents.Runner.run( + starting_agent=agent, + input="Test input", + run_config=test_run_config, + ) + + await asyncio.gather(*[run() for _ in range(3)]) + + sentry_sdk.flush() + spans = [item.payload for item in items] + + assert spans[2]["name"] == "test_agent workflow" + assert spans[5]["name"] == "test_agent workflow" + assert spans[8]["name"] == "test_agent workflow" + elif span_streaming or stream_gen_ai_spans: with patch.object( agent.model._client._client, "send", @@ -3153,6 +4294,7 @@ async def test_multiple_agents_asyncio( ) as _: sentry_init( integrations=[OpenAIAgentsIntegration()], + disabled_integrations=[StdlibIntegration], traces_sample_rate=1.0, stream_gen_ai_spans=stream_gen_ai_spans, ) @@ -3250,6 +4392,7 @@ def test_openai_agents_message_role_mapping( assert stored_messages[0]["role"] == expected_role +@pytest.mark.parametrize("span_streaming", 
[True, False]) @pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.asyncio async def test_tool_execution_error_tracing( @@ -3260,6 +4403,7 @@ async def test_tool_execution_error_tracing( get_model_response, responses_tool_call_model_responses, stream_gen_ai_spans, + span_streaming, ): """ Test that tool execution errors are properly tracked via error tracing patch. @@ -3324,7 +4468,7 @@ def failing_tool(message: str) -> str: serialize_pydantic=True, ) - if stream_gen_ai_spans: + if span_streaming or stream_gen_ai_spans: with patch.object( agent_with_tool.model._client._client, "send", @@ -3332,9 +4476,13 @@ def failing_tool(message: str) -> str: ) as _: sentry_init( integrations=[OpenAIAgentsIntegration()], + disabled_integrations=[StdlibIntegration], traces_sample_rate=1.0, send_default_pii=True, stream_gen_ai_spans=stream_gen_ai_spans, + _experiments={ + "trace_lifecycle": "stream" if span_streaming else "static" + }, ) items = capture_items("span", "transaction") @@ -3346,6 +4494,7 @@ def failing_tool(message: str) -> str: run_config=test_run_config, ) + sentry_sdk.flush() spans = [item.payload for item in items if item.type == "span"] # Find the execute_tool span @@ -3412,6 +4561,7 @@ def failing_tool(message: str) -> str: assert execute_tool_span["tags"]["status"] == "internal_error" +@pytest.mark.parametrize("span_streaming", [True, False]) @pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.asyncio async def test_invoke_agent_span_includes_usage_data( @@ -3421,6 +4571,7 @@ async def test_invoke_agent_span_includes_usage_data( test_agent, get_model_response, stream_gen_ai_spans, + span_streaming, ): """ Test that invoke_agent spans include aggregated usage data from context_wrapper. 
@@ -3469,7 +4620,7 @@ async def test_invoke_agent_span_includes_usage_data( serialize_pydantic=True, ) - if stream_gen_ai_spans: + if span_streaming or stream_gen_ai_spans: with patch.object( agent.model._client._client, "send", @@ -3477,9 +4628,13 @@ async def test_invoke_agent_span_includes_usage_data( ) as _: sentry_init( integrations=[OpenAIAgentsIntegration()], + disabled_integrations=[StdlibIntegration], traces_sample_rate=1.0, send_default_pii=True, stream_gen_ai_spans=stream_gen_ai_spans, + _experiments={ + "trace_lifecycle": "stream" if span_streaming else "static" + }, ) items = capture_items("span", "transaction") @@ -3489,6 +4644,7 @@ async def test_invoke_agent_span_includes_usage_data( assert result is not None + sentry_sdk.flush() spans = [item.payload for item in items if item.type == "span"] invoke_agent_span = next( span @@ -3548,6 +4704,7 @@ async def test_invoke_agent_span_includes_usage_data( assert invoke_agent_span["data"]["gen_ai.usage.output_tokens.reasoning"] == 5 +@pytest.mark.parametrize("span_streaming", [True, False]) @pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.asyncio async def test_ai_client_span_includes_response_model( @@ -3557,6 +4714,7 @@ async def test_ai_client_span_includes_response_model( test_agent, get_model_response, stream_gen_ai_spans, + span_streaming, ): """ Test that ai_client spans (gen_ai.chat) include the response model from the actual API response. 
@@ -3605,7 +4763,7 @@ async def test_ai_client_span_includes_response_model( serialize_pydantic=True, ) - if stream_gen_ai_spans: + if span_streaming or stream_gen_ai_spans: with patch.object( agent.model._client._client, "send", @@ -3613,9 +4771,13 @@ async def test_ai_client_span_includes_response_model( ) as _: sentry_init( integrations=[OpenAIAgentsIntegration()], + disabled_integrations=[StdlibIntegration], traces_sample_rate=1.0, send_default_pii=True, stream_gen_ai_spans=stream_gen_ai_spans, + _experiments={ + "trace_lifecycle": "stream" if span_streaming else "static" + }, ) items = capture_items("span", "transaction") @@ -3625,6 +4787,7 @@ async def test_ai_client_span_includes_response_model( assert result is not None + sentry_sdk.flush() spans = [item.payload for item in items if item.type == "span"] ai_client_span = next( span for span in spans if span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT @@ -3667,6 +4830,7 @@ async def test_ai_client_span_includes_response_model( assert ai_client_span["data"]["gen_ai.response.model"] == "gpt-4.1-2025-04-14" +@pytest.mark.parametrize("span_streaming", [True, False]) @pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.asyncio async def test_ai_client_span_response_model_with_chat_completions( @@ -3675,6 +4839,7 @@ async def test_ai_client_span_response_model_with_chat_completions( capture_items, get_model_response, stream_gen_ai_spans, + span_streaming, ): """ Test that response model is captured when using ChatCompletions API (not Responses API). 
@@ -3729,7 +4894,7 @@ async def test_ai_client_span_response_model_with_chat_completions( serialize_pydantic=True, ) - if stream_gen_ai_spans: + if span_streaming or stream_gen_ai_spans: with patch.object( agent.model._client._client, "send", @@ -3737,8 +4902,12 @@ async def test_ai_client_span_response_model_with_chat_completions( ) as _: sentry_init( integrations=[OpenAIAgentsIntegration()], + disabled_integrations=[StdlibIntegration], traces_sample_rate=1.0, stream_gen_ai_spans=stream_gen_ai_spans, + _experiments={ + "trace_lifecycle": "stream" if span_streaming else "static" + }, ) items = capture_items("span", "transaction") @@ -3749,6 +4918,7 @@ async def test_ai_client_span_response_model_with_chat_completions( assert result is not None + sentry_sdk.flush() spans = [item.payload for item in items if item.type == "span"] ai_client_span = next( span for span in spans if span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT @@ -3790,6 +4960,7 @@ async def test_ai_client_span_response_model_with_chat_completions( ) +@pytest.mark.parametrize("span_streaming", [True, False]) @pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.asyncio async def test_multiple_llm_calls_aggregate_usage( @@ -3799,6 +4970,7 @@ async def test_multiple_llm_calls_aggregate_usage( test_agent, get_model_response, stream_gen_ai_spans, + span_streaming, ): """ Test that invoke_agent spans show aggregated usage across multiple LLM calls @@ -3886,7 +5058,48 @@ def calculator(a: int, b: int) -> int: serialize_pydantic=True, ) - if stream_gen_ai_spans: + if span_streaming: + with patch.object( + agent_with_tool.model._client._client, + "send", + side_effect=[tool_call_response, final_response], + ) as _: + sentry_init( + integrations=[OpenAIAgentsIntegration()], + disabled_integrations=[StdlibIntegration], + traces_sample_rate=1.0, + send_default_pii=True, + stream_gen_ai_spans=stream_gen_ai_spans, + _experiments={"trace_lifecycle": "stream"}, + ) + + items = 
capture_items("span") + + result = await agents.Runner.run( + agent_with_tool, + "What is 5 + 3?", + run_config=test_run_config, + ) + + assert result is not None + + sentry_sdk.flush() + spans = [item.payload for item in items] + + invoke_agent_span = spans[3] + + # Verify invoke_agent span has aggregated usage from both API calls + # Total: 10 + 20 = 30 input tokens, 5 + 15 = 20 output tokens, 15 + 35 = 50 total + assert invoke_agent_span["attributes"]["gen_ai.usage.input_tokens"] == 30 + assert invoke_agent_span["attributes"]["gen_ai.usage.output_tokens"] == 20 + assert invoke_agent_span["attributes"]["gen_ai.usage.total_tokens"] == 50 + # Cached tokens should be aggregated: 0 + 5 = 5 + assert invoke_agent_span["attributes"]["gen_ai.usage.input_tokens.cached"] == 5 + # Reasoning tokens should be aggregated: 0 + 3 = 3 + assert ( + invoke_agent_span["attributes"]["gen_ai.usage.output_tokens.reasoning"] == 3 + ) + elif span_streaming or stream_gen_ai_spans: with patch.object( agent_with_tool.model._client._client, "send", @@ -3962,6 +5175,7 @@ def calculator(a: int, b: int) -> int: assert invoke_agent_span["data"]["gen_ai.usage.output_tokens.reasoning"] == 3 +@pytest.mark.parametrize("span_streaming", [True, False]) @pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.asyncio async def test_invoke_agent_span_includes_response_model( @@ -3971,6 +5185,7 @@ async def test_invoke_agent_span_includes_response_model( test_agent, get_model_response, stream_gen_ai_spans, + span_streaming, ): """ Test that invoke_agent spans include the response model from the API response. 
@@ -4018,7 +5233,7 @@ async def test_invoke_agent_span_includes_response_model( serialize_pydantic=True, ) - if stream_gen_ai_spans: + if span_streaming or stream_gen_ai_spans: with patch.object( agent.model._client._client, "send", @@ -4026,9 +5241,13 @@ async def test_invoke_agent_span_includes_response_model( ) as _: sentry_init( integrations=[OpenAIAgentsIntegration()], + disabled_integrations=[StdlibIntegration], traces_sample_rate=1.0, send_default_pii=True, stream_gen_ai_spans=stream_gen_ai_spans, + _experiments={ + "trace_lifecycle": "stream" if span_streaming else "static" + }, ) items = capture_items("span", "transaction") @@ -4039,6 +5258,7 @@ async def test_invoke_agent_span_includes_response_model( assert result is not None + sentry_sdk.flush() spans = [item.payload for item in items if item.type == "span"] invoke_agent_span = next( span @@ -4102,6 +5322,7 @@ async def test_invoke_agent_span_includes_response_model( assert ai_client_span["data"]["gen_ai.response.model"] == "gpt-4.1-2025-04-14" +@pytest.mark.parametrize("span_streaming", [True, False]) @pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.asyncio async def test_invoke_agent_span_uses_last_response_model( @@ -4111,6 +5332,7 @@ async def test_invoke_agent_span_uses_last_response_model( test_agent, get_model_response, stream_gen_ai_spans, + span_streaming, ): """ Test that when an agent makes multiple LLM calls (e.g., with tools), @@ -4198,7 +5420,54 @@ def calculator(a: int, b: int) -> int: serialize_pydantic=True, ) - if stream_gen_ai_spans: + if span_streaming: + with patch.object( + agent_with_tool.model._client._client, + "send", + side_effect=[first_response, second_response], + ) as _: + sentry_init( + integrations=[OpenAIAgentsIntegration()], + disabled_integrations=[StdlibIntegration], + traces_sample_rate=1.0, + send_default_pii=True, + stream_gen_ai_spans=stream_gen_ai_spans, + _experiments={"trace_lifecycle": "stream"}, + ) + + items = 
capture_items("span") + + result = await agents.Runner.run( + agent_with_tool, + "What is 5 + 3?", + run_config=test_run_config, + ) + + assert result is not None + + sentry_sdk.flush() + spans = [item.payload for item in items] + + invoke_agent_span = spans[3] + first_ai_client_span = spans[0] + second_ai_client_span = spans[2] # After tool span + + # Invoke_agent span uses the LAST response model + assert "gen_ai.response.model" in invoke_agent_span["attributes"] + assert ( + invoke_agent_span["attributes"]["gen_ai.response.model"] + == "gpt-4.1-2025-04-14" + ) + + # Each ai_client span has its own response model from the API + assert ( + first_ai_client_span["attributes"]["gen_ai.response.model"] == "gpt-4-0613" + ) + assert ( + second_ai_client_span["attributes"]["gen_ai.response.model"] + == "gpt-4.1-2025-04-14" + ) + elif span_streaming or stream_gen_ai_spans: with patch.object( agent_with_tool.model._client._client, "send", @@ -4526,6 +5795,7 @@ async def test_streaming_ttft_on_chat_span( assert chat_span._data.get(SPANDATA.GEN_AI_RESPONSE_STREAMING) is True +@pytest.mark.parametrize("span_streaming", [True, False]) @pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.skipif( parse_version(OPENAI_AGENTS_VERSION) < (0, 4, 0), @@ -4540,6 +5810,7 @@ async def test_conversation_id_on_all_spans( nonstreaming_responses_model_response, get_model_response, stream_gen_ai_spans, + span_streaming, ): """ Test that gen_ai.conversation.id is set on all AI-related spans when passed to Runner.run(). 
@@ -4553,7 +5824,52 @@ async def test_conversation_id_on_all_spans( nonstreaming_responses_model_response, serialize_pydantic=True ) - if stream_gen_ai_spans: + if span_streaming: + with patch.object( + agent.model._client._client, + "send", + return_value=response, + ) as _: + sentry_init( + integrations=[OpenAIAgentsIntegration()], + disabled_integrations=[StdlibIntegration], + traces_sample_rate=1.0, + stream_gen_ai_spans=stream_gen_ai_spans, + _experiments={"trace_lifecycle": "stream"}, + ) + + items = capture_items("span") + + result = await agents.Runner.run( + agent, + "Test input", + run_config=test_run_config, + conversation_id="conv_test_123", + ) + + assert result is not None + + sentry_sdk.flush() + spans = [item.payload for item in items] + invoke_agent_span = next( + span + for span in spans + if span["attributes"]["sentry.op"] == OP.GEN_AI_INVOKE_AGENT + ) + ai_client_span = next( + span for span in spans if span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT + ) + + assert spans[2]["attributes"]["gen_ai.conversation.id"] == "conv_test_123" + + # Verify invoke_agent span has conversation_id + assert ( + invoke_agent_span["attributes"]["gen_ai.conversation.id"] == "conv_test_123" + ) + + # Verify ai_client span has conversation_id + assert ai_client_span["attributes"]["gen_ai.conversation.id"] == "conv_test_123" + elif span_streaming or stream_gen_ai_spans: with patch.object( agent.model._client._client, "send", @@ -4563,6 +5879,9 @@ async def test_conversation_id_on_all_spans( integrations=[OpenAIAgentsIntegration()], traces_sample_rate=1.0, stream_gen_ai_spans=stream_gen_ai_spans, + _experiments={ + "trace_lifecycle": "stream" if span_streaming else "static" + }, ) items = capture_items("span", "transaction") @@ -4643,6 +5962,7 @@ async def test_conversation_id_on_all_spans( assert ai_client_span["data"]["gen_ai.conversation.id"] == "conv_test_123" +@pytest.mark.parametrize("span_streaming", [True, False]) 
@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.skipif( parse_version(OPENAI_AGENTS_VERSION) < (0, 4, 0), @@ -4656,6 +5976,7 @@ async def test_conversation_id_on_tool_span( test_agent, get_model_response, stream_gen_ai_spans, + span_streaming, ): """ Test that gen_ai.conversation.id is set on tool execution spans when passed to Runner.run(). @@ -4742,7 +6063,53 @@ def simple_tool(message: str) -> str: serialize_pydantic=True, ) - if stream_gen_ai_spans: + if span_streaming: + with patch.object( + agent_with_tool.model._client._client, + "send", + side_effect=[tool_response, final_response], + ) as _: + sentry_init( + integrations=[OpenAIAgentsIntegration()], + disabled_integrations=[StdlibIntegration], + traces_sample_rate=1.0, + stream_gen_ai_spans=stream_gen_ai_spans, + _experiments={"trace_lifecycle": "stream"}, + ) + + items = capture_items("span") + + await agents.Runner.run( + agent_with_tool, + "Use the tool", + run_config=test_run_config, + conversation_id="conv_tool_test_456", + ) + + sentry_sdk.flush() + spans = [item.payload for item in items] + + # Find the tool span + tool_span = None + for span in spans: + if span.get("name", "").startswith("execute_tool"): + tool_span = span + break + + assert tool_span is not None + # Tool span should have the conversation_id passed to Runner.run() + assert tool_span["attributes"]["gen_ai.conversation.id"] == "conv_tool_test_456" + + # Workflow span (transaction) should have the same conversation_id + workflow_span = spans[4] + assert workflow_span["is_segment"] is True + + # Workflow span (transaction) should have the same conversation_id + assert ( + workflow_span["attributes"]["gen_ai.conversation.id"] + == "conv_tool_test_456" + ) + elif span_streaming or stream_gen_ai_spans: with patch.object( agent_with_tool.model._client._client, "send", @@ -4826,6 +6193,7 @@ def simple_tool(message: str) -> str: ) +@pytest.mark.parametrize("span_streaming", [True, False]) 
@pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.skipif( parse_version(OPENAI_AGENTS_VERSION) < (0, 4, 0), @@ -4840,6 +6208,7 @@ async def test_no_conversation_id_when_not_provided( nonstreaming_responses_model_response, get_model_response, stream_gen_ai_spans, + span_streaming, ): """ Test that gen_ai.conversation.id is not set when not passed to Runner.run(). @@ -4853,7 +6222,49 @@ async def test_no_conversation_id_when_not_provided( nonstreaming_responses_model_response, serialize_pydantic=True ) - if stream_gen_ai_spans: + if span_streaming: + with patch.object( + agent.model._client._client, + "send", + return_value=response, + ) as _: + sentry_init( + integrations=[OpenAIAgentsIntegration()], + disabled_integrations=[StdlibIntegration], + traces_sample_rate=1.0, + stream_gen_ai_spans=stream_gen_ai_spans, + _experiments={"trace_lifecycle": "stream"}, + ) + + items = capture_items("span") + + # Don't pass conversation_id + result = await agents.Runner.run( + agent, "Test input", run_config=test_run_config + ) + + assert result is not None + + sentry_sdk.flush() + spans = [item.payload for item in items] + + workflow_span = spans[2] + assert workflow_span["is_segment"] is True + + invoke_agent_span = next( + span + for span in spans + if span["attributes"]["sentry.op"] == OP.GEN_AI_INVOKE_AGENT + ) + ai_client_span = next( + span for span in spans if span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT + ) + + # Verify conversation_id is NOT set on any spans + assert "gen_ai.conversation.id" not in workflow_span.get("attributes", {}) + assert "gen_ai.conversation.id" not in invoke_agent_span.get("attributes", {}) + assert "gen_ai.conversation.id" not in ai_client_span.get("attributes", {}) + elif span_streaming or stream_gen_ai_spans: with patch.object( agent.model._client._client, "send", From 5a5f81d2e30a84a58662f0132a8680cc4a512037 Mon Sep 17 00:00:00 2001 From: Alexander Alderman Webb Date: Tue, 26 May 2026 08:12:35 +0200 Subject: 
[PATCH 03/13] . --- sentry_sdk/tracing_utils.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/sentry_sdk/tracing_utils.py b/sentry_sdk/tracing_utils.py index e6fc8770d6..822114628a 100644 --- a/sentry_sdk/tracing_utils.py +++ b/sentry_sdk/tracing_utils.py @@ -116,6 +116,15 @@ def has_span_streaming_enabled(options: "Optional[dict[str, Any]]") -> bool: return (options.get("_experiments") or {}).get("trace_lifecycle") == "stream" +def should_truncate_gen_ai_input(options: "Optional[dict[str, Any]]") -> bool: + if options is None: + return True + + return not options.get( + "stream_gen_ai_spans", False + ) and not has_span_streaming_enabled(options) + + @contextlib.contextmanager def record_sql_queries( cursor: "Any", From c4a536658161be344cd040da1fb41ec1d2b67c46 Mon Sep 17 00:00:00 2001 From: Alexander Alderman Webb Date: Tue, 26 May 2026 08:21:32 +0200 Subject: [PATCH 04/13] mypy --- .../integrations/openai_agents/spans/agent_workflow.py | 6 +++++- sentry_sdk/integrations/openai_agents/spans/ai_client.py | 2 +- sentry_sdk/integrations/openai_agents/spans/execute_tool.py | 2 +- sentry_sdk/integrations/openai_agents/spans/handoff.py | 2 +- sentry_sdk/integrations/openai_agents/spans/invoke_agent.py | 2 +- 5 files changed, 9 insertions(+), 5 deletions(-) diff --git a/sentry_sdk/integrations/openai_agents/spans/agent_workflow.py b/sentry_sdk/integrations/openai_agents/spans/agent_workflow.py index d89443c44a..758f06db8d 100644 --- a/sentry_sdk/integrations/openai_agents/spans/agent_workflow.py +++ b/sentry_sdk/integrations/openai_agents/spans/agent_workflow.py @@ -7,10 +7,14 @@ from ..consts import SPAN_ORIGIN if TYPE_CHECKING: + from typing import Union + import agents -def agent_workflow_span(agent: "agents.Agent") -> "sentry_sdk.tracing.Span": +def agent_workflow_span( + agent: "agents.Agent", +) -> "Union[sentry_sdk.tracing.Span, sentry_sdk.traces.StreamedSpan]": # Create a transaction or a span if an transaction is already active span_streaming = 
has_span_streaming_enabled(sentry_sdk.get_client().options) if span_streaming: diff --git a/sentry_sdk/integrations/openai_agents/spans/ai_client.py b/sentry_sdk/integrations/openai_agents/spans/ai_client.py index 3e53f7681c..f4f02cb674 100644 --- a/sentry_sdk/integrations/openai_agents/spans/ai_client.py +++ b/sentry_sdk/integrations/openai_agents/spans/ai_client.py @@ -21,7 +21,7 @@ def ai_client_span( agent: "Agent", get_response_kwargs: "dict[str, Any]" -) -> "sentry_sdk.tracing.Span": +) -> "Union[sentry_sdk.tracing.Span, StreamedSpan]": # TODO-anton: implement other types of operations. Now "chat" is hardcoded. # Get model name from agent.model or fall back to request model (for when agent.model is None/default) model_name = None diff --git a/sentry_sdk/integrations/openai_agents/spans/execute_tool.py b/sentry_sdk/integrations/openai_agents/spans/execute_tool.py index ba9ce3a9b3..fd3a430951 100644 --- a/sentry_sdk/integrations/openai_agents/spans/execute_tool.py +++ b/sentry_sdk/integrations/openai_agents/spans/execute_tool.py @@ -17,7 +17,7 @@ def execute_tool_span( tool: "agents.Tool", *args: "Any", **kwargs: "Any" -) -> "sentry_sdk.tracing.Span": +) -> "Union[sentry_sdk.tracing.Span, StreamedSpan]": span_streaming = has_span_streaming_enabled(sentry_sdk.get_client().options) if span_streaming: span = sentry_sdk.traces.start_span( diff --git a/sentry_sdk/integrations/openai_agents/spans/handoff.py b/sentry_sdk/integrations/openai_agents/spans/handoff.py index 734eed0908..ea91464afb 100644 --- a/sentry_sdk/integrations/openai_agents/spans/handoff.py +++ b/sentry_sdk/integrations/openai_agents/spans/handoff.py @@ -26,7 +26,7 @@ def handoff_span( # Add conversation ID from agent conv_id = getattr(from_agent, "_sentry_conversation_id", None) if conv_id: - span.set_data(SPANDATA.GEN_AI_CONVERSATION_ID, conv_id) + span.set_attribute(SPANDATA.GEN_AI_CONVERSATION_ID, conv_id) else: with sentry_sdk.start_span( op=OP.GEN_AI_HANDOFF, diff --git 
a/sentry_sdk/integrations/openai_agents/spans/invoke_agent.py b/sentry_sdk/integrations/openai_agents/spans/invoke_agent.py index 2f31346450..ad03db8240 100644 --- a/sentry_sdk/integrations/openai_agents/spans/invoke_agent.py +++ b/sentry_sdk/integrations/openai_agents/spans/invoke_agent.py @@ -24,7 +24,7 @@ def invoke_agent_span( context: "agents.RunContextWrapper", agent: "agents.Agent", kwargs: "dict[str, Any]" -) -> "sentry_sdk.tracing.Span": +) -> "Union[sentry_sdk.tracing.Span, StreamedSpan]": span_streaming = has_span_streaming_enabled(sentry_sdk.get_client().options) if span_streaming: span = sentry_sdk.traces.start_span( From b2b0c3455f8fbfc4ed6cc54cba64ef4ecbdb5681 Mon Sep 17 00:00:00 2001 From: Alexander Alderman Webb Date: Tue, 26 May 2026 08:27:05 +0200 Subject: [PATCH 05/13] mypy2 --- .../integrations/openai_agents/patches/agent_run.py | 11 ++++++++--- .../integrations/openai_agents/patches/models.py | 4 ++-- .../integrations/openai_agents/patches/runner.py | 7 ++++++- 3 files changed, 16 insertions(+), 6 deletions(-) diff --git a/sentry_sdk/integrations/openai_agents/patches/agent_run.py b/sentry_sdk/integrations/openai_agents/patches/agent_run.py index 01e4798eda..7959f78e1d 100644 --- a/sentry_sdk/integrations/openai_agents/patches/agent_run.py +++ b/sentry_sdk/integrations/openai_agents/patches/agent_run.py @@ -13,7 +13,7 @@ ) if TYPE_CHECKING: - from typing import Any, Awaitable, Callable, Optional + from typing import Any, Awaitable, Callable, Optional, Union from agents.run_internal.run_steps import SingleStepResult @@ -51,7 +51,7 @@ def _maybe_start_agent_span( should_run_agent_start_hooks: bool, span_kwargs: "dict[str, Any]", is_streaming: bool = False, -) -> "Optional[Span]": +) -> "Optional[Union[Span, StreamedSpan]]": """ Start an agent invocation span if conditions are met. Handles ending any existing span for a different agent. 
@@ -79,7 +79,12 @@ def _maybe_start_agent_span( context_wrapper._sentry_agent_span = span agent._sentry_agent_span = span - if is_streaming: + if not is_streaming: + return span + + if isinstance(span, StreamedSpan): + span.set_attribute(SPANDATA.GEN_AI_RESPONSE_STREAMING, True) + else: span.set_data(SPANDATA.GEN_AI_RESPONSE_STREAMING, True) return span diff --git a/sentry_sdk/integrations/openai_agents/patches/models.py b/sentry_sdk/integrations/openai_agents/patches/models.py index c8fe2b28f8..634c9fdca1 100644 --- a/sentry_sdk/integrations/openai_agents/patches/models.py +++ b/sentry_sdk/integrations/openai_agents/patches/models.py @@ -17,7 +17,7 @@ from ..spans import ai_client_span, update_ai_client_span if TYPE_CHECKING: - from typing import Any, Callable, Optional + from typing import Any, Callable, Optional, Union from sentry_sdk.tracing import Span @@ -42,7 +42,7 @@ def _set_response_model_on_agent_span( def _inject_trace_propagation_headers( - hosted_tool: "HostedMCPTool", span: "Span" + hosted_tool: "HostedMCPTool", span: "Union[Span, StreamedSpan]" ) -> None: headers = hosted_tool.tool_config.get("headers") if headers is None: diff --git a/sentry_sdk/integrations/openai_agents/patches/runner.py b/sentry_sdk/integrations/openai_agents/patches/runner.py index 568839dbda..2fc179f45b 100644 --- a/sentry_sdk/integrations/openai_agents/patches/runner.py +++ b/sentry_sdk/integrations/openai_agents/patches/runner.py @@ -142,7 +142,12 @@ def wrapper(*args: "Any", **kwargs: "Any") -> "Any": # Set conversation ID on workflow span early so it's captured even on errors if conversation_id: - workflow_span.set_data(SPANDATA.GEN_AI_CONVERSATION_ID, conversation_id) + if isinstance(workflow_span, StreamedSpan): + workflow_span.set_attribute( + SPANDATA.GEN_AI_CONVERSATION_ID, conversation_id + ) + else: + workflow_span.set_data(SPANDATA.GEN_AI_CONVERSATION_ID, conversation_id) # Store span on agent for cleanup agent._sentry_workflow_span = workflow_span From 
53a84ae504adfec8ee56c27bcf40ebfbc24a3443 Mon Sep 17 00:00:00 2001 From: Alexander Alderman Webb Date: Tue, 26 May 2026 10:11:00 +0200 Subject: [PATCH 06/13] handle streamed span invoke agent --- sentry_sdk/integrations/openai_agents/patches/runner.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/sentry_sdk/integrations/openai_agents/patches/runner.py b/sentry_sdk/integrations/openai_agents/patches/runner.py index 2fc179f45b..2be0a651a9 100644 --- a/sentry_sdk/integrations/openai_agents/patches/runner.py +++ b/sentry_sdk/integrations/openai_agents/patches/runner.py @@ -69,8 +69,11 @@ async def wrapper(*args: "Any", **kwargs: "Any") -> "Any": ) if ( - invoke_agent_span is not None - and invoke_agent_span.timestamp is None + invoke_agent_span is None + or isinstance(invoke_agent_span, StreamedSpan) + and invoke_agent_span.end_timestamp is not None + or not isinstance(invoke_agent_span, StreamedSpan) + and invoke_agent_span.timestamp is not None ): update_invoke_agent_span( span=invoke_agent_span, From dcdeb8dd77e2aa4ded7afe8d5aa034fbeae2dc1d Mon Sep 17 00:00:00 2001 From: Alexander Alderman Webb Date: Tue, 26 May 2026 10:11:25 +0200 Subject: [PATCH 07/13] truncate invoke agent attributes --- .../integrations/openai_agents/spans/invoke_agent.py | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/sentry_sdk/integrations/openai_agents/spans/invoke_agent.py b/sentry_sdk/integrations/openai_agents/spans/invoke_agent.py index ad03db8240..c21145ac4a 100644 --- a/sentry_sdk/integrations/openai_agents/spans/invoke_agent.py +++ b/sentry_sdk/integrations/openai_agents/spans/invoke_agent.py @@ -10,7 +10,10 @@ from sentry_sdk.consts import OP, SPANDATA from sentry_sdk.scope import should_send_default_pii from sentry_sdk.traces import StreamedSpan -from sentry_sdk.tracing_utils import has_span_streaming_enabled +from sentry_sdk.tracing_utils import ( + has_span_streaming_enabled, + should_truncate_gen_ai_input, +) from 
sentry_sdk.utils import safe_serialize from ..consts import SPAN_ORIGIN @@ -80,9 +83,9 @@ def invoke_agent_span( client = sentry_sdk.get_client() scope = sentry_sdk.get_current_scope() messages_data = ( - normalized_messages - if client.options.get("stream_gen_ai_spans", False) - else truncate_and_annotate_messages(normalized_messages, span, scope) + truncate_and_annotate_messages(normalized_messages, span, scope) + if should_truncate_gen_ai_input(client.options) + else normalized_messages ) if messages_data is not None: set_data_normalized( From 2c1bd195575f0ffbbebe5a90950f2d387c47427b Mon Sep 17 00:00:00 2001 From: Alexander Alderman Webb Date: Tue, 26 May 2026 10:17:48 +0200 Subject: [PATCH 08/13] fix bool logic --- sentry_sdk/integrations/openai_agents/patches/runner.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/sentry_sdk/integrations/openai_agents/patches/runner.py b/sentry_sdk/integrations/openai_agents/patches/runner.py index 2be0a651a9..f1a5d3958d 100644 --- a/sentry_sdk/integrations/openai_agents/patches/runner.py +++ b/sentry_sdk/integrations/openai_agents/patches/runner.py @@ -68,12 +68,11 @@ async def wrapper(*args: "Any", **kwargs: "Any") -> "Any": context_wrapper, "_sentry_agent_span", None ) - if ( - invoke_agent_span is None - or isinstance(invoke_agent_span, StreamedSpan) - and invoke_agent_span.end_timestamp is not None + if invoke_agent_span is not None and ( + isinstance(invoke_agent_span, StreamedSpan) + and invoke_agent_span.end_timestamp is None or not isinstance(invoke_agent_span, StreamedSpan) - and invoke_agent_span.timestamp is not None + and invoke_agent_span.timestamp is None ): update_invoke_agent_span( span=invoke_agent_span, From e243b8617e5cd868fa6abfc08b1ed55d002ade24 Mon Sep 17 00:00:00 2001 From: Alexander Alderman Webb Date: Tue, 26 May 2026 17:22:15 +0200 Subject: [PATCH 09/13] add brackets to bool logic --- .../integrations/openai_agents/patches/agent_run.py | 12 ++++-------- 1 file changed, 4 
insertions(+), 8 deletions(-) diff --git a/sentry_sdk/integrations/openai_agents/patches/agent_run.py b/sentry_sdk/integrations/openai_agents/patches/agent_run.py index 7959f78e1d..71883b2eef 100644 --- a/sentry_sdk/integrations/openai_agents/patches/agent_run.py +++ b/sentry_sdk/integrations/openai_agents/patches/agent_run.py @@ -116,10 +116,8 @@ async def _run_single_turn( if ( span is None - or isinstance(span, StreamedSpan) - and span.end_timestamp is not None - or not isinstance(span, StreamedSpan) - and span.timestamp is not None + or (isinstance(span, StreamedSpan) and span.end_timestamp is not None) + or (not isinstance(span, StreamedSpan) and span.timestamp is not None) ): return await original_run_single_turn(*args, **kwargs) @@ -202,10 +200,8 @@ async def _run_single_turn_streamed( if ( span is None - or isinstance(span, StreamedSpan) - and span.end_timestamp is not None - or not isinstance(span, StreamedSpan) - and span.timestamp is not None + or (isinstance(span, StreamedSpan) and span.end_timestamp is not None) + or (not isinstance(span, StreamedSpan) and span.timestamp is not None) ): return await original_run_single_turn_streamed(*args, **kwargs) From 931aa30e843f6c8848d13920508f89f7e403950f Mon Sep 17 00:00:00 2001 From: Alexander Alderman Webb Date: Tue, 26 May 2026 17:48:58 +0200 Subject: [PATCH 10/13] drop transaction reference in span streaming comment --- tests/integrations/openai_agents/test_openai_agents.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/tests/integrations/openai_agents/test_openai_agents.py b/tests/integrations/openai_agents/test_openai_agents.py index 5a932870a9..b4bb7f5d3d 100644 --- a/tests/integrations/openai_agents/test_openai_agents.py +++ b/tests/integrations/openai_agents/test_openai_agents.py @@ -6117,11 +6117,10 @@ def simple_tool(message: str) -> str: # Tool span should have the conversation_id passed to Runner.run() assert tool_span["attributes"]["gen_ai.conversation.id"] == 
"conv_tool_test_456" - # Workflow span (transaction) should have the same conversation_id + # Workflow span should have the same conversation_id workflow_span = spans[4] assert workflow_span["is_segment"] is True - # Workflow span (transaction) should have the same conversation_id assert ( workflow_span["attributes"]["gen_ai.conversation.id"] == "conv_tool_test_456" From 3a955206a1f575c96559d006d9c7b63ef19d9733 Mon Sep 17 00:00:00 2001 From: Alexander Alderman Webb Date: Wed, 27 May 2026 09:23:02 +0200 Subject: [PATCH 11/13] test(openai-agents): Deduplicate in tests by removing node.callspec.id matching --- .../openai_agents/test_openai_agents.py | 896 +++++------------- 1 file changed, 221 insertions(+), 675 deletions(-) diff --git a/tests/integrations/openai_agents/test_openai_agents.py b/tests/integrations/openai_agents/test_openai_agents.py index cfe23f922a..7ca0df5fa2 100644 --- a/tests/integrations/openai_agents/test_openai_agents.py +++ b/tests/integrations/openai_agents/test_openai_agents.py @@ -308,17 +308,21 @@ async def test_agent_invocation_span_no_pii( @pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.asyncio @pytest.mark.parametrize( - "instructions", - ( - None, - "You are a coding assistant that talks like a pirate.", - ), -) -@pytest.mark.parametrize( - "input", + "instructions,input,expected_system_instructions,expected_request_messages", [ - pytest.param("Test input", id="string"), - pytest.param( + ( + None, + ("Test input"), + None, + [ + { + "content": [{"text": "Test input", "type": "text"}], + "role": "user", + }, + ], + ), + ( + "You are a coding assistant that talks like a pirate.", [ { "role": "system", @@ -333,9 +337,28 @@ async def test_agent_invocation_span_no_pii( "content": "Test input", }, ], - id="blocks_no_type", + [ + { + "type": "text", + "content": "You are a coding assistant that talks like a pirate.", + }, + {"type": "text", "content": "You are a helpful assistant."}, + ], + [ + { + "role": "user", + 
"content": [ + { + "type": "text", + "text": "Message demonstrating the absence of truncation.", + } + ], + }, + {"role": "user", "content": [{"type": "text", "text": "Test input"}]}, + ], ), - pytest.param( + ( + "You are a coding assistant that talks like a pirate.", [ { "type": "message", @@ -353,9 +376,28 @@ async def test_agent_invocation_span_no_pii( "content": "Test input", }, ], - id="blocks", + [ + { + "type": "text", + "content": "You are a coding assistant that talks like a pirate.", + }, + {"type": "text", "content": "You are a helpful assistant."}, + ], + [ + { + "role": "user", + "content": [ + { + "type": "text", + "text": "Message demonstrating the absence of truncation.", + } + ], + }, + {"role": "user", "content": [{"type": "text", "text": "Test input"}]}, + ], ), - pytest.param( + ( + "You are a coding assistant that talks like a pirate.", [ { "role": "system", @@ -373,9 +415,29 @@ async def test_agent_invocation_span_no_pii( "content": "Test input", }, ], - id="parts_no_type", + [ + { + "type": "text", + "content": "You are a coding assistant that talks like a pirate.", + }, + {"type": "text", "content": "You are a helpful assistant."}, + {"type": "text", "content": "Be concise and clear."}, + ], + [ + { + "role": "user", + "content": [ + { + "type": "text", + "text": "Message demonstrating the absence of truncation.", + } + ], + }, + {"role": "user", "content": [{"type": "text", "text": "Test input"}]}, + ], ), - pytest.param( + ( + "You are a coding assistant that talks like a pirate.", [ { "type": "message", @@ -396,7 +458,26 @@ async def test_agent_invocation_span_no_pii( "content": "Test input", }, ], - id="parts", + [ + { + "type": "text", + "content": "You are a coding assistant that talks like a pirate.", + }, + {"type": "text", "content": "You are a helpful assistant."}, + {"type": "text", "content": "Be concise and clear."}, + ], + [ + { + "role": "user", + "content": [ + { + "type": "text", + "text": "Message demonstrating the absence 
of truncation.", + } + ], + }, + {"role": "user", "content": [{"type": "text", "text": "Test input"}]}, + ], ), ], ) @@ -408,7 +489,8 @@ async def test_agent_invocation_span( nonstreaming_responses_model_response, instructions, input, - request, + expected_system_instructions, + expected_request_messages, get_model_response, stream_gen_ai_spans, ): @@ -457,236 +539,17 @@ async def test_agent_invocation_span( assert invoke_agent_span["name"] == "invoke_agent test_agent" - # Only first case checks "gen_ai.request.messages" until further input handling work. - param_id = request.node.callspec.id - if "string" in param_id and instructions is None: # type: ignore + if expected_system_instructions is None: assert "gen_ai.system_instructions" not in ai_client_span["attributes"] - - assert invoke_agent_span["attributes"][ - "gen_ai.request.messages" - ] == safe_serialize( - [ - { - "content": [{"text": "Test input", "type": "text"}], - "role": "user", - }, - ] - ) - elif "string" in param_id: - assert ai_client_span["attributes"][ - "gen_ai.system_instructions" - ] == safe_serialize( - [ - { - "type": "text", - "content": "You are a coding assistant that talks like a pirate.", - }, - ] - ) - elif "blocks_no_type" in param_id and instructions is None: # type: ignore - assert ai_client_span["attributes"][ - "gen_ai.system_instructions" - ] == safe_serialize( - [ - {"type": "text", "content": "You are a helpful assistant."}, - ] - ) - - assert json.loads( - ai_client_span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] - ) == [ - { - "role": "user", - "content": [ - { - "type": "text", - "text": "Message demonstrating the absence of truncation.", - } - ], - }, - {"role": "user", "content": [{"type": "text", "text": "Test input"}]}, - ] - elif "blocks_no_type" in param_id: - assert ai_client_span["attributes"][ - "gen_ai.system_instructions" - ] == safe_serialize( - [ - { - "type": "text", - "content": "You are a coding assistant that talks like a pirate.", - }, - {"type": 
"text", "content": "You are a helpful assistant."}, - ] - ) - - assert json.loads( - ai_client_span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] - ) == [ - { - "role": "user", - "content": [ - { - "type": "text", - "text": "Message demonstrating the absence of truncation.", - } - ], - }, - {"role": "user", "content": [{"type": "text", "text": "Test input"}]}, - ] - elif "blocks" in param_id and instructions is None: # type: ignore - assert ai_client_span["attributes"][ - "gen_ai.system_instructions" - ] == safe_serialize( - [ - {"type": "text", "content": "You are a helpful assistant."}, - ] - ) - - assert json.loads( - ai_client_span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] - ) == [ - { - "role": "user", - "content": [ - { - "type": "text", - "text": "Message demonstrating the absence of truncation.", - } - ], - }, - {"role": "user", "content": [{"type": "text", "text": "Test input"}]}, - ] - elif "blocks" in param_id: - assert ai_client_span["attributes"][ - "gen_ai.system_instructions" - ] == safe_serialize( - [ - { - "type": "text", - "content": "You are a coding assistant that talks like a pirate.", - }, - {"type": "text", "content": "You are a helpful assistant."}, - ] - ) - - assert json.loads( - ai_client_span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] - ) == [ - { - "role": "user", - "content": [ - { - "type": "text", - "text": "Message demonstrating the absence of truncation.", - } - ], - }, - {"role": "user", "content": [{"type": "text", "text": "Test input"}]}, - ] - elif "parts_no_type" in param_id and instructions is None: - assert ai_client_span["attributes"][ - "gen_ai.system_instructions" - ] == safe_serialize( - [ - {"type": "text", "content": "You are a helpful assistant."}, - {"type": "text", "content": "Be concise and clear."}, - ] - ) - - assert json.loads( - ai_client_span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] - ) == [ - { - "role": "user", - "content": [ - { - "type": "text", - "text": "Message demonstrating the 
absence of truncation.", - } - ], - }, - {"role": "user", "content": [{"type": "text", "text": "Test input"}]}, - ] - elif "parts_no_type" in param_id: - assert ai_client_span["attributes"][ - "gen_ai.system_instructions" - ] == safe_serialize( - [ - { - "type": "text", - "content": "You are a coding assistant that talks like a pirate.", - }, - {"type": "text", "content": "You are a helpful assistant."}, - {"type": "text", "content": "Be concise and clear."}, - ] - ) - - assert json.loads( - ai_client_span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] - ) == [ - { - "role": "user", - "content": [ - { - "type": "text", - "text": "Message demonstrating the absence of truncation.", - } - ], - }, - {"role": "user", "content": [{"type": "text", "text": "Test input"}]}, - ] - elif instructions is None: # type: ignore - assert ai_client_span["attributes"][ - "gen_ai.system_instructions" - ] == safe_serialize( - [ - {"type": "text", "content": "You are a helpful assistant."}, - {"type": "text", "content": "Be concise and clear."}, - ] - ) - - assert json.loads( - ai_client_span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] - ) == [ - { - "role": "user", - "content": [ - { - "type": "text", - "text": "Message demonstrating the absence of truncation.", - } - ], - }, - {"role": "user", "content": [{"type": "text", "text": "Test input"}]}, - ] else: assert ai_client_span["attributes"][ "gen_ai.system_instructions" - ] == safe_serialize( - [ - { - "type": "text", - "content": "You are a coding assistant that talks like a pirate.", - }, - {"type": "text", "content": "You are a helpful assistant."}, - {"type": "text", "content": "Be concise and clear."}, - ] - ) + ] == safe_serialize(expected_system_instructions) - assert json.loads( - ai_client_span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] - ) == [ - { - "role": "user", - "content": [ - { - "type": "text", - "text": "Message demonstrating the absence of truncation.", - } - ], - }, - {"role": "user", "content": 
[{"type": "text", "text": "Test input"}]}, - ] + assert ( + json.loads(ai_client_span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) + == expected_request_messages + ) assert ( invoke_agent_span["attributes"]["gen_ai.response.text"] @@ -744,117 +607,17 @@ async def test_agent_invocation_span( assert invoke_agent_span["description"] == "invoke_agent test_agent" - # Only first case checks "gen_ai.request.messages" until further input handling work. - param_id = request.node.callspec.id - if "string" in param_id and instructions is None: # type: ignore + if expected_system_instructions is None: assert "gen_ai.system_instructions" not in ai_client_span["data"] - - assert invoke_agent_span["data"][ - "gen_ai.request.messages" - ] == safe_serialize( - [ - { - "content": [{"text": "Test input", "type": "text"}], - "role": "user", - }, - ] - ) - - elif "string" in param_id: - assert ai_client_span["data"][ - "gen_ai.system_instructions" - ] == safe_serialize( - [ - { - "type": "text", - "content": "You are a coding assistant that talks like a pirate.", - }, - ] - ) - elif "blocks_no_type" in param_id and instructions is None: # type: ignore - assert ai_client_span["data"][ - "gen_ai.system_instructions" - ] == safe_serialize( - [ - {"type": "text", "content": "You are a helpful assistant."}, - ] - ) - elif "blocks_no_type" in param_id: - assert ai_client_span["data"][ - "gen_ai.system_instructions" - ] == safe_serialize( - [ - { - "type": "text", - "content": "You are a coding assistant that talks like a pirate.", - }, - {"type": "text", "content": "You are a helpful assistant."}, - ] - ) - elif "blocks" in param_id and instructions is None: # type: ignore - assert ai_client_span["data"][ - "gen_ai.system_instructions" - ] == safe_serialize( - [ - {"type": "text", "content": "You are a helpful assistant."}, - ] - ) - elif "blocks" in param_id: - assert ai_client_span["data"][ - "gen_ai.system_instructions" - ] == safe_serialize( - [ - { - "type": "text", - "content": 
"You are a coding assistant that talks like a pirate.", - }, - {"type": "text", "content": "You are a helpful assistant."}, - ] - ) - elif "parts_no_type" in param_id and instructions is None: - assert ai_client_span["data"][ - "gen_ai.system_instructions" - ] == safe_serialize( - [ - {"type": "text", "content": "You are a helpful assistant."}, - {"type": "text", "content": "Be concise and clear."}, - ] - ) - elif "parts_no_type" in param_id: - assert ai_client_span["data"][ - "gen_ai.system_instructions" - ] == safe_serialize( - [ - { - "type": "text", - "content": "You are a coding assistant that talks like a pirate.", - }, - {"type": "text", "content": "You are a helpful assistant."}, - {"type": "text", "content": "Be concise and clear."}, - ] - ) - elif instructions is None: # type: ignore - assert ai_client_span["data"][ - "gen_ai.system_instructions" - ] == safe_serialize( - [ - {"type": "text", "content": "You are a helpful assistant."}, - {"type": "text", "content": "Be concise and clear."}, - ] - ) else: assert ai_client_span["data"][ "gen_ai.system_instructions" - ] == safe_serialize( - [ - { - "type": "text", - "content": "You are a coding assistant that talks like a pirate.", - }, - {"type": "text", "content": "You are a helpful assistant."}, - {"type": "text", "content": "Be concise and clear."}, - ] - ) + ] == safe_serialize(expected_system_instructions) + + assert ( + json.loads(ai_client_span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) + == expected_request_messages[-1:] + ) assert ( invoke_agent_span["data"]["gen_ai.response.text"] @@ -1094,17 +857,21 @@ def test_agent_invocation_span_sync_no_pii( @pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.parametrize( - "instructions", - ( - None, - "You are a coding assistant that talks like a pirate.", - ), -) -@pytest.mark.parametrize( - "input", + "instructions,input,expected_system_instructions,expected_request_messages", [ - pytest.param("Test input", id="string"), - 
pytest.param( + ( + None, + ("Test input"), + None, + [ + { + "content": [{"text": "Test input", "type": "text"}], + "role": "user", + }, + ], + ), + ( + "You are a coding assistant that talks like a pirate.", [ { "role": "system", @@ -1119,9 +886,28 @@ def test_agent_invocation_span_sync_no_pii( "content": "Test input", }, ], - id="blocks_no_type", + [ + { + "type": "text", + "content": "You are a coding assistant that talks like a pirate.", + }, + {"type": "text", "content": "You are a helpful assistant."}, + ], + [ + { + "role": "user", + "content": [ + { + "type": "text", + "text": "Message demonstrating the absence of truncation.", + } + ], + }, + {"role": "user", "content": [{"type": "text", "text": "Test input"}]}, + ], ), - pytest.param( + ( + "You are a coding assistant that talks like a pirate.", [ { "type": "message", @@ -1139,9 +925,28 @@ def test_agent_invocation_span_sync_no_pii( "content": "Test input", }, ], - id="blocks", + [ + { + "type": "text", + "content": "You are a coding assistant that talks like a pirate.", + }, + {"type": "text", "content": "You are a helpful assistant."}, + ], + [ + { + "role": "user", + "content": [ + { + "type": "text", + "text": "Message demonstrating the absence of truncation.", + } + ], + }, + {"role": "user", "content": [{"type": "text", "text": "Test input"}]}, + ], ), - pytest.param( + ( + "You are a coding assistant that talks like a pirate.", [ { "role": "system", @@ -1159,9 +964,29 @@ def test_agent_invocation_span_sync_no_pii( "content": "Test input", }, ], - id="parts_no_type", + [ + { + "type": "text", + "content": "You are a coding assistant that talks like a pirate.", + }, + {"type": "text", "content": "You are a helpful assistant."}, + {"type": "text", "content": "Be concise and clear."}, + ], + [ + { + "role": "user", + "content": [ + { + "type": "text", + "text": "Message demonstrating the absence of truncation.", + } + ], + }, + {"role": "user", "content": [{"type": "text", "text": "Test input"}]}, + 
], ), - pytest.param( + ( + "You are a coding assistant that talks like a pirate.", [ { "type": "message", @@ -1182,7 +1007,26 @@ def test_agent_invocation_span_sync_no_pii( "content": "Test input", }, ], - id="parts", + [ + { + "type": "text", + "content": "You are a coding assistant that talks like a pirate.", + }, + {"type": "text", "content": "You are a helpful assistant."}, + {"type": "text", "content": "Be concise and clear."}, + ], + [ + { + "role": "user", + "content": [ + { + "type": "text", + "text": "Message demonstrating the absence of truncation.", + } + ], + }, + {"role": "user", "content": [{"type": "text", "text": "Test input"}]}, + ], ), ], ) @@ -1194,7 +1038,8 @@ def test_agent_invocation_span_sync( nonstreaming_responses_model_response, instructions, input, - request, + expected_system_instructions, + expected_request_messages, get_model_response, stream_gen_ai_spans, sync_event_loop, @@ -1261,224 +1106,17 @@ def test_agent_invocation_span_sync( assert ai_client_span["attributes"]["gen_ai.request.temperature"] == 0.7 assert ai_client_span["attributes"]["gen_ai.request.top_p"] == 1.0 - param_id = request.node.callspec.id - if "string" in param_id and instructions is None: # type: ignore + if expected_system_instructions is None: assert "gen_ai.system_instructions" not in ai_client_span["attributes"] - elif "string" in param_id: - assert ai_client_span["attributes"][ - "gen_ai.system_instructions" - ] == safe_serialize( - [ - { - "type": "text", - "content": "You are a coding assistant that talks like a pirate.", - }, - ] - ) - elif "blocks_no_type" in param_id and instructions is None: # type: ignore - assert ai_client_span["attributes"][ - "gen_ai.system_instructions" - ] == safe_serialize( - [ - {"type": "text", "content": "You are a helpful assistant."}, - ] - ) - - assert json.loads( - ai_client_span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] - ) == [ - { - "role": "user", - "content": [ - { - "type": "text", - "text": "Message 
demonstrating the absence of truncation.", - } - ], - }, - {"role": "user", "content": [{"type": "text", "text": "Test input"}]}, - ] - elif "blocks_no_type" in param_id: - assert ai_client_span["attributes"][ - "gen_ai.system_instructions" - ] == safe_serialize( - [ - { - "type": "text", - "content": "You are a coding assistant that talks like a pirate.", - }, - {"type": "text", "content": "You are a helpful assistant."}, - ] - ) - - assert json.loads( - ai_client_span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] - ) == [ - { - "role": "user", - "content": [ - { - "type": "text", - "text": "Message demonstrating the absence of truncation.", - } - ], - }, - {"role": "user", "content": [{"type": "text", "text": "Test input"}]}, - ] - elif "blocks" in param_id and instructions is None: # type: ignore - assert ai_client_span["attributes"][ - "gen_ai.system_instructions" - ] == safe_serialize( - [ - {"type": "text", "content": "You are a helpful assistant."}, - ] - ) - - assert json.loads( - ai_client_span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] - ) == [ - { - "role": "user", - "content": [ - { - "type": "text", - "text": "Message demonstrating the absence of truncation.", - } - ], - }, - {"role": "user", "content": [{"type": "text", "text": "Test input"}]}, - ] - elif "blocks" in param_id: - assert ai_client_span["attributes"][ - "gen_ai.system_instructions" - ] == safe_serialize( - [ - { - "type": "text", - "content": "You are a coding assistant that talks like a pirate.", - }, - {"type": "text", "content": "You are a helpful assistant."}, - ] - ) - - assert json.loads( - ai_client_span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] - ) == [ - { - "role": "user", - "content": [ - { - "type": "text", - "text": "Message demonstrating the absence of truncation.", - } - ], - }, - {"role": "user", "content": [{"type": "text", "text": "Test input"}]}, - ] - elif "parts_no_type" in param_id and instructions is None: - assert ai_client_span["attributes"][ - 
"gen_ai.system_instructions" - ] == safe_serialize( - [ - {"type": "text", "content": "You are a helpful assistant."}, - {"type": "text", "content": "Be concise and clear."}, - ] - ) - - assert json.loads( - ai_client_span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] - ) == [ - { - "role": "user", - "content": [ - { - "type": "text", - "text": "Message demonstrating the absence of truncation.", - } - ], - }, - {"role": "user", "content": [{"type": "text", "text": "Test input"}]}, - ] - elif "parts_no_type" in param_id: - assert ai_client_span["attributes"][ - "gen_ai.system_instructions" - ] == safe_serialize( - [ - { - "type": "text", - "content": "You are a coding assistant that talks like a pirate.", - }, - {"type": "text", "content": "You are a helpful assistant."}, - {"type": "text", "content": "Be concise and clear."}, - ] - ) - - assert json.loads( - ai_client_span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] - ) == [ - { - "role": "user", - "content": [ - { - "type": "text", - "text": "Message demonstrating the absence of truncation.", - } - ], - }, - {"role": "user", "content": [{"type": "text", "text": "Test input"}]}, - ] - elif instructions is None: # type: ignore - assert ai_client_span["attributes"][ - "gen_ai.system_instructions" - ] == safe_serialize( - [ - {"type": "text", "content": "You are a helpful assistant."}, - {"type": "text", "content": "Be concise and clear."}, - ] - ) - - assert json.loads( - ai_client_span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] - ) == [ - { - "role": "user", - "content": [ - { - "type": "text", - "text": "Message demonstrating the absence of truncation.", - } - ], - }, - {"role": "user", "content": [{"type": "text", "text": "Test input"}]}, - ] else: assert ai_client_span["attributes"][ "gen_ai.system_instructions" - ] == safe_serialize( - [ - { - "type": "text", - "content": "You are a coding assistant that talks like a pirate.", - }, - {"type": "text", "content": "You are a helpful assistant."}, - 
{"type": "text", "content": "Be concise and clear."}, - ] - ) + ] == safe_serialize(expected_system_instructions) - assert json.loads( - ai_client_span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] - ) == [ - { - "role": "user", - "content": [ - { - "type": "text", - "text": "Message demonstrating the absence of truncation.", - } - ], - }, - {"role": "user", "content": [{"type": "text", "text": "Test input"}]}, - ] + assert ( + json.loads(ai_client_span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) + == expected_request_messages + ) else: with patch.object( agent.model._client._client, @@ -1528,104 +1166,12 @@ def test_agent_invocation_span_sync( assert ai_client_span["data"]["gen_ai.request.temperature"] == 0.7 assert ai_client_span["data"]["gen_ai.request.top_p"] == 1.0 - param_id = request.node.callspec.id - if "string" in param_id and instructions is None: # type: ignore + if expected_system_instructions is None: assert "gen_ai.system_instructions" not in ai_client_span["data"] - elif "string" in param_id: - assert ai_client_span["data"][ - "gen_ai.system_instructions" - ] == safe_serialize( - [ - { - "type": "text", - "content": "You are a coding assistant that talks like a pirate.", - }, - ] - ) - elif "blocks_no_type" in param_id and instructions is None: # type: ignore - assert ai_client_span["data"][ - "gen_ai.system_instructions" - ] == safe_serialize( - [ - {"type": "text", "content": "You are a helpful assistant."}, - ] - ) - elif "blocks_no_type" in param_id: - assert ai_client_span["data"][ - "gen_ai.system_instructions" - ] == safe_serialize( - [ - { - "type": "text", - "content": "You are a coding assistant that talks like a pirate.", - }, - {"type": "text", "content": "You are a helpful assistant."}, - ] - ) - elif "blocks" in param_id and instructions is None: # type: ignore - assert ai_client_span["data"][ - "gen_ai.system_instructions" - ] == safe_serialize( - [ - {"type": "text", "content": "You are a helpful assistant."}, - ] - ) - elif 
"blocks" in param_id: - assert ai_client_span["data"][ - "gen_ai.system_instructions" - ] == safe_serialize( - [ - { - "type": "text", - "content": "You are a coding assistant that talks like a pirate.", - }, - {"type": "text", "content": "You are a helpful assistant."}, - ] - ) - elif "parts_no_type" in param_id and instructions is None: - assert ai_client_span["data"][ - "gen_ai.system_instructions" - ] == safe_serialize( - [ - {"type": "text", "content": "You are a helpful assistant."}, - {"type": "text", "content": "Be concise and clear."}, - ] - ) - elif "parts_no_type" in param_id: - assert ai_client_span["data"][ - "gen_ai.system_instructions" - ] == safe_serialize( - [ - { - "type": "text", - "content": "You are a coding assistant that talks like a pirate.", - }, - {"type": "text", "content": "You are a helpful assistant."}, - {"type": "text", "content": "Be concise and clear."}, - ] - ) - elif instructions is None: # type: ignore - assert ai_client_span["data"][ - "gen_ai.system_instructions" - ] == safe_serialize( - [ - {"type": "text", "content": "You are a helpful assistant."}, - {"type": "text", "content": "Be concise and clear."}, - ] - ) else: assert ai_client_span["data"][ "gen_ai.system_instructions" - ] == safe_serialize( - [ - { - "type": "text", - "content": "You are a coding assistant that talks like a pirate.", - }, - {"type": "text", "content": "You are a helpful assistant."}, - {"type": "text", "content": "Be concise and clear."}, - ] - ) + ] == safe_serialize(expected_system_instructions) @pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) From e1d6ebe0185c00a445291fde65ddae486c6a88a7 Mon Sep 17 00:00:00 2001 From: Alexander Alderman Webb Date: Wed, 27 May 2026 09:30:42 +0200 Subject: [PATCH 12/13] merge cleanup --- .../openai_agents/test_openai_agents.py | 450 +----------------- 1 file changed, 12 insertions(+), 438 deletions(-) diff --git a/tests/integrations/openai_agents/test_openai_agents.py 
b/tests/integrations/openai_agents/test_openai_agents.py index fbcb0a96ca..6deb2a888f 100644 --- a/tests/integrations/openai_agents/test_openai_agents.py +++ b/tests/integrations/openai_agents/test_openai_agents.py @@ -609,236 +609,17 @@ async def test_agent_invocation_span( assert invoke_agent_span["name"] == "invoke_agent test_agent" - # Only first case checks "gen_ai.request.messages" until further input handling work. - param_id = request.node.callspec.id - if "string" in param_id and instructions is None: # type: ignore + if expected_system_instructions is None: assert "gen_ai.system_instructions" not in ai_client_span["attributes"] - - assert invoke_agent_span["attributes"][ - "gen_ai.request.messages" - ] == safe_serialize( - [ - { - "content": [{"text": "Test input", "type": "text"}], - "role": "user", - }, - ] - ) - elif "string" in param_id: - assert ai_client_span["attributes"][ - "gen_ai.system_instructions" - ] == safe_serialize( - [ - { - "type": "text", - "content": "You are a coding assistant that talks like a pirate.", - }, - ] - ) - elif "blocks_no_type" in param_id and instructions is None: # type: ignore - assert ai_client_span["attributes"][ - "gen_ai.system_instructions" - ] == safe_serialize( - [ - {"type": "text", "content": "You are a helpful assistant."}, - ] - ) - - assert json.loads( - ai_client_span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] - ) == [ - { - "role": "user", - "content": [ - { - "type": "text", - "text": "Message demonstrating the absence of truncation.", - } - ], - }, - {"role": "user", "content": [{"type": "text", "text": "Test input"}]}, - ] - elif "blocks_no_type" in param_id: - assert ai_client_span["attributes"][ - "gen_ai.system_instructions" - ] == safe_serialize( - [ - { - "type": "text", - "content": "You are a coding assistant that talks like a pirate.", - }, - {"type": "text", "content": "You are a helpful assistant."}, - ] - ) - - assert json.loads( - 
ai_client_span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] - ) == [ - { - "role": "user", - "content": [ - { - "type": "text", - "text": "Message demonstrating the absence of truncation.", - } - ], - }, - {"role": "user", "content": [{"type": "text", "text": "Test input"}]}, - ] - elif "blocks" in param_id and instructions is None: # type: ignore - assert ai_client_span["attributes"][ - "gen_ai.system_instructions" - ] == safe_serialize( - [ - {"type": "text", "content": "You are a helpful assistant."}, - ] - ) - - assert json.loads( - ai_client_span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] - ) == [ - { - "role": "user", - "content": [ - { - "type": "text", - "text": "Message demonstrating the absence of truncation.", - } - ], - }, - {"role": "user", "content": [{"type": "text", "text": "Test input"}]}, - ] - elif "blocks" in param_id: - assert ai_client_span["attributes"][ - "gen_ai.system_instructions" - ] == safe_serialize( - [ - { - "type": "text", - "content": "You are a coding assistant that talks like a pirate.", - }, - {"type": "text", "content": "You are a helpful assistant."}, - ] - ) - - assert json.loads( - ai_client_span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] - ) == [ - { - "role": "user", - "content": [ - { - "type": "text", - "text": "Message demonstrating the absence of truncation.", - } - ], - }, - {"role": "user", "content": [{"type": "text", "text": "Test input"}]}, - ] - elif "parts_no_type" in param_id and instructions is None: - assert ai_client_span["attributes"][ - "gen_ai.system_instructions" - ] == safe_serialize( - [ - {"type": "text", "content": "You are a helpful assistant."}, - {"type": "text", "content": "Be concise and clear."}, - ] - ) - - assert json.loads( - ai_client_span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] - ) == [ - { - "role": "user", - "content": [ - { - "type": "text", - "text": "Message demonstrating the absence of truncation.", - } - ], - }, - {"role": "user", "content": [{"type": "text", 
"text": "Test input"}]}, - ] - elif "parts_no_type" in param_id: - assert ai_client_span["attributes"][ - "gen_ai.system_instructions" - ] == safe_serialize( - [ - { - "type": "text", - "content": "You are a coding assistant that talks like a pirate.", - }, - {"type": "text", "content": "You are a helpful assistant."}, - {"type": "text", "content": "Be concise and clear."}, - ] - ) - - assert json.loads( - ai_client_span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] - ) == [ - { - "role": "user", - "content": [ - { - "type": "text", - "text": "Message demonstrating the absence of truncation.", - } - ], - }, - {"role": "user", "content": [{"type": "text", "text": "Test input"}]}, - ] - elif instructions is None: # type: ignore - assert ai_client_span["attributes"][ - "gen_ai.system_instructions" - ] == safe_serialize( - [ - {"type": "text", "content": "You are a helpful assistant."}, - {"type": "text", "content": "Be concise and clear."}, - ] - ) - - assert json.loads( - ai_client_span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] - ) == [ - { - "role": "user", - "content": [ - { - "type": "text", - "text": "Message demonstrating the absence of truncation.", - } - ], - }, - {"role": "user", "content": [{"type": "text", "text": "Test input"}]}, - ] else: assert ai_client_span["attributes"][ "gen_ai.system_instructions" - ] == safe_serialize( - [ - { - "type": "text", - "content": "You are a coding assistant that talks like a pirate.", - }, - {"type": "text", "content": "You are a helpful assistant."}, - {"type": "text", "content": "Be concise and clear."}, - ] - ) + ] == safe_serialize(expected_system_instructions) - assert json.loads( - ai_client_span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] - ) == [ - { - "role": "user", - "content": [ - { - "type": "text", - "text": "Message demonstrating the absence of truncation.", - } - ], - }, - {"role": "user", "content": [{"type": "text", "text": "Test input"}]}, - ] + assert ( + 
json.loads(ai_client_span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) + == expected_request_messages + ) assert ( invoke_agent_span["attributes"]["gen_ai.response.text"] @@ -1540,224 +1321,17 @@ def test_agent_invocation_span_sync( assert ai_client_span["attributes"]["gen_ai.request.temperature"] == 0.7 assert ai_client_span["attributes"]["gen_ai.request.top_p"] == 1.0 - param_id = request.node.callspec.id - if "string" in param_id and instructions is None: # type: ignore + if expected_system_instructions is None: assert "gen_ai.system_instructions" not in ai_client_span["attributes"] - elif "string" in param_id: - assert ai_client_span["attributes"][ - "gen_ai.system_instructions" - ] == safe_serialize( - [ - { - "type": "text", - "content": "You are a coding assistant that talks like a pirate.", - }, - ] - ) - elif "blocks_no_type" in param_id and instructions is None: # type: ignore - assert ai_client_span["attributes"][ - "gen_ai.system_instructions" - ] == safe_serialize( - [ - {"type": "text", "content": "You are a helpful assistant."}, - ] - ) - - assert json.loads( - ai_client_span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] - ) == [ - { - "role": "user", - "content": [ - { - "type": "text", - "text": "Message demonstrating the absence of truncation.", - } - ], - }, - {"role": "user", "content": [{"type": "text", "text": "Test input"}]}, - ] - elif "blocks_no_type" in param_id: - assert ai_client_span["attributes"][ - "gen_ai.system_instructions" - ] == safe_serialize( - [ - { - "type": "text", - "content": "You are a coding assistant that talks like a pirate.", - }, - {"type": "text", "content": "You are a helpful assistant."}, - ] - ) - - assert json.loads( - ai_client_span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] - ) == [ - { - "role": "user", - "content": [ - { - "type": "text", - "text": "Message demonstrating the absence of truncation.", - } - ], - }, - {"role": "user", "content": [{"type": "text", "text": "Test input"}]}, - ] - elif 
"blocks" in param_id and instructions is None: # type: ignore - assert ai_client_span["attributes"][ - "gen_ai.system_instructions" - ] == safe_serialize( - [ - {"type": "text", "content": "You are a helpful assistant."}, - ] - ) - - assert json.loads( - ai_client_span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] - ) == [ - { - "role": "user", - "content": [ - { - "type": "text", - "text": "Message demonstrating the absence of truncation.", - } - ], - }, - {"role": "user", "content": [{"type": "text", "text": "Test input"}]}, - ] - elif "blocks" in param_id: - assert ai_client_span["attributes"][ - "gen_ai.system_instructions" - ] == safe_serialize( - [ - { - "type": "text", - "content": "You are a coding assistant that talks like a pirate.", - }, - {"type": "text", "content": "You are a helpful assistant."}, - ] - ) - - assert json.loads( - ai_client_span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] - ) == [ - { - "role": "user", - "content": [ - { - "type": "text", - "text": "Message demonstrating the absence of truncation.", - } - ], - }, - {"role": "user", "content": [{"type": "text", "text": "Test input"}]}, - ] - elif "parts_no_type" in param_id and instructions is None: - assert ai_client_span["attributes"][ - "gen_ai.system_instructions" - ] == safe_serialize( - [ - {"type": "text", "content": "You are a helpful assistant."}, - {"type": "text", "content": "Be concise and clear."}, - ] - ) - - assert json.loads( - ai_client_span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] - ) == [ - { - "role": "user", - "content": [ - { - "type": "text", - "text": "Message demonstrating the absence of truncation.", - } - ], - }, - {"role": "user", "content": [{"type": "text", "text": "Test input"}]}, - ] - elif "parts_no_type" in param_id: - assert ai_client_span["attributes"][ - "gen_ai.system_instructions" - ] == safe_serialize( - [ - { - "type": "text", - "content": "You are a coding assistant that talks like a pirate.", - }, - {"type": "text", "content": "You 
are a helpful assistant."}, - {"type": "text", "content": "Be concise and clear."}, - ] - ) - - assert json.loads( - ai_client_span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] - ) == [ - { - "role": "user", - "content": [ - { - "type": "text", - "text": "Message demonstrating the absence of truncation.", - } - ], - }, - {"role": "user", "content": [{"type": "text", "text": "Test input"}]}, - ] - elif instructions is None: # type: ignore - assert ai_client_span["attributes"][ - "gen_ai.system_instructions" - ] == safe_serialize( - [ - {"type": "text", "content": "You are a helpful assistant."}, - {"type": "text", "content": "Be concise and clear."}, - ] - ) - - assert json.loads( - ai_client_span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] - ) == [ - { - "role": "user", - "content": [ - { - "type": "text", - "text": "Message demonstrating the absence of truncation.", - } - ], - }, - {"role": "user", "content": [{"type": "text", "text": "Test input"}]}, - ] else: assert ai_client_span["attributes"][ "gen_ai.system_instructions" - ] == safe_serialize( - [ - { - "type": "text", - "content": "You are a coding assistant that talks like a pirate.", - }, - {"type": "text", "content": "You are a helpful assistant."}, - {"type": "text", "content": "Be concise and clear."}, - ] - ) + ] == safe_serialize(expected_system_instructions) - assert json.loads( - ai_client_span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] - ) == [ - { - "role": "user", - "content": [ - { - "type": "text", - "text": "Message demonstrating the absence of truncation.", - } - ], - }, - {"role": "user", "content": [{"type": "text", "text": "Test input"}]}, - ] + assert ( + json.loads(ai_client_span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) + == expected_request_messages + ) elif span_streaming or stream_gen_ai_spans: with patch.object( agent.model._client._client, From 0b4ed5fe249efe2a93e05c1a693d5134d7857df0 Mon Sep 17 00:00:00 2001 From: Alexander Alderman Webb Date: Wed, 27 May 2026 
09:39:16 +0200 Subject: [PATCH 13/13] add bool precedence --- .../integrations/openai_agents/patches/runner.py | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/sentry_sdk/integrations/openai_agents/patches/runner.py b/sentry_sdk/integrations/openai_agents/patches/runner.py index f1a5d3958d..f5cc26060c 100644 --- a/sentry_sdk/integrations/openai_agents/patches/runner.py +++ b/sentry_sdk/integrations/openai_agents/patches/runner.py @@ -69,10 +69,14 @@ async def wrapper(*args: "Any", **kwargs: "Any") -> "Any": ) if invoke_agent_span is not None and ( - isinstance(invoke_agent_span, StreamedSpan) - and invoke_agent_span.end_timestamp is None - or not isinstance(invoke_agent_span, StreamedSpan) - and invoke_agent_span.timestamp is None + ( + isinstance(invoke_agent_span, StreamedSpan) + and invoke_agent_span.end_timestamp is None + ) + or ( + not isinstance(invoke_agent_span, StreamedSpan) + and invoke_agent_span.timestamp is None + ) ): update_invoke_agent_span( span=invoke_agent_span,