From a2d639b6d2814e0312d9703e4abb588d8f775578 Mon Sep 17 00:00:00 2001 From: jordanchendev Date: Thu, 4 Jun 2026 09:13:19 +0800 Subject: [PATCH 1/8] feat: emit agent spans in RealtimeSession for SDK tracing RealtimeSession.__aenter__ now creates and starts an agent_span for the starting agent, and finishes it when the session closes. On handoff, the outgoing agent's span is finished and a new span is started for the incoming agent, matching the tracing behaviour of the regular Runner. The span records the agent name, tool names, and handoff target names. Passing tracing_disabled=True in run_config suppresses all spans. Fixes #1845 --- src/agents/realtime/session.py | 31 ++++ tests/realtime/test_session.py | 8 + tests/realtime/test_session_exceptions.py | 2 + tests/realtime/test_session_spans.py | 169 ++++++++++++++++++++++ 4 files changed, 210 insertions(+) create mode 100644 tests/realtime/test_session_spans.py diff --git a/src/agents/realtime/session.py b/src/agents/realtime/session.py index ca809dd9c4..cbcbd02874 100644 --- a/src/agents/realtime/session.py +++ b/src/agents/realtime/session.py @@ -24,6 +24,8 @@ from ..run_context import RunContextWrapper, TContext from ..tool import DEFAULT_APPROVAL_REJECTION_MESSAGE, FunctionTool, invoke_function_tool from ..tool_context import ToolContext +from ..tracing import Span, agent_span +from ..tracing.span_data import AgentSpanData from ..util._approvals import evaluate_needs_approval_setting from .agent import RealtimeAgent from .config import RealtimeRunConfig, RealtimeSessionModelSettings, RealtimeUserInput @@ -193,6 +195,7 @@ def __init__( self._guardrail_tasks: set[asyncio.Task[Any]] = set() self._tool_call_tasks: set[asyncio.Task[Any]] = set() self._async_tool_calls: bool = bool(self._run_config.get("async_tool_calls", True)) + self._current_agent_span: Span[AgentSpanData] | None = None @property def model(self) -> RealtimeModel: @@ -203,6 +206,10 @@ async def __aenter__(self) -> RealtimeSession: """Start the 
session by connecting to the model. After this, you will be able to stream events from the model and send messages and audio to the model. """ + # Start an agent span for the initial agent. + self._current_agent_span = self._make_agent_span(self._current_agent) + self._current_agent_span.start(mark_as_current=True) + # Add ourselves as a listener self._model.add_listener(self) @@ -815,9 +822,16 @@ async def _handle_tool_call( # Store previous agent for event previous_agent = agent + # Finish the span for the outgoing agent and start one for the new agent. + if self._current_agent_span is not None: + self._current_agent_span.finish(reset_current=True) + # Update current agent self._current_agent = result + self._current_agent_span = self._make_agent_span(self._current_agent) + self._current_agent_span.start(mark_as_current=True) + # Get updated model settings from new agent updated_settings = await self._get_updated_model_settings_from_agent( starting_settings=None, @@ -1235,6 +1249,11 @@ async def _cleanup(self) -> None: self._wake_event_iterators() return + # Finish the active agent span. 
+ if self._current_agent_span is not None: + self._current_agent_span.finish(reset_current=True) + self._current_agent_span = None + # Cancel and cleanup guardrail tasks self._cleanup_guardrail_tasks() self._cleanup_tool_call_tasks() @@ -1253,6 +1272,18 @@ async def _cleanup(self) -> None: self._closed = True self._wake_event_iterators() + def _make_agent_span(self, agent: RealtimeAgent) -> Span[AgentSpanData]: + """Create a new agent span for the given agent, respecting tracing_disabled.""" + disabled: bool = bool(self._run_config.get("tracing_disabled", False)) + handoff_names = [h.agent_name if isinstance(h, Handoff) else h.name for h in agent.handoffs] + tool_names = [t.name for t in agent.tools if isinstance(t, FunctionTool)] + return agent_span( + name=agent.name, + handoffs=handoff_names or None, + tools=tool_names or None, + disabled=disabled, + ) + async def _get_updated_model_settings_from_agent( self, starting_settings: RealtimeSessionModelSettings | None, diff --git a/tests/realtime/test_session.py b/tests/realtime/test_session.py index 03148c739a..e45db679f5 100644 --- a/tests/realtime/test_session.py +++ b/tests/realtime/test_session.py @@ -325,9 +325,11 @@ async def close(self): @pytest.fixture def mock_agent(): agent = Mock(spec=RealtimeAgent) + agent.name = "mock_agent" agent.get_all_tools = AsyncMock(return_value=[]) type(agent).handoffs = PropertyMock(return_value=[]) + type(agent).tools = PropertyMock(return_value=[]) type(agent).output_guardrails = PropertyMock(return_value=[]) return agent @@ -2463,9 +2465,11 @@ async def test_session_gets_model_settings_from_agent_during_connection(self): # Create agent with specific settings agent = Mock(spec=RealtimeAgent) + agent.name = "test_agent" agent.get_system_prompt = AsyncMock(return_value="Test agent instructions") agent.get_all_tools = AsyncMock(return_value=[{"type": "function", "name": "test_tool"}]) agent.handoffs = [] + agent.tools = [] session = RealtimeSession(mock_model, agent, None) @@ 
-2492,9 +2496,11 @@ async def test_model_config_overrides_model_settings_not_agent(self): mock_model.add_listener = Mock() agent = Mock(spec=RealtimeAgent) + agent.name = "test_agent" agent.get_system_prompt = AsyncMock(return_value="Agent instructions") agent.get_all_tools = AsyncMock(return_value=[{"type": "function", "name": "agent_tool"}]) agent.handoffs = [] + agent.tools = [] # Provide model config with settings model_config: RealtimeModelConfig = { @@ -2530,8 +2536,10 @@ async def test_handoffs_are_included_in_model_settings(self): # Create agent with handoffs agent = Mock(spec=RealtimeAgent) + agent.name = "test_agent" agent.get_system_prompt = AsyncMock(return_value="Agent with handoffs") agent.get_all_tools = AsyncMock(return_value=[]) + agent.tools = [] # Create a mock handoff handoff_agent = Mock(spec=RealtimeAgent) diff --git a/tests/realtime/test_session_exceptions.py b/tests/realtime/test_session_exceptions.py index da93902368..f306761154 100644 --- a/tests/realtime/test_session_exceptions.py +++ b/tests/realtime/test_session_exceptions.py @@ -89,9 +89,11 @@ async def interrupt(self) -> None: def fake_agent(): """Create a fake agent for testing.""" agent = Mock() + agent.name = "fake_agent" agent.get_all_tools = AsyncMock(return_value=[]) agent.get_system_prompt = AsyncMock(return_value="test instructions") agent.handoffs = [] + agent.tools = [] return agent diff --git a/tests/realtime/test_session_spans.py b/tests/realtime/test_session_spans.py new file mode 100644 index 0000000000..3eea62c975 --- /dev/null +++ b/tests/realtime/test_session_spans.py @@ -0,0 +1,169 @@ +"""Tests that RealtimeSession creates agent spans for SDK-level tracing.""" + +from __future__ import annotations + +from typing import Any + +import pytest + +from agents.realtime.agent import RealtimeAgent +from agents.realtime.model import RealtimeModel, RealtimeModelConfig, RealtimeModelListener +from agents.realtime.model_events import RealtimeModelEvent +from 
agents.realtime.session import RealtimeSession +from agents.tracing import trace +from agents.tracing.span_data import AgentSpanData +from tests.testing_processor import SPAN_PROCESSOR_TESTING + + +class _FakeRealtimeModel(RealtimeModel): + """Minimal fake that never sends events and succeeds immediately.""" + + def __init__(self) -> None: + self._listeners: list[RealtimeModelListener] = [] + + def add_listener(self, listener: RealtimeModelListener) -> None: + self._listeners.append(listener) + + def remove_listener(self, listener: RealtimeModelListener) -> None: + if listener in self._listeners: + self._listeners.remove(listener) + + async def connect(self, options: RealtimeModelConfig) -> None: + pass + + async def close(self) -> None: + pass + + async def send_event(self, event: Any) -> None: + pass + + async def send_message( + self, message: Any, other_event_data: dict[str, Any] | None = None + ) -> None: + pass + + async def send_audio(self, audio: bytes, *, commit: bool = False) -> None: + pass + + async def send_tool_output(self, tool_call: Any, output: str, start_response: bool) -> None: + pass + + async def interrupt(self) -> None: + pass + + async def dispatch(self, event: RealtimeModelEvent) -> None: + """Send an event to all listeners (test helper).""" + for listener in self._listeners: + await listener.on_event(event) + + +def _make_session( + agent: RealtimeAgent, + model: _FakeRealtimeModel | None = None, + *, + tracing_disabled: bool = False, +) -> RealtimeSession: + return RealtimeSession( + model=model or _FakeRealtimeModel(), + agent=agent, + context=None, + run_config={"tracing_disabled": tracing_disabled} if tracing_disabled else {}, + ) + + +@pytest.mark.asyncio +async def test_session_creates_agent_span_on_enter(): + """Entering a RealtimeSession context must create an agent span.""" + agent = RealtimeAgent(name="greeter") + session = _make_session(agent) + + with trace("test"): + async with session: + pass + + spans = 
SPAN_PROCESSOR_TESTING.get_ordered_spans() + agent_spans = [s for s in spans if isinstance(s.span_data, AgentSpanData)] + assert len(agent_spans) == 1, f"Expected 1 agent span, got {len(agent_spans)}" + + +@pytest.mark.asyncio +async def test_session_agent_span_has_correct_name(): + """The agent span name must match the RealtimeAgent name.""" + agent = RealtimeAgent(name="support_bot") + session = _make_session(agent) + + with trace("test"): + async with session: + pass + + spans = SPAN_PROCESSOR_TESTING.get_ordered_spans() + agent_spans = [s for s in spans if isinstance(s.span_data, AgentSpanData)] + assert agent_spans[0].span_data.name == "support_bot" + + +@pytest.mark.asyncio +async def test_session_agent_span_finished_after_close(): + """The agent span must be finished (exported) once the session closes.""" + agent = RealtimeAgent(name="closer") + session = _make_session(agent) + + with trace("test"): + async with session: + pass + + spans = SPAN_PROCESSOR_TESTING.get_ordered_spans() + agent_spans = [s for s in spans if isinstance(s.span_data, AgentSpanData)] + assert agent_spans[0].ended_at is not None + + +@pytest.mark.asyncio +async def test_session_span_includes_tool_names(): + """The agent span records the names of tools available to the agent.""" + from agents.tool import function_tool + + @function_tool + def my_tool() -> str: + """A test tool.""" + return "ok" + + agent = RealtimeAgent(name="tool_agent", tools=[my_tool]) + session = _make_session(agent) + + with trace("test"): + async with session: + pass + + spans = SPAN_PROCESSOR_TESTING.get_ordered_spans() + agent_spans = [s for s in spans if isinstance(s.span_data, AgentSpanData)] + assert agent_spans[0].span_data.tools == ["my_tool"] + + +@pytest.mark.asyncio +async def test_session_span_includes_handoff_names(): + """The agent span records the names of handoff targets.""" + child = RealtimeAgent(name="specialist") + agent = RealtimeAgent(name="router", handoffs=[child]) + session = 
_make_session(agent) + + with trace("test"): + async with session: + pass + + spans = SPAN_PROCESSOR_TESTING.get_ordered_spans() + agent_spans = [s for s in spans if isinstance(s.span_data, AgentSpanData)] + assert agent_spans[0].span_data.handoffs == ["specialist"] + + +@pytest.mark.asyncio +async def test_tracing_disabled_creates_no_agent_spans(): + """When tracing_disabled=True, no agent spans should be emitted.""" + agent = RealtimeAgent(name="silent") + session = _make_session(agent, tracing_disabled=True) + + with trace("test"): + async with session: + pass + + spans = SPAN_PROCESSOR_TESTING.get_ordered_spans() + agent_spans = [s for s in spans if isinstance(s.span_data, AgentSpanData)] + assert len(agent_spans) == 0, f"Expected 0 agent spans, got {len(agent_spans)}" From ed34935eea40a88187e5939c350d52751ece9710 Mon Sep 17 00:00:00 2001 From: jordanchendev Date: Thu, 4 Jun 2026 09:29:52 +0800 Subject: [PATCH 2/8] fix: avoid cross-context span token errors and use resolved tool list Two issues raised in automated review of the initial commit: P1 (crash): When async_tool_calls=True (the default), handoffs execute inside asyncio.create_task(). start(mark_as_current=True) stores a contextvars token in the background task's context. _cleanup() runs in the main task and cannot reset that token; finish(reset_current=True) raises ValueError: Token was created in a different Context. Fix: _cleanup uses reset_current=False (session is ending; no need to restore the previous current span). The new agent span started during a handoff uses mark_as_current=False for the same reason. P2 (inaccurate metadata): _make_agent_span read agent.tools (static list) instead of the resolved list from get_all_tools(), so disabled function tools appeared in the span and MCP tools were omitted. 
Fix: tool names are no longer set in _make_agent_span; both __aenter__ and the handoff handler call agent.get_all_tools() after obtaining model settings and write the resolved names to span_data.tools via get_tool_trace_name_for_tool(), matching the pattern used by Runner. --- src/agents/realtime/session.py | 43 +++++++++++++++++++++++++++------- 1 file changed, 35 insertions(+), 8 deletions(-) diff --git a/src/agents/realtime/session.py b/src/agents/realtime/session.py index cbcbd02874..281a1d847c 100644 --- a/src/agents/realtime/session.py +++ b/src/agents/realtime/session.py @@ -14,6 +14,7 @@ FunctionToolLookupKey, get_function_tool_lookup_key_for_tool, get_function_tool_namespace, + get_tool_trace_name_for_tool, ) from ..agent import Agent from ..exceptions import UserError @@ -219,6 +220,13 @@ async def __aenter__(self) -> RealtimeSession: agent=self._current_agent, ) + # Update span with the resolved tool list (includes MCP tools and respects is_enabled). + if self._current_agent_span is not None: + resolved_tools = await self._current_agent.get_all_tools(self._context_wrapper) + self._current_agent_span.span_data.tools = [ + n for t in resolved_tools if (n := get_tool_trace_name_for_tool(t)) is not None + ] or None + # Connect to the model await self._model.connect(model_config) @@ -822,15 +830,20 @@ async def _handle_tool_call( # Store previous agent for event previous_agent = agent - # Finish the span for the outgoing agent and start one for the new agent. + # Finish the span for the outgoing agent. Use reset_current=False because this + # runs inside an asyncio background task; resetting a token from a different + # context raises ValueError. if self._current_agent_span is not None: - self._current_agent_span.finish(reset_current=True) + self._current_agent_span.finish(reset_current=False) # Update current agent self._current_agent = result + # Start a span for the new agent. 
Use mark_as_current=False for the same + # cross-context reason: _cleanup runs in the main task and cannot reset a + # token created here. self._current_agent_span = self._make_agent_span(self._current_agent) - self._current_agent_span.start(mark_as_current=True) + self._current_agent_span.start(mark_as_current=False) # Get updated model settings from new agent updated_settings = await self._get_updated_model_settings_from_agent( @@ -838,6 +851,15 @@ async def _handle_tool_call( agent=self._current_agent, ) + # Update span with the resolved tool list for the new agent. + if self._current_agent_span is not None: + resolved_tools = await self._current_agent.get_all_tools(self._context_wrapper) + self._current_agent_span.span_data.tools = [ + n + for t in resolved_tools + if (n := get_tool_trace_name_for_tool(t)) is not None + ] or None + # Send handoff event await self._put_event( RealtimeHandoffEvent( @@ -1249,9 +1271,12 @@ async def _cleanup(self) -> None: self._wake_event_iterators() return - # Finish the active agent span. + # Finish the active agent span. Use reset_current=False because _cleanup may be called + # from a different asyncio context than the one that started the span (e.g. after a + # handoff that ran in a background task), and resetting a token across contexts raises + # ValueError. if self._current_agent_span is not None: - self._current_agent_span.finish(reset_current=True) + self._current_agent_span.finish(reset_current=False) self._current_agent_span = None # Cancel and cleanup guardrail tasks @@ -1273,14 +1298,16 @@ async def _cleanup(self) -> None: self._wake_event_iterators() def _make_agent_span(self, agent: RealtimeAgent) -> Span[AgentSpanData]: - """Create a new agent span for the given agent, respecting tracing_disabled.""" + """Create a new agent span for the given agent, respecting tracing_disabled. 
+ + Tool names are intentionally omitted here; callers must update span_data.tools + asynchronously via get_all_tools() to include MCP tools and respect is_enabled. + """ disabled: bool = bool(self._run_config.get("tracing_disabled", False)) handoff_names = [h.agent_name if isinstance(h, Handoff) else h.name for h in agent.handoffs] - tool_names = [t.name for t in agent.tools if isinstance(t, FunctionTool)] return agent_span( name=agent.name, handoffs=handoff_names or None, - tools=tool_names or None, disabled=disabled, ) From ceb50e07e90fe14aedd673e9f4ddf5de40adc2d0 Mon Sep 17 00:00:00 2001 From: jordanchendev Date: Thu, 4 Jun 2026 09:44:24 +0800 Subject: [PATCH 3/8] fix: reset initial agent span context token in _cleanup MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Two issues fixed in RealtimeSession tracing: P1 – After closing a session without a handoff, the agent span started in __aenter__ with mark_as_current=True left its context-var token unreset because _cleanup always used reset_current=False. Subsequent spans in the same task were incorrectly parented under the stale (finished) span. Fix: manage the context-var token explicitly via Scope.set_current_span() in __aenter__ and store it in self._initial_span_token. _cleanup now calls Scope.reset_current_span() on that token, which is always safe because _cleanup runs via __aexit__ in the same task as __aenter__. The current span (which may be a handoff span started with mark_as_current=False) is still finished with reset_current=False to avoid cross-context token errors. P2 – If __aenter__ raised an exception after span start (e.g. model.connect() failed), __aexit__ was not called, leaving the span unfinished and current in the context. Fix: wrap the fallible setup section in a try/except that finishes the span and resets the token before re-raising. 
Fixes #1845 --- src/agents/realtime/session.py | 83 ++++++++++++++++++++++------------ 1 file changed, 54 insertions(+), 29 deletions(-) diff --git a/src/agents/realtime/session.py b/src/agents/realtime/session.py index 281a1d847c..289539dab9 100644 --- a/src/agents/realtime/session.py +++ b/src/agents/realtime/session.py @@ -1,6 +1,7 @@ from __future__ import annotations import asyncio +import contextvars import dataclasses import inspect import json @@ -26,6 +27,7 @@ from ..tool import DEFAULT_APPROVAL_REJECTION_MESSAGE, FunctionTool, invoke_function_tool from ..tool_context import ToolContext from ..tracing import Span, agent_span +from ..tracing.scope import Scope from ..tracing.span_data import AgentSpanData from ..util._approvals import evaluate_needs_approval_setting from .agent import RealtimeAgent @@ -197,6 +199,9 @@ def __init__( self._tool_call_tasks: set[asyncio.Task[Any]] = set() self._async_tool_calls: bool = bool(self._run_config.get("async_tool_calls", True)) self._current_agent_span: Span[AgentSpanData] | None = None + # Context-var token from the span created in __aenter__. Tracked here so _cleanup can + # reset it even if a handoff already called finish() on that span in a background task. + self._initial_span_token: contextvars.Token[Any] | None = None @property def model(self) -> RealtimeModel: @@ -207,38 +212,53 @@ async def __aenter__(self) -> RealtimeSession: """Start the session by connecting to the model. After this, you will be able to stream events from the model and send messages and audio to the model. """ - # Start an agent span for the initial agent. + # Start an agent span for the initial agent. Use mark_as_current=False and manage + # the context-var token ourselves so _cleanup can reset it even when a handoff + # already called finish() on this span from a background task. 
self._current_agent_span = self._make_agent_span(self._current_agent) - self._current_agent_span.start(mark_as_current=True) + self._current_agent_span.start(mark_as_current=False) + self._initial_span_token = Scope.set_current_span(self._current_agent_span) - # Add ourselves as a listener - self._model.add_listener(self) - - model_config = self._model_config.copy() - model_config["initial_model_settings"] = await self._get_updated_model_settings_from_agent( - starting_settings=self._model_config.get("initial_model_settings", None), - agent=self._current_agent, - ) + try: + # Add ourselves as a listener + self._model.add_listener(self) + + model_config = self._model_config.copy() + model_config[ + "initial_model_settings" + ] = await self._get_updated_model_settings_from_agent( + starting_settings=self._model_config.get("initial_model_settings", None), + agent=self._current_agent, + ) - # Update span with the resolved tool list (includes MCP tools and respects is_enabled). - if self._current_agent_span is not None: - resolved_tools = await self._current_agent.get_all_tools(self._context_wrapper) - self._current_agent_span.span_data.tools = [ - n for t in resolved_tools if (n := get_tool_trace_name_for_tool(t)) is not None - ] or None + # Update span with the resolved tool list (includes MCP tools and respects is_enabled). 
+ if self._current_agent_span is not None: + resolved_tools = await self._current_agent.get_all_tools(self._context_wrapper) + self._current_agent_span.span_data.tools = [ + n for t in resolved_tools if (n := get_tool_trace_name_for_tool(t)) is not None + ] or None - # Connect to the model - await self._model.connect(model_config) + # Connect to the model + await self._model.connect(model_config) - # Emit initial history update - await self._put_event( - RealtimeHistoryUpdated( - history=self._history, - info=self._event_info, + # Emit initial history update + await self._put_event( + RealtimeHistoryUpdated( + history=self._history, + info=self._event_info, + ) ) - ) - return self + return self + except BaseException: + # __aexit__ is not called when __aenter__ raises, so clean up the span here. + if self._current_agent_span is not None: + self._current_agent_span.finish(reset_current=False) + self._current_agent_span = None + if self._initial_span_token is not None: + Scope.reset_current_span(self._initial_span_token) + self._initial_span_token = None + raise async def enter(self) -> RealtimeSession: """Enter the async context manager. We strongly recommend using the async context manager @@ -1271,13 +1291,18 @@ async def _cleanup(self) -> None: self._wake_event_iterators() return - # Finish the active agent span. Use reset_current=False because _cleanup may be called - # from a different asyncio context than the one that started the span (e.g. after a - # handoff that ran in a background task), and resetting a token across contexts raises - # ValueError. + # Finish the active agent span. Use reset_current=False because, after a handoff, + # the current span was started in a background task (mark_as_current=False) and has + # no token to reset. The context-var token for the *initial* span (created in + # __aenter__) is reset separately below. 
if self._current_agent_span is not None: self._current_agent_span.finish(reset_current=False) self._current_agent_span = None + # Reset the context-var token that __aenter__ stored. _cleanup is always called from + # __aexit__ (same task context as __aenter__), so this reset is always safe here. + if self._initial_span_token is not None: + Scope.reset_current_span(self._initial_span_token) + self._initial_span_token = None # Cancel and cleanup guardrail tasks self._cleanup_guardrail_tasks() From b61f80f55743ae24bd15b663d6c6eae312c6a1c1 Mon Sep 17 00:00:00 2001 From: jordanchendev Date: Thu, 4 Jun 2026 10:09:22 +0800 Subject: [PATCH 4/8] fix: guard NoOpSpan context install, clear span for handoff siblings, filter handoffs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Three issues fixed per tracing conventions (local_dev/notes/tracing-conventions.md): Convention 3 — NoOpSpan poisoning: __aenter__ now skips Scope.set_current_span() when _make_agent_span() returns a NoOpSpan (no active trace, or tracing_disabled). Installing a NoOpSpan as current would cause provider.create_span() to return NoOpSpan for every subsequent child span in the same context (_is_noop_span check in provider.py). Convention 5 — Span parent fixed at create_span() call time: Before creating the incoming agent span in the handoff handler (background task), the current span context is temporarily cleared via Scope.set_current_span(None) and immediately restored. Without this, the outgoing agent span (still "current" in the background task's copied context because finish(reset_current=False) does not reset the context var) would be used as parent_id for the new span, producing incorrect parent-child nesting instead of sibling spans under the trace root. Convention 7 — is_enabled filtering for handoffs: _make_agent_span() no longer reads agent.handoffs directly. 
Both __aenter__ and the handoff handler now call _get_handoffs() (which applies each Handoff.is_enabled predicate) and write the filtered result to span_data.handoffs, matching the pattern used by Runner for handoff metadata. Tests added for all three behaviors: - test_no_active_trace_does_not_poison_span_context - test_disabled_handoff_excluded_from_span_metadata - test_handoff_span_is_sibling_not_child_of_initial_span Fixes #1845 --- src/agents/realtime/session.py | 53 +++++++++++----- tests/realtime/test_session_spans.py | 93 +++++++++++++++++++++++++++- 2 files changed, 129 insertions(+), 17 deletions(-) diff --git a/src/agents/realtime/session.py b/src/agents/realtime/session.py index 289539dab9..16b50c1ff5 100644 --- a/src/agents/realtime/session.py +++ b/src/agents/realtime/session.py @@ -29,6 +29,7 @@ from ..tracing import Span, agent_span from ..tracing.scope import Scope from ..tracing.span_data import AgentSpanData +from ..tracing.spans import NoOpSpan from ..util._approvals import evaluate_needs_approval_setting from .agent import RealtimeAgent from .config import RealtimeRunConfig, RealtimeSessionModelSettings, RealtimeUserInput @@ -217,7 +218,11 @@ async def __aenter__(self) -> RealtimeSession: # already called finish() on this span from a background task. self._current_agent_span = self._make_agent_span(self._current_agent) self._current_agent_span.start(mark_as_current=False) - self._initial_span_token = Scope.set_current_span(self._current_agent_span) + # Only install the span as current when it is a real span. Setting a NoOpSpan as + # current poisons the context: provider.create_span() returns NoOpSpan for every + # child span when it detects a no-op parent (provider.py _is_noop_span check). 
+ if not isinstance(self._current_agent_span, NoOpSpan): + self._initial_span_token = Scope.set_current_span(self._current_agent_span) try: # Add ourselves as a listener @@ -231,12 +236,19 @@ async def __aenter__(self) -> RealtimeSession: agent=self._current_agent, ) - # Update span with the resolved tool list (includes MCP tools and respects is_enabled). - if self._current_agent_span is not None: + # Update span metadata: tools (respects is_enabled, includes MCP) and + # handoffs (filtered by _get_handoffs to exclude disabled ones). + if not isinstance(self._current_agent_span, NoOpSpan): resolved_tools = await self._current_agent.get_all_tools(self._context_wrapper) self._current_agent_span.span_data.tools = [ n for t in resolved_tools if (n := get_tool_trace_name_for_tool(t)) is not None ] or None + enabled_handoffs = await self._get_handoffs( + self._current_agent, self._context_wrapper + ) + self._current_agent_span.span_data.handoffs = [ + h.agent_name for h in enabled_handoffs + ] or None # Connect to the model await self._model.connect(model_config) @@ -859,11 +871,17 @@ async def _handle_tool_call( # Update current agent self._current_agent = result - # Start a span for the new agent. Use mark_as_current=False for the same - # cross-context reason: _cleanup runs in the main task and cannot reset a - # token created here. + # Create the incoming agent span with a clear current-span context so that + # its parent is the trace root, not the finished outgoing agent span. + # The outgoing span is still "current" in this background task context + # (finish(reset_current=False) does not reset the context var), so we must + # temporarily clear it before calling _make_agent_span() / agent_span() — + # provider.create_span() reads Scope.get_current_span() at creation time to + # determine parent_id. 
+ _handoff_clear_token = Scope.set_current_span(None) self._current_agent_span = self._make_agent_span(self._current_agent) self._current_agent_span.start(mark_as_current=False) + Scope.reset_current_span(_handoff_clear_token) # Get updated model settings from new agent updated_settings = await self._get_updated_model_settings_from_agent( @@ -871,14 +889,20 @@ async def _handle_tool_call( agent=self._current_agent, ) - # Update span with the resolved tool list for the new agent. - if self._current_agent_span is not None: + # Update span metadata: tools and filtered handoffs. + if not isinstance(self._current_agent_span, NoOpSpan): resolved_tools = await self._current_agent.get_all_tools(self._context_wrapper) self._current_agent_span.span_data.tools = [ n for t in resolved_tools if (n := get_tool_trace_name_for_tool(t)) is not None ] or None + enabled_handoffs = await self._get_handoffs( + self._current_agent, self._context_wrapper + ) + self._current_agent_span.span_data.handoffs = [ + h.agent_name for h in enabled_handoffs + ] or None # Send handoff event await self._put_event( @@ -1325,16 +1349,13 @@ async def _cleanup(self) -> None: def _make_agent_span(self, agent: RealtimeAgent) -> Span[AgentSpanData]: """Create a new agent span for the given agent, respecting tracing_disabled. - Tool names are intentionally omitted here; callers must update span_data.tools - asynchronously via get_all_tools() to include MCP tools and respect is_enabled. + Both tool names and handoff names are intentionally omitted here. Callers must + update span_data.tools via get_all_tools() and span_data.handoffs via + _get_handoffs() asynchronously to reflect only what is actually sent to the model + (respects is_enabled on both tools and handoffs). 
""" disabled: bool = bool(self._run_config.get("tracing_disabled", False)) - handoff_names = [h.agent_name if isinstance(h, Handoff) else h.name for h in agent.handoffs] - return agent_span( - name=agent.name, - handoffs=handoff_names or None, - disabled=disabled, - ) + return agent_span(name=agent.name, disabled=disabled) async def _get_updated_model_settings_from_agent( self, diff --git a/tests/realtime/test_session_spans.py b/tests/realtime/test_session_spans.py index 3eea62c975..80a13185a3 100644 --- a/tests/realtime/test_session_spans.py +++ b/tests/realtime/test_session_spans.py @@ -2,15 +2,17 @@ from __future__ import annotations +import asyncio from typing import Any import pytest from agents.realtime.agent import RealtimeAgent from agents.realtime.model import RealtimeModel, RealtimeModelConfig, RealtimeModelListener -from agents.realtime.model_events import RealtimeModelEvent +from agents.realtime.model_events import RealtimeModelEvent, RealtimeModelToolCallEvent from agents.realtime.session import RealtimeSession from agents.tracing import trace +from agents.tracing.scope import Scope from agents.tracing.span_data import AgentSpanData from tests.testing_processor import SPAN_PROCESSOR_TESTING @@ -167,3 +169,92 @@ async def test_tracing_disabled_creates_no_agent_spans(): spans = SPAN_PROCESSOR_TESTING.get_ordered_spans() agent_spans = [s for s in spans if isinstance(s.span_data, AgentSpanData)] assert len(agent_spans) == 0, f"Expected 0 agent spans, got {len(agent_spans)}" + + +@pytest.mark.asyncio +async def test_no_active_trace_does_not_poison_span_context(): + """Without an outer trace(), the session must not install a NoOpSpan as current. + + Convention: provider returns NoOpSpan when no active trace exists. Installing + a NoOpSpan as current would make every span created afterward also a NoOpSpan + (provider._is_noop_span check). The session must skip Scope.set_current_span() + for NoOpSpans so ambient context is unchanged after the session closes. 
+ """ + span_before = Scope.get_current_span() + agent = RealtimeAgent(name="agent") + session = _make_session(agent) + + # Enter/exit WITHOUT any enclosing trace — span will be a NoOpSpan. + async with session: + pass + + span_after = Scope.get_current_span() + assert span_before is span_after, ( + "Session must not permanently alter the current span context when no active trace exists." + ) + + +@pytest.mark.asyncio +async def test_disabled_handoff_excluded_from_span_metadata(): + """Handoffs with is_enabled=False must not appear in span handoff metadata. + + Convention: span metadata must reflect what was actually sent to the model. + _get_handoffs() filters by is_enabled; raw agent.handoffs must not be used. + """ + from agents.realtime.handoffs import realtime_handoff + + specialist = RealtimeAgent(name="specialist") + disabled_handoff = realtime_handoff(specialist, is_enabled=False) + agent = RealtimeAgent(name="router", handoffs=[disabled_handoff]) + session = _make_session(agent) + + with trace("test"): + async with session: + pass + + spans = SPAN_PROCESSOR_TESTING.get_ordered_spans() + agent_spans = [s for s in spans if isinstance(s.span_data, AgentSpanData)] + assert agent_spans[0].span_data.handoffs is None, ( + f"Disabled handoff should not appear in span metadata, " + f"got: {agent_spans[0].span_data.handoffs}" + ) + + +@pytest.mark.asyncio +async def test_handoff_span_is_sibling_not_child_of_initial_span(): + """After a handoff the new agent span must be a sibling of the first, not its child. + + Convention: the incoming agent span's parent_id must not equal the outgoing agent + span's span_id. Both should be direct children of the trace root (parent_id=None). + """ + specialist = RealtimeAgent(name="specialist") + router = RealtimeAgent(name="router", handoffs=[specialist]) + model = _FakeRealtimeModel() + session = _make_session(router, model) + + with trace("test"): + async with session: + # Fire the handoff tool call that the model would send. 
+ await model.dispatch( + RealtimeModelToolCallEvent( + name="transfer_to_specialist", + call_id="call_001", + arguments="{}", + ) + ) + # Let the background task spawned by async_tool_calls complete. + await asyncio.sleep(0.05) + + spans = SPAN_PROCESSOR_TESTING.get_ordered_spans() + agent_spans = [s for s in spans if isinstance(s.span_data, AgentSpanData)] + assert len(agent_spans) == 2, ( + f"Expected 2 agent spans (router + specialist), got {len(agent_spans)}" + ) + + router_span = next(s for s in agent_spans if s.span_data.name == "router") + specialist_span = next(s for s in agent_spans if s.span_data.name == "specialist") + + assert specialist_span.parent_id != router_span.span_id, ( + "Specialist span must not be a child of the router span. " + f"specialist.parent_id={specialist_span.parent_id}, router.span_id={router_span.span_id}" + ) From 0470b2d0e3ed595ce735257742dc0a8546b2c2cf Mon Sep 17 00:00:00 2001 From: jordanchendev Date: Thu, 4 Jun 2026 10:47:53 +0800 Subject: [PATCH 5/8] fix: return (settings, tools, handoffs) tuple from _get_updated_model_settings_from_agent Eliminates duplicate get_all_tools() / _get_handoffs() calls by returning already-computed results as a tuple. Callers in __aenter__, update_agent(), and handoff handler now unpack the tuple; test callers updated to match. Also guard _cleanup token reset against cross-task ValueError; defer span context reset to __aexit__ which is guaranteed to run in the same task as __aenter__. Add 5 new regression tests covering NoOpSpan poisoning, disabled handoff filtering, sibling span hierarchy, cross-task cleanup, and direct close() context reset. 
--- src/agents/realtime/session.py | 68 +++++++++++++++++----------- tests/realtime/test_session.py | 8 ++-- tests/realtime/test_session_spans.py | 42 +++++++++++++++++ tests/realtime/test_tracing.py | 2 +- 4 files changed, 89 insertions(+), 31 deletions(-) diff --git a/src/agents/realtime/session.py b/src/agents/realtime/session.py index 16b50c1ff5..24fc374e50 100644 --- a/src/agents/realtime/session.py +++ b/src/agents/realtime/session.py @@ -229,23 +229,22 @@ async def __aenter__(self) -> RealtimeSession: self._model.add_listener(self) model_config = self._model_config.copy() - model_config[ - "initial_model_settings" - ] = await self._get_updated_model_settings_from_agent( + ( + initial_settings, + resolved_tools, + enabled_handoffs, + ) = await self._get_updated_model_settings_from_agent( starting_settings=self._model_config.get("initial_model_settings", None), agent=self._current_agent, ) + model_config["initial_model_settings"] = initial_settings - # Update span metadata: tools (respects is_enabled, includes MCP) and - # handoffs (filtered by _get_handoffs to exclude disabled ones). + # Reuse the tools/handoffs already resolved above — avoids a second call to + # get_all_tools()/get_handoffs() and ensures span metadata matches the model. if not isinstance(self._current_agent_span, NoOpSpan): - resolved_tools = await self._current_agent.get_all_tools(self._context_wrapper) self._current_agent_span.span_data.tools = [ n for t in resolved_tools if (n := get_tool_trace_name_for_tool(t)) is not None ] or None - enabled_handoffs = await self._get_handoffs( - self._current_agent, self._context_wrapper - ) self._current_agent_span.span_data.handoffs = [ h.agent_name for h in enabled_handoffs ] or None @@ -281,6 +280,14 @@ async def enter(self) -> RealtimeSession: async def __aexit__(self, _exc_type: Any, _exc_val: Any, _exc_tb: Any) -> None: """End the session.""" + # Reset the initial span token here. 
__aexit__ is always invoked in the same + # asyncio task as __aenter__, so this reset is unconditionally safe. We do it + # here rather than relying solely on _cleanup because _cleanup is also reachable + # from close() and __aiter__, which may run in different tasks and would raise + # ValueError on ContextVar.reset(). + if self._initial_span_token is not None: + Scope.reset_current_span(self._initial_span_token) + self._initial_span_token = None await self.close() async def __aiter__(self) -> AsyncIterator[RealtimeSessionEvent]: @@ -327,7 +334,7 @@ async def update_agent(self, agent: RealtimeAgent) -> None: """Update the active agent for this session and apply its settings to the model.""" self._current_agent = agent - updated_settings = await self._get_updated_model_settings_from_agent( + updated_settings, _, _ = await self._get_updated_model_settings_from_agent( starting_settings=None, agent=self._current_agent, ) @@ -883,23 +890,23 @@ async def _handle_tool_call( self._current_agent_span.start(mark_as_current=False) Scope.reset_current_span(_handoff_clear_token) - # Get updated model settings from new agent - updated_settings = await self._get_updated_model_settings_from_agent( + # Get updated model settings from new agent; reuse resolved tools and + # handoffs for span metadata to avoid a redundant second call. + ( + updated_settings, + resolved_tools, + enabled_handoffs, + ) = await self._get_updated_model_settings_from_agent( starting_settings=None, agent=self._current_agent, ) - # Update span metadata: tools and filtered handoffs. 
if not isinstance(self._current_agent_span, NoOpSpan): - resolved_tools = await self._current_agent.get_all_tools(self._context_wrapper) self._current_agent_span.span_data.tools = [ n for t in resolved_tools if (n := get_tool_trace_name_for_tool(t)) is not None ] or None - enabled_handoffs = await self._get_handoffs( - self._current_agent, self._context_wrapper - ) self._current_agent_span.span_data.handoffs = [ h.agent_name for h in enabled_handoffs ] or None @@ -1315,17 +1322,20 @@ async def _cleanup(self) -> None: self._wake_event_iterators() return - # Finish the active agent span. Use reset_current=False because, after a handoff, - # the current span was started in a background task (mark_as_current=False) and has - # no token to reset. The context-var token for the *initial* span (created in - # __aenter__) is reset separately below. + # Finish the active agent span. if self._current_agent_span is not None: self._current_agent_span.finish(reset_current=False) self._current_agent_span = None - # Reset the context-var token that __aenter__ stored. _cleanup is always called from - # __aexit__ (same task context as __aenter__), so this reset is always safe here. + # Reset the initial span's context-var token. __aexit__ handles this + # unconditionally (it runs in the same task as __aenter__). This fallback + # handles direct close() calls from the same task; if close() or __aiter__ + # triggers _cleanup from a different task the ValueError is caught and the + # token is left for __aexit__ to reset. if self._initial_span_token is not None: - Scope.reset_current_span(self._initial_span_token) + try: + Scope.reset_current_span(self._initial_span_token) + except ValueError: + pass # Cross-task call; __aexit__ will reset from the correct task. 
self._initial_span_token = None # Cancel and cleanup guardrail tasks @@ -1361,7 +1371,13 @@ async def _get_updated_model_settings_from_agent( self, starting_settings: RealtimeSessionModelSettings | None, agent: RealtimeAgent, - ) -> RealtimeSessionModelSettings: + ) -> tuple[RealtimeSessionModelSettings, list[Any], list[Handoff[Any, RealtimeAgent[Any]]]]: + """Return (settings, resolved_tools, enabled_handoffs). + + resolved_tools and enabled_handoffs are captured before starting_settings overrides + so callers can use them for span metadata without re-invoking get_all_tools() or + _get_handoffs() a second time. + """ # Start with the merged base settings from run and model configuration. updated_settings = self._base_model_settings.copy() @@ -1385,7 +1401,7 @@ async def _get_updated_model_settings_from_agent( if disable_tracing: updated_settings["tracing"] = None - return updated_settings + return updated_settings, list(tools or []), list(handoffs or []) @classmethod async def _get_handoffs( diff --git a/tests/realtime/test_session.py b/tests/realtime/test_session.py index e45db679f5..d4816f3a32 100644 --- a/tests/realtime/test_session.py +++ b/tests/realtime/test_session.py @@ -2627,7 +2627,7 @@ async def mock_get_handoffs(cls, agent, context_wrapper): m.setattr("agents.realtime.session.RealtimeSession._get_handoffs", mock_get_handoffs) # Test the method directly - model_settings = await session._get_updated_model_settings_from_agent( + model_settings, _, _ = await session._get_updated_model_settings_from_agent( starting_settings=model_config_initial_settings, agent=agent ) @@ -2677,7 +2677,7 @@ async def mock_get_handoffs(cls, agent, context_wrapper): with pytest.MonkeyPatch().context() as m: m.setattr("agents.realtime.session.RealtimeSession._get_handoffs", mock_get_handoffs) - model_settings = await session._get_updated_model_settings_from_agent( + model_settings, _, _ = await session._get_updated_model_settings_from_agent( starting_settings=None, # No initial 
settings agent=agent, ) @@ -2723,7 +2723,7 @@ async def mock_get_handoffs(cls, agent, context_wrapper): with pytest.MonkeyPatch().context() as m: m.setattr("agents.realtime.session.RealtimeSession._get_handoffs", mock_get_handoffs) - model_settings = await session._get_updated_model_settings_from_agent( + model_settings, _, _ = await session._get_updated_model_settings_from_agent( starting_settings=model_config_settings, agent=agent ) @@ -2770,7 +2770,7 @@ async def mock_get_handoffs(cls, agent, context_wrapper): mock_get_handoffs, ) - model_settings = await session._get_updated_model_settings_from_agent( + model_settings, _, _ = await session._get_updated_model_settings_from_agent( starting_settings=None, agent=agent, ) diff --git a/tests/realtime/test_session_spans.py b/tests/realtime/test_session_spans.py index 80a13185a3..b37c38a5a5 100644 --- a/tests/realtime/test_session_spans.py +++ b/tests/realtime/test_session_spans.py @@ -220,6 +220,48 @@ async def test_disabled_handoff_excluded_from_span_metadata(): ) +@pytest.mark.asyncio +async def test_cleanup_from_different_task_does_not_raise(): + """_cleanup called from a task other than __aenter__'s task must not raise ValueError. + + close() is public and __aiter__ also calls _cleanup when _stored_exception is set. + Both can run in a different asyncio task than __aenter__. Resetting a contextvars + token from a different task raises ValueError — this must be caught gracefully. + """ + agent = RealtimeAgent(name="agent") + session = _make_session(agent) + + with trace("test"): + await session.enter() # open the session in this (main) task + + # Call _cleanup from a background task — it gets a copied context, so the + # token stored by __aenter__ in the main task cannot be reset here; must not raise. 
+ async def close_from_other_task() -> None: + await session._cleanup() + + await asyncio.create_task(close_from_other_task()) + + assert session._closed is True + + +@pytest.mark.asyncio +async def test_span_context_clean_after_close_called_directly(): + """Span context must be reset even when close() is called directly (no async with). + + Method: enter via session.enter(), call close() directly, verify Scope is clean. + """ + span_before = Scope.get_current_span() + agent = RealtimeAgent(name="agent") + session = _make_session(agent) + + with trace("test"): + await session.enter() + await session.close() + + span_after = Scope.get_current_span() + assert span_before is span_after, "Calling close() directly must still reset the span context." + + @pytest.mark.asyncio async def test_handoff_span_is_sibling_not_child_of_initial_span(): """After a handoff the new agent span must be a sibling of the first, not its child. diff --git a/tests/realtime/test_tracing.py b/tests/realtime/test_tracing.py index bacde6703c..ff6d297ec4 100644 --- a/tests/realtime/test_tracing.py +++ b/tests/realtime/test_tracing.py @@ -257,7 +257,7 @@ async def test_tracing_disabled_prevents_tracing(self, mock_websocket): ) # Test the _get_updated_model_settings_from_agent method directly - model_settings = await session._get_updated_model_settings_from_agent( + model_settings, _, _ = await session._get_updated_model_settings_from_agent( starting_settings=None, agent=agent ) From 94084651fae1ff25684f831de1ed35a0b02a2e9e Mon Sep 17 00:00:00 2001 From: jordanchendev Date: Thu, 4 Jun 2026 11:03:46 +0800 Subject: [PATCH 6/8] fix: don't install agent spans as current ContextVar; derive span metadata from final settings MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Resolves two root-cause issues: 1. 
Stale current span after handoff (#3353121454): installing the initial agent span as the ContextVar current span in the main task causes it to remain current even after a handoff finishes that span in a background task. asyncio tasks inherit a snapshot of the parent context — the background task cannot update the main task's ContextVar. Remove all Scope.set_current_span calls; agent spans are created and finished without being set as current. This also eliminates _initial_span_token and all related cross-task token-reset complexity. 2. Span metadata mismatch after model_config override (#3353121446): _get_updated_model_settings_from_agent returned the pre-override tools and handoffs even when starting_settings had overridden them. Now returns updated_settings["tools"/"handoffs"] after the update() call so span metadata matches what was actually sent to the model. Add test_span_tool_metadata_reflects_model_config_override to cover case 2. Update docstrings on no-longer-ContextVar-dependent tests to reflect new design. 
--- src/agents/realtime/session.py | 76 +++++++++------------------- tests/realtime/test_session_spans.py | 70 ++++++++++++++++++------- 2 files changed, 74 insertions(+), 72 deletions(-) diff --git a/src/agents/realtime/session.py b/src/agents/realtime/session.py index 24fc374e50..2dc49fb88d 100644 --- a/src/agents/realtime/session.py +++ b/src/agents/realtime/session.py @@ -1,7 +1,6 @@ from __future__ import annotations import asyncio -import contextvars import dataclasses import inspect import json @@ -27,7 +26,6 @@ from ..tool import DEFAULT_APPROVAL_REJECTION_MESSAGE, FunctionTool, invoke_function_tool from ..tool_context import ToolContext from ..tracing import Span, agent_span -from ..tracing.scope import Scope from ..tracing.span_data import AgentSpanData from ..tracing.spans import NoOpSpan from ..util._approvals import evaluate_needs_approval_setting @@ -200,9 +198,6 @@ def __init__( self._tool_call_tasks: set[asyncio.Task[Any]] = set() self._async_tool_calls: bool = bool(self._run_config.get("async_tool_calls", True)) self._current_agent_span: Span[AgentSpanData] | None = None - # Context-var token from the span created in __aenter__. Tracked here so _cleanup can - # reset it even if a handoff already called finish() on that span in a background task. - self._initial_span_token: contextvars.Token[Any] | None = None @property def model(self) -> RealtimeModel: @@ -213,16 +208,14 @@ async def __aenter__(self) -> RealtimeSession: """Start the session by connecting to the model. After this, you will be able to stream events from the model and send messages and audio to the model. """ - # Start an agent span for the initial agent. Use mark_as_current=False and manage - # the context-var token ourselves so _cleanup can reset it even when a handoff - # already called finish() on this span from a background task. + # Create the agent span. 
Do not install it as the current ContextVar span: + # asyncio tasks inherit a snapshot of their parent's context, so a bg task + # cannot update the main task's context var. Installing the span would leave a + # stale (finished) span as "current" after any handoff that runs in a bg task. + # Agent spans are emitted as children of the enclosing trace without being set + # as current, which is correct and avoids all cross-task ContextVar management. self._current_agent_span = self._make_agent_span(self._current_agent) self._current_agent_span.start(mark_as_current=False) - # Only install the span as current when it is a real span. Setting a NoOpSpan as - # current poisons the context: provider.create_span() returns NoOpSpan for every - # child span when it detects a no-op parent (provider.py _is_noop_span check). - if not isinstance(self._current_agent_span, NoOpSpan): - self._initial_span_token = Scope.set_current_span(self._current_agent_span) try: # Add ourselves as a listener @@ -239,8 +232,9 @@ async def __aenter__(self) -> RealtimeSession: ) model_config["initial_model_settings"] = initial_settings - # Reuse the tools/handoffs already resolved above — avoids a second call to - # get_all_tools()/get_handoffs() and ensures span metadata matches the model. + # Reuse the resolved tools/handoffs returned above — avoids a second call and + # ensures span metadata matches what was actually sent to the model, including + # any overrides applied by starting_settings. 
if not isinstance(self._current_agent_span, NoOpSpan): self._current_agent_span.span_data.tools = [ n for t in resolved_tools if (n := get_tool_trace_name_for_tool(t)) is not None @@ -266,9 +260,6 @@ async def __aenter__(self) -> RealtimeSession: if self._current_agent_span is not None: self._current_agent_span.finish(reset_current=False) self._current_agent_span = None - if self._initial_span_token is not None: - Scope.reset_current_span(self._initial_span_token) - self._initial_span_token = None raise async def enter(self) -> RealtimeSession: @@ -280,14 +271,6 @@ async def enter(self) -> RealtimeSession: async def __aexit__(self, _exc_type: Any, _exc_val: Any, _exc_tb: Any) -> None: """End the session.""" - # Reset the initial span token here. __aexit__ is always invoked in the same - # asyncio task as __aenter__, so this reset is unconditionally safe. We do it - # here rather than relying solely on _cleanup because _cleanup is also reachable - # from close() and __aiter__, which may run in different tasks and would raise - # ValueError on ContextVar.reset(). - if self._initial_span_token is not None: - Scope.reset_current_span(self._initial_span_token) - self._initial_span_token = None await self.close() async def __aiter__(self) -> AsyncIterator[RealtimeSessionEvent]: @@ -878,17 +861,12 @@ async def _handle_tool_call( # Update current agent self._current_agent = result - # Create the incoming agent span with a clear current-span context so that - # its parent is the trace root, not the finished outgoing agent span. - # The outgoing span is still "current" in this background task context - # (finish(reset_current=False) does not reset the context var), so we must - # temporarily clear it before calling _make_agent_span() / agent_span() — - # provider.create_span() reads Scope.get_current_span() at creation time to - # determine parent_id. - _handoff_clear_token = Scope.set_current_span(None) + # Create the incoming agent span. 
Because we never install agent spans as + # current (see __aenter__), this background task's context already holds the + # trace root as the current span — provider.create_span() will parent the new + # span to the trace root, making it a sibling of the outgoing agent span. self._current_agent_span = self._make_agent_span(self._current_agent) self._current_agent_span.start(mark_as_current=False) - Scope.reset_current_span(_handoff_clear_token) # Get updated model settings from new agent; reuse resolved tools and # handoffs for span metadata to avoid a redundant second call. @@ -1326,17 +1304,6 @@ async def _cleanup(self) -> None: if self._current_agent_span is not None: self._current_agent_span.finish(reset_current=False) self._current_agent_span = None - # Reset the initial span's context-var token. __aexit__ handles this - # unconditionally (it runs in the same task as __aenter__). This fallback - # handles direct close() calls from the same task; if close() or __aiter__ - # triggers _cleanup from a different task the ValueError is caught and the - # token is left for __aexit__ to reset. - if self._initial_span_token is not None: - try: - Scope.reset_current_span(self._initial_span_token) - except ValueError: - pass # Cross-task call; __aexit__ will reset from the correct task. - self._initial_span_token = None # Cancel and cleanup guardrail tasks self._cleanup_guardrail_tasks() @@ -1371,12 +1338,12 @@ async def _get_updated_model_settings_from_agent( self, starting_settings: RealtimeSessionModelSettings | None, agent: RealtimeAgent, - ) -> tuple[RealtimeSessionModelSettings, list[Any], list[Handoff[Any, RealtimeAgent[Any]]]]: - """Return (settings, resolved_tools, enabled_handoffs). + ) -> tuple[RealtimeSessionModelSettings, list[Any], list[Any]]: + """Return (settings, final_tools, final_handoffs). 
- resolved_tools and enabled_handoffs are captured before starting_settings overrides - so callers can use them for span metadata without re-invoking get_all_tools() or - _get_handoffs() a second time. + final_tools and final_handoffs reflect the values in the returned settings after + starting_settings overrides are applied. Callers must use these for span metadata + to ensure the span reports exactly what was sent to the model. """ # Start with the merged base settings from run and model configuration. updated_settings = self._base_model_settings.copy() @@ -1393,7 +1360,7 @@ async def _get_updated_model_settings_from_agent( updated_settings["tools"] = tools or [] updated_settings["handoffs"] = handoffs or [] - # Apply starting settings (from model config) next + # Apply starting_settings (from model config) — may override tools and handoffs. if starting_settings: updated_settings.update(starting_settings) @@ -1401,7 +1368,10 @@ async def _get_updated_model_settings_from_agent( if disable_tracing: updated_settings["tracing"] = None - return updated_settings, list(tools or []), list(handoffs or []) + # Return the final tools/handoffs AFTER overrides so span metadata matches the model. + final_tools = list(updated_settings.get("tools") or []) + final_handoffs = list(updated_settings.get("handoffs") or []) + return updated_settings, final_tools, final_handoffs @classmethod async def _get_handoffs( diff --git a/tests/realtime/test_session_spans.py b/tests/realtime/test_session_spans.py index b37c38a5a5..e006029bbf 100644 --- a/tests/realtime/test_session_spans.py +++ b/tests/realtime/test_session_spans.py @@ -173,25 +173,22 @@ async def test_tracing_disabled_creates_no_agent_spans(): @pytest.mark.asyncio async def test_no_active_trace_does_not_poison_span_context(): - """Without an outer trace(), the session must not install a NoOpSpan as current. + """Without an outer trace(), the session must not alter the ambient span context. 
- Convention: provider returns NoOpSpan when no active trace exists. Installing - a NoOpSpan as current would make every span created afterward also a NoOpSpan - (provider._is_noop_span check). The session must skip Scope.set_current_span() - for NoOpSpans so ambient context is unchanged after the session closes. + Convention: RealtimeSession never installs agent spans as the ContextVar current span, + so the context is always unchanged before and after the session regardless of whether + a real trace exists. """ span_before = Scope.get_current_span() agent = RealtimeAgent(name="agent") session = _make_session(agent) - # Enter/exit WITHOUT any enclosing trace — span will be a NoOpSpan. + # Enter/exit WITHOUT any enclosing trace. async with session: pass span_after = Scope.get_current_span() - assert span_before is span_after, ( - "Session must not permanently alter the current span context when no active trace exists." - ) + assert span_before is span_after, "Session must not permanently alter the current span context." @pytest.mark.asyncio @@ -222,20 +219,17 @@ async def test_disabled_handoff_excluded_from_span_metadata(): @pytest.mark.asyncio async def test_cleanup_from_different_task_does_not_raise(): - """_cleanup called from a task other than __aenter__'s task must not raise ValueError. + """_cleanup called from a different asyncio task must not raise and must close the session. close() is public and __aiter__ also calls _cleanup when _stored_exception is set. - Both can run in a different asyncio task than __aenter__. Resetting a contextvars - token from a different task raises ValueError — this must be caught gracefully. + Both can run in a different asyncio task than __aenter__. 
""" agent = RealtimeAgent(name="agent") session = _make_session(agent) with trace("test"): - await session.enter() # open the session in this (main) task + await session.enter() - # Call _cleanup from a background task — it gets a copied context, so the - # token stored by __aenter__ in the main task cannot be reset here; must not raise. async def close_from_other_task() -> None: await session._cleanup() @@ -245,10 +239,11 @@ async def close_from_other_task() -> None: @pytest.mark.asyncio -async def test_span_context_clean_after_close_called_directly(): - """Span context must be reset even when close() is called directly (no async with). +async def test_span_context_unchanged_after_close_called_directly(): + """Ambient span context must be unchanged whether exited via async with or close(). - Method: enter via session.enter(), call close() directly, verify Scope is clean. + Convention: RealtimeSession never installs agent spans as the ContextVar current span, + so close() has no context cleanup to perform; state before and after is identical. """ span_before = Scope.get_current_span() agent = RealtimeAgent(name="agent") @@ -259,7 +254,7 @@ async def test_span_context_clean_after_close_called_directly(): await session.close() span_after = Scope.get_current_span() - assert span_before is span_after, "Calling close() directly must still reset the span context." + assert span_before is span_after, "Calling close() directly must not alter the span context." @pytest.mark.asyncio @@ -300,3 +295,40 @@ async def test_handoff_span_is_sibling_not_child_of_initial_span(): "Specialist span must not be a child of the router span. " f"specialist.parent_id={specialist_span.parent_id}, router.span_id={router_span.span_id}" ) + + +@pytest.mark.asyncio +async def test_span_tool_metadata_reflects_model_config_override(): + """model_config.initial_model_settings tool override must be reflected in span metadata. 
+ + Convention: span metadata must match what was actually sent to the model. When + initial_model_settings overrides tools (e.g. to empty), the span must show the + override — not the agent's default tool list. + """ + from agents.tool import function_tool + + @function_tool + def my_tool() -> str: + """A test tool.""" + return "ok" + + agent = RealtimeAgent(name="tool_agent", tools=[my_tool]) + # model_config overrides tools with an empty list, wiping the agent's tool. + session = RealtimeSession( + model=_FakeRealtimeModel(), + agent=agent, + context=None, + model_config={"initial_model_settings": {"tools": []}}, + run_config={}, + ) + + with trace("test"): + async with session: + pass + + spans = SPAN_PROCESSOR_TESTING.get_ordered_spans() + agent_spans = [s for s in spans if isinstance(s.span_data, AgentSpanData)] + assert agent_spans[0].span_data.tools is None, ( + f"model_config tool override must clear tools from span, " + f"got: {agent_spans[0].span_data.tools}" + ) From ee9f76b67ccacbe9aed3c5ae0fa56d43c53455e2 Mon Sep 17 00:00:00 2001 From: jordanchendev Date: Thu, 4 Jun 2026 14:06:24 +0800 Subject: [PATCH 7/8] fix: add __aenter__ failure test and update _make_agent_span docstring MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add test_aenter_failure_finishes_span to cover the except BaseException cleanup path in __aenter__ — the final missing entry from the tracing conventions test checklist (Rule 9). Update the _make_agent_span docstring to reflect the current design: callers now populate span_data from _get_updated_model_settings_from_agent tuple return rather than calling get_all_tools() / _get_handoffs() separately. 
--- src/agents/realtime/session.py | 8 +++---- tests/realtime/test_session_spans.py | 33 ++++++++++++++++++++++++++++ 2 files changed, 37 insertions(+), 4 deletions(-) diff --git a/src/agents/realtime/session.py b/src/agents/realtime/session.py index 2dc49fb88d..266e7b2723 100644 --- a/src/agents/realtime/session.py +++ b/src/agents/realtime/session.py @@ -1326,10 +1326,10 @@ async def _cleanup(self) -> None: def _make_agent_span(self, agent: RealtimeAgent) -> Span[AgentSpanData]: """Create a new agent span for the given agent, respecting tracing_disabled. - Both tool names and handoff names are intentionally omitted here. Callers must - update span_data.tools via get_all_tools() and span_data.handoffs via - _get_handoffs() asynchronously to reflect only what is actually sent to the model - (respects is_enabled on both tools and handoffs). + Tool and handoff names are intentionally omitted here. Callers must populate + span_data.tools and span_data.handoffs from the tuple returned by + _get_updated_model_settings_from_agent() so that metadata reflects what was + actually sent to the model (after is_enabled filtering and any model_config overrides). """ disabled: bool = bool(self._run_config.get("tracing_disabled", False)) return agent_span(name=agent.name, disabled=disabled) diff --git a/tests/realtime/test_session_spans.py b/tests/realtime/test_session_spans.py index e006029bbf..9c16035ed5 100644 --- a/tests/realtime/test_session_spans.py +++ b/tests/realtime/test_session_spans.py @@ -297,6 +297,39 @@ async def test_handoff_span_is_sibling_not_child_of_initial_span(): ) +@pytest.mark.asyncio +async def test_aenter_failure_finishes_span(): + """If __aenter__ raises after the span is started, the span must still be finished. + + Python does not call __aexit__ when __aenter__ raises, so the except BaseException + block in __aenter__ is the only cleanup path. Verify no unfinished span is leaked. 
+ """ + + class _FailingConnectModel(_FakeRealtimeModel): + async def connect(self, options: Any) -> None: + raise RuntimeError("simulated connection failure") + + agent = RealtimeAgent(name="agent") + session = RealtimeSession( + model=_FailingConnectModel(), + agent=agent, + context=None, + run_config={}, + ) + + with trace("test"): + with pytest.raises(RuntimeError): + async with session: + pass + + spans = SPAN_PROCESSOR_TESTING.get_ordered_spans() + agent_spans = [s for s in spans if isinstance(s.span_data, AgentSpanData)] + assert len(agent_spans) == 1, f"Expected 1 agent span, got {len(agent_spans)}" + assert agent_spans[0].ended_at is not None, ( + "Agent span must be finished (not leaked) when __aenter__ raises." + ) + + @pytest.mark.asyncio async def test_span_tool_metadata_reflects_model_config_override(): """model_config.initial_model_settings tool override must be reflected in span metadata. From 483f41e706a309fbd6f5129bc0c99e1cc2f592fe Mon Sep 17 00:00:00 2001 From: jordanchendev Date: Thu, 4 Jun 2026 14:32:06 +0800 Subject: [PATCH 8/8] fix: finish outgoing span and start new one in update_agent() update_agent() now mirrors the handoff path: finishes the outgoing agent span (reset_current=False), creates and starts a new span for the incoming agent (mark_as_current=False), and populates span metadata from _get_updated_model_settings_from_agent() tuple. Adds test_update_agent_finishes_old_span_and_starts_new_one to verify both agent spans are emitted and the original span is closed. 
Co-Authored-By: Claude Sonnet 4.6 --- src/agents/realtime/session.py | 20 +++++++++++++++- tests/realtime/test_session_spans.py | 35 ++++++++++++++++++++++++++++ 2 files changed, 54 insertions(+), 1 deletion(-) diff --git a/src/agents/realtime/session.py b/src/agents/realtime/session.py index 266e7b2723..cf745919a2 100644 --- a/src/agents/realtime/session.py +++ b/src/agents/realtime/session.py @@ -315,13 +315,31 @@ async def interrupt(self) -> None: async def update_agent(self, agent: RealtimeAgent) -> None: """Update the active agent for this session and apply its settings to the model.""" + # Finish the outgoing agent span before switching agents, mirroring the handoff path. + if self._current_agent_span is not None: + self._current_agent_span.finish(reset_current=False) + self._current_agent = agent + self._current_agent_span = self._make_agent_span(self._current_agent) + self._current_agent_span.start(mark_as_current=False) - updated_settings, _, _ = await self._get_updated_model_settings_from_agent( + ( + updated_settings, + resolved_tools, + enabled_handoffs, + ) = await self._get_updated_model_settings_from_agent( starting_settings=None, agent=self._current_agent, ) + if not isinstance(self._current_agent_span, NoOpSpan): + self._current_agent_span.span_data.tools = [ + n for t in resolved_tools if (n := get_tool_trace_name_for_tool(t)) is not None + ] or None + self._current_agent_span.span_data.handoffs = [ + h.agent_name for h in enabled_handoffs + ] or None + await self._model.send_event( RealtimeModelSendSessionUpdate(session_settings=updated_settings) ) diff --git a/tests/realtime/test_session_spans.py b/tests/realtime/test_session_spans.py index 9c16035ed5..9d038e9753 100644 --- a/tests/realtime/test_session_spans.py +++ b/tests/realtime/test_session_spans.py @@ -365,3 +365,38 @@ def my_tool() -> str: f"model_config tool override must clear tools from span, " f"got: {agent_spans[0].span_data.tools}" ) + + +@pytest.mark.asyncio +async def 
test_update_agent_finishes_old_span_and_starts_new_one(): + """update_agent() must finish the outgoing span and emit a new span for the incoming agent. + + Convention: update_agent() is the public API equivalent of a handoff. It must mirror + the handoff path: finish the current agent span, then create and start a new one for + the incoming agent. Without this, activity after the switch is attributed to the wrong + agent and no span is emitted for the new agent. + """ + original = RealtimeAgent(name="original_agent") + replacement = RealtimeAgent(name="replacement_agent") + model = _FakeRealtimeModel() + session = _make_session(original, model) + + with trace("test"): + async with session: + await session.update_agent(replacement) + + spans = SPAN_PROCESSOR_TESTING.get_ordered_spans() + agent_spans = [s for s in spans if isinstance(s.span_data, AgentSpanData)] + assert len(agent_spans) == 2, ( + f"Expected 2 agent spans (original + replacement), got {len(agent_spans)}" + ) + + names = {s.span_data.name for s in agent_spans} + assert names == {"original_agent", "replacement_agent"}, ( + f"Expected spans for both agents, got: {names}" + ) + + original_span = next(s for s in agent_spans if s.span_data.name == "original_agent") + assert original_span.ended_at is not None, ( + "Original agent span must be finished after update_agent()" + )