diff --git a/tests/integrations/openai_agents/test_openai_agents.py b/tests/integrations/openai_agents/test_openai_agents.py
index 1390455317..b9cb43887a 100644
--- a/tests/integrations/openai_agents/test_openai_agents.py
+++ b/tests/integrations/openai_agents/test_openai_agents.py
@@ -5,10 +5,11 @@
 import os
 import json
 import logging
+import httpx
 
 import sentry_sdk
 from sentry_sdk import start_span
-from sentry_sdk.consts import SPANDATA
+from sentry_sdk.consts import SPANDATA, OP
 from sentry_sdk.integrations.logging import LoggingIntegration
 from sentry_sdk.integrations.openai_agents import OpenAIAgentsIntegration
 from sentry_sdk.integrations.openai_agents.utils import _set_input_data, safe_serialize
@@ -314,6 +315,25 @@ def test_agent_custom_model():
     )
 
 
+@pytest.fixture
+def get_model_response():  # returns the factory `inner` for canned HTTP responses
+    def inner(response_content):  # response_content must provide .model_dump()
+        model_request = httpx.Request(
+            "POST",
+            "/responses",
+        )
+        # Body is the dumped model as UTF-8 JSON; status 200, tied to the request above.
+        response = httpx.Response(
+            200,
+            request=model_request,
+            content=json.dumps(response_content.model_dump()).encode("utf-8"),
+        )
+
+        return response
+
+    return inner
+
+
 @pytest.mark.asyncio
 async def test_agent_invocation_span_no_pii(
     sentry_init, capture_events, test_agent, mock_model_response
@@ -1095,7 +1115,9 @@ async def test_max_turns_before_handoff_span(sentry_init, capture_events, mock_u
 
 
 @pytest.mark.asyncio
-async def test_tool_execution_span(sentry_init, capture_events, test_agent):
+async def test_tool_execution_span(
+    sentry_init, capture_events, test_agent, get_model_response
+):
     """
     Test tool execution span creation.
     """
@@ -1106,78 +1128,106 @@ def simple_test_tool(message: str) -> str:
         return f"Tool executed with: {message}"
 
     # Create agent with the tool
-    agent_with_tool = test_agent.clone(tools=[simple_test_tool])
-
-    with patch.dict(os.environ, {"OPENAI_API_KEY": "test-key"}):
-        with patch(
-            "agents.models.openai_responses.OpenAIResponsesModel.get_response"
-        ) as mock_get_response:
-            # Create a mock response that includes tool calls
-            tool_call = ResponseFunctionToolCall(
-                id="call_123",
-                call_id="call_123",
-                name="simple_test_tool",
-                type="function_call",
-                arguments='{"message": "hello"}',
-            )
+    client = AsyncOpenAI(api_key="test-key")
+    model = OpenAIResponsesModel(model="gpt-4", openai_client=client)
+    agent_with_tool = test_agent.clone(tools=[simple_test_tool], model=model)
 
-            # First response with tool call
-            tool_response = ModelResponse(
-                output=[tool_call],
-                usage=Usage(
-                    requests=1, input_tokens=10, output_tokens=5, total_tokens=15
+    tool_response = get_model_response(
+        Response(
+            id="resp_tool_123",
+            output=[
+                ResponseFunctionToolCall(
+                    id="call_123",
+                    call_id="call_123",
+                    name="simple_test_tool",
+                    type="function_call",
+                    arguments='{"message": "hello"}',
+                )
+            ],
+            parallel_tool_calls=False,
+            tool_choice="none",
+            tools=[],
+            created_at=10000000,
+            model="gpt-4",
+            object="response",
+            usage=ResponseUsage(
+                input_tokens=10,
+                input_tokens_details=InputTokensDetails(
+                    cached_tokens=0,
                 ),
-                response_id="resp_tool_123",
-            )
-
-            # Second response with final answer
-            final_response = ModelResponse(
-                output=[
-                    ResponseOutputMessage(
-                        id="msg_final",
-                        type="message",
-                        status="completed",
-                        content=[
-                            ResponseOutputText(
-                                text="Task completed using the tool",
-                                type="output_text",
-                                annotations=[],
-                            )
-                        ],
-                        role="assistant",
-                    )
-                ],
-                usage=Usage(
-                    requests=1, input_tokens=15, output_tokens=10, total_tokens=25
+                output_tokens=5,
+                output_tokens_details=OutputTokensDetails(
+                    reasoning_tokens=0,
                 ),
-                response_id="resp_final_123",
-            )
+                total_tokens=15,
+            ),
+        )
+    )
 
-            # Return different responses on successive calls
-            mock_get_response.side_effect = [tool_response, final_response]
+    final_response = get_model_response(
+        Response(
+            id="resp_final_123",
+            output=[
+                ResponseOutputMessage(
+                    id="msg_final",
+                    type="message",
+                    status="completed",
+                    content=[
+                        ResponseOutputText(
+                            text="Task completed using the tool",
+                            type="output_text",
+                            annotations=[],
+                        )
+                    ],
+                    role="assistant",
+                )
+            ],
+            parallel_tool_calls=False,
+            tool_choice="none",
+            tools=[],
+            created_at=10000000,
+            model="gpt-4",
+            object="response",
+            usage=ResponseUsage(
+                input_tokens=15,
+                input_tokens_details=InputTokensDetails(
+                    cached_tokens=0,
+                ),
+                output_tokens=10,
+                output_tokens_details=OutputTokensDetails(
+                    reasoning_tokens=0,
+                ),
+                total_tokens=25,
+            ),
+        )
+    )
 
-            sentry_init(
-                integrations=[OpenAIAgentsIntegration()],
-                traces_sample_rate=1.0,
-                send_default_pii=True,
-            )
+    with patch.object(
+        agent_with_tool.model._client._client,
+        "send",
+        side_effect=[tool_response, final_response],
+    ) as _:
+        sentry_init(
+            integrations=[OpenAIAgentsIntegration()],
+            traces_sample_rate=1.0,
+            send_default_pii=True,
+        )
 
-            events = capture_events()
+        events = capture_events()
 
-            await agents.Runner.run(
-                agent_with_tool,
-                "Please use the simple test tool",
-                run_config=test_run_config,
-            )
+        await agents.Runner.run(
+            agent_with_tool,
+            "Please use the simple test tool",
+            run_config=test_run_config,
+        )
 
     (transaction,) = events
     spans = transaction["spans"]
-    (
-        agent_span,
-        ai_client_span1,
-        tool_span,
-        ai_client_span2,
-    ) = spans
+    agent_span = next(span for span in spans if span["op"] == OP.GEN_AI_INVOKE_AGENT)
+    ai_client_span1, ai_client_span2 = (
+        span for span in spans if span["op"] == OP.GEN_AI_CHAT
+    )
+    tool_span = next(span for span in spans if span["op"] == OP.GEN_AI_EXECUTE_TOOL)
 
     available_tools = [
         {
@@ -2095,7 +2145,9 @@ def test_openai_agents_message_role_mapping(
 
 
 @pytest.mark.asyncio
-async def test_tool_execution_error_tracing(sentry_init, capture_events, test_agent):
+async def test_tool_execution_error_tracing(
+    sentry_init, capture_events, test_agent, get_model_response
+):
     """
     Test that tool execution errors are properly tracked via error tracing patch.
 
@@ -2113,70 +2165,100 @@ def failing_tool(message: str) -> str:
         raise ValueError("Tool execution failed")
 
     # Create agent with the failing tool
-    agent_with_tool = test_agent.clone(tools=[failing_tool])
-
-    with patch.dict(os.environ, {"OPENAI_API_KEY": "test-key"}):
-        with patch(
-            "agents.models.openai_responses.OpenAIResponsesModel.get_response"
-        ) as mock_get_response:
-            # Create a mock response that includes tool call
-            tool_call = ResponseFunctionToolCall(
-                id="call_123",
-                call_id="call_123",
-                name="failing_tool",
-                type="function_call",
-                arguments='{"message": "test"}',
-            )
+    client = AsyncOpenAI(api_key="test-key")
+    model = OpenAIResponsesModel(model="gpt-4", openai_client=client)
+    agent_with_tool = test_agent.clone(tools=[failing_tool], model=model)
 
-            # First response with tool call
-            tool_response = ModelResponse(
-                output=[tool_call],
-                usage=Usage(
-                    requests=1, input_tokens=10, output_tokens=5, total_tokens=15
+    tool_response = get_model_response(
+        Response(
+            id="resp_1",
+            output=[
+                ResponseFunctionToolCall(
+                    id="call_123",
+                    call_id="call_123",
+                    name="failing_tool",
+                    type="function_call",
+                    arguments='{"message": "test"}',
+                )
+            ],
+            parallel_tool_calls=False,
+            tool_choice="none",
+            tools=[],
+            created_at=10000000,
+            model="gpt-4.1-2025-04-14",
+            object="response",
+            usage=ResponseUsage(
+                input_tokens=10,
+                input_tokens_details=InputTokensDetails(
+                    cached_tokens=0,
                 ),
-                response_id="resp_tool_123",
-            )
-
-            # Second response after tool error (agents library handles the error and continues)
-            final_response = ModelResponse(
-                output=[
-                    ResponseOutputMessage(
-                        id="msg_final",
-                        type="message",
-                        status="completed",
-                        content=[
-                            ResponseOutputText(
-                                text="An error occurred while running the tool",
-                                type="output_text",
-                                annotations=[],
-                            )
-                        ],
-                        role="assistant",
-                    )
-                ],
-                usage=Usage(
-                    requests=1, input_tokens=15, output_tokens=10, total_tokens=25
+                output_tokens=5,
+                output_tokens_details=OutputTokensDetails(
+                    reasoning_tokens=0,
                 ),
-                response_id="resp_final_123",
-            )
+                total_tokens=15,
+            ),
+        )
+    )
 
-            mock_get_response.side_effect = [tool_response, final_response]
+    final_response = get_model_response(
+        Response(
+            id="resp_2",
+            output=[
+                ResponseOutputMessage(
+                    id="msg_final",
+                    type="message",
+                    status="completed",
+                    content=[
+                        ResponseOutputText(
+                            text="An error occurred while running the tool",
+                            type="output_text",
+                            annotations=[],
+                        )
+                    ],
+                    role="assistant",
+                )
+            ],
+            parallel_tool_calls=False,
+            tool_choice="none",
+            tools=[],
+            created_at=10000000,
+            model="gpt-4-0613",
+            object="response",
+            usage=ResponseUsage(
+                input_tokens=15,
+                input_tokens_details=InputTokensDetails(
+                    cached_tokens=0,
+                ),
+                output_tokens=10,
+                output_tokens_details=OutputTokensDetails(
+                    reasoning_tokens=0,
+                ),
+                total_tokens=25,
+            ),
+        )
+    )
 
-            sentry_init(
-                integrations=[OpenAIAgentsIntegration()],
-                traces_sample_rate=1.0,
-                send_default_pii=True,
-            )
+    with patch.object(
+        agent_with_tool.model._client._client,
+        "send",
+        side_effect=[tool_response, final_response],
+    ) as _:
+        sentry_init(
+            integrations=[OpenAIAgentsIntegration()],
+            traces_sample_rate=1.0,
+            send_default_pii=True,
+        )
 
-            events = capture_events()
+        events = capture_events()
 
-            # Note: The agents library catches tool exceptions internally,
-            # so we don't expect this to raise
-            await agents.Runner.run(
-                agent_with_tool,
-                "Please use the failing tool",
-                run_config=test_run_config,
-            )
+        # Note: The agents library catches tool exceptions internally,
+        # so we don't expect this to raise
+        await agents.Runner.run(
+            agent_with_tool,
+            "Please use the failing tool",
+            run_config=test_run_config,
+        )
 
     (transaction,) = events
     spans = transaction["spans"]
@@ -2412,7 +2494,7 @@ async def test_ai_client_span_response_model_with_chat_completions(
 
 @pytest.mark.asyncio
 async def test_multiple_llm_calls_aggregate_usage(
-    sentry_init, capture_events, test_agent
+    sentry_init, capture_events, test_agent, get_model_response
 ):
     """
     Test that invoke_agent spans show aggregated usage across multiple LLM calls
@@ -2424,79 +2506,100 @@ def calculator(a: int, b: int) -> int:
         """Add two numbers"""
         return a + b
 
-    agent_with_tool = test_agent.clone(tools=[calculator])
+    client = AsyncOpenAI(api_key="test-key")
+    model = OpenAIResponsesModel(model="gpt-4", openai_client=client)
+    agent_with_tool = test_agent.clone(tools=[calculator], model=model)
 
-    with patch.dict(os.environ, {"OPENAI_API_KEY": "test-key"}):
-        with patch(
-            "agents.models.openai_responses.OpenAIResponsesModel.get_response"
-        ) as mock_get_response:
-            # First call: agent decides to use tool (10 input, 5 output tokens)
-            tool_call_response = ModelResponse(
-                output=[
-                    ResponseFunctionToolCall(
-                        id="call_123",
-                        call_id="call_123",
-                        name="calculator",
-                        type="function_call",
-                        arguments='{"a": 5, "b": 3}',
-                    )
-                ],
-                usage=Usage(
-                    requests=1,
-                    input_tokens=10,
-                    output_tokens=5,
-                    total_tokens=15,
-                    input_tokens_details=InputTokensDetails(cached_tokens=0),
-                    output_tokens_details=OutputTokensDetails(reasoning_tokens=0),
+    tool_call_response = get_model_response(
+        Response(
+            id="resp_1",
+            output=[
+                ResponseFunctionToolCall(
+                    id="call_123",
+                    call_id="call_123",
+                    name="calculator",
+                    type="function_call",
+                    arguments='{"a": 5, "b": 3}',
+                )
+            ],
+            parallel_tool_calls=False,
+            tool_choice="none",
+            tools=[],
+            created_at=10000000,
+            model="gpt-4.1-2025-04-14",
+            object="response",
+            usage=ResponseUsage(
+                input_tokens=10,
+                input_tokens_details=InputTokensDetails(
+                    cached_tokens=0,
                 ),
-                response_id="resp_tool_call",
-            )
-
-            # Second call: agent uses tool result to respond (20 input, 15 output tokens)
-            final_response = ModelResponse(
-                output=[
-                    ResponseOutputMessage(
-                        id="msg_final",
-                        type="message",
-                        status="completed",
-                        content=[
-                            ResponseOutputText(
-                                text="The result is 8",
-                                type="output_text",
-                                annotations=[],
-                            )
-                        ],
-                        role="assistant",
-                    )
-                ],
-                usage=Usage(
-                    requests=1,
-                    input_tokens=20,
-                    output_tokens=15,
-                    total_tokens=35,
-                    input_tokens_details=InputTokensDetails(cached_tokens=5),
-                    output_tokens_details=OutputTokensDetails(reasoning_tokens=3),
+                output_tokens=5,
+                output_tokens_details=OutputTokensDetails(
+                    reasoning_tokens=0,
                 ),
-                response_id="resp_final",
-            )
+                total_tokens=15,
+            ),
+        )
+    )
 
-            mock_get_response.side_effect = [tool_call_response, final_response]
+    final_response = get_model_response(
+        Response(
+            id="resp_2",
+            output=[
+                ResponseOutputMessage(
+                    id="msg_final",
+                    type="message",
+                    status="completed",
+                    content=[
+                        ResponseOutputText(
+                            text="The result is 8",
+                            type="output_text",
+                            annotations=[],
+                        )
+                    ],
+                    role="assistant",
+                )
+            ],
+            parallel_tool_calls=False,
+            tool_choice="none",
+            tools=[],
+            created_at=10000000,
+            model="gpt-4-0613",
+            object="response",
+            usage=ResponseUsage(
+                input_tokens=20,
+                input_tokens_details=InputTokensDetails(
+                    cached_tokens=5,
+                ),
+                output_tokens=15,
+                output_tokens_details=OutputTokensDetails(
+                    reasoning_tokens=3,
+                ),
+                total_tokens=35,
+            ),
+        )
+    )
 
-            sentry_init(
-                integrations=[OpenAIAgentsIntegration()],
-                traces_sample_rate=1.0,
-                send_default_pii=True,
-            )
+    with patch.object(
+        agent_with_tool.model._client._client,
+        "send",
+        side_effect=[tool_call_response, final_response],
+    ) as _:
+        sentry_init(
+            integrations=[OpenAIAgentsIntegration()],
+            traces_sample_rate=1.0,
+            send_default_pii=True,
+        )
 
-            events = capture_events()
+        events = capture_events()
 
-            result = await agents.Runner.run(
-                agent_with_tool,
-                "What is 5 + 3?",
-                run_config=test_run_config,
-            )
+        result = await agents.Runner.run(
+            agent_with_tool,
+            "What is 5 + 3?",
+            run_config=test_run_config,
+        )
 
-            assert result is not None
+        assert result is not None
 
     (transaction,) = events
     spans = transaction["spans"]
@@ -2656,7 +2759,10 @@ async def test_invoke_agent_span_includes_response_model(
 
 @pytest.mark.asyncio
 async def test_invoke_agent_span_uses_last_response_model(
-    sentry_init, capture_events, test_agent
+    sentry_init,
+    capture_events,
+    test_agent,
+    get_model_response,
 ):
     """
     Test that when an agent makes multiple LLM calls (e.g., with tools),
@@ -2668,17 +2774,14 @@ def calculator(a: int, b: int) -> int:
         """Add two numbers"""
         return a + b
 
-    agent_with_tool = test_agent.clone(tools=[calculator])
+    client = AsyncOpenAI(api_key="test-key")
+    model = OpenAIResponsesModel(model="gpt-4", openai_client=client)
+    agent_with_tool = test_agent.clone(tools=[calculator], model=model)
 
-    with patch.dict(os.environ, {"OPENAI_API_KEY": "test-key"}):
-        with patch(
-            "agents.models.openai_responses.OpenAIResponsesModel._fetch_response"
-        ) as mock_fetch_response:
-            # First call: gpt-4 model returns tool call
-            first_response = MagicMock()
-            first_response.model = "gpt-4-0613"
-            first_response.id = "resp_1"
-            first_response.output = [
+    first_response = get_model_response(
+        Response(
+            id="resp_1",
+            output=[
                 ResponseFunctionToolCall(
                     id="call_123",
                     call_id="call_123",
@@ -2686,65 +2789,85 @@ def calculator(a: int, b: int) -> int:
                     type="function_call",
                     arguments='{"a": 5, "b": 3}',
                 )
-            ]
-            first_response.usage = MagicMock()
-            first_response.usage.input_tokens = 10
-            first_response.usage.output_tokens = 5
-            first_response.usage.total_tokens = 15
-            first_response.usage.input_tokens_details = InputTokensDetails(
-                cached_tokens=0
-            )
-            first_response.usage.output_tokens_details = OutputTokensDetails(
-                reasoning_tokens=0
-            )
+            ],
+            parallel_tool_calls=False,
+            tool_choice="none",
+            tools=[],
+            created_at=10000000,
+            model="gpt-4-0613",
+            object="response",
+            usage=ResponseUsage(
+                input_tokens=10,
+                input_tokens_details=InputTokensDetails(
+                    cached_tokens=0,
+                ),
+                output_tokens=5,
+                output_tokens_details=OutputTokensDetails(
+                    reasoning_tokens=0,
+                ),
+                total_tokens=15,
+            ),
+        )
+    )
 
-            # Second call: different model version returns final message
-            second_response = MagicMock()
-            second_response.model = "gpt-4.1-2025-04-14"
-            second_response.id = "resp_2"
-            second_response.output = [
+    second_response = get_model_response(
+        Response(
+            id="resp_2",
+            output=[
                 ResponseOutputMessage(
                     id="msg_final",
                     type="message",
                     status="completed",
                     content=[
                         ResponseOutputText(
-                            text="The result is 8",
+                            text="I'm the specialist and I can help with that!",
                             type="output_text",
                             annotations=[],
                         )
                     ],
                     role="assistant",
                 )
-            ]
-            second_response.usage = MagicMock()
-            second_response.usage.input_tokens = 20
-            second_response.usage.output_tokens = 15
-            second_response.usage.total_tokens = 35
-            second_response.usage.input_tokens_details = InputTokensDetails(
-                cached_tokens=5
-            )
-            second_response.usage.output_tokens_details = OutputTokensDetails(
-                reasoning_tokens=3
-            )
-
-            mock_fetch_response.side_effect = [first_response, second_response]
+            ],
+            parallel_tool_calls=False,
+            tool_choice="none",
+            tools=[],
+            created_at=10000000,
+            model="gpt-4.1-2025-04-14",
+            object="response",
+            usage=ResponseUsage(
+                input_tokens=20,
+                input_tokens_details=InputTokensDetails(
+                    cached_tokens=0,
+                ),
+                output_tokens=15,
+                output_tokens_details=OutputTokensDetails(
+                    reasoning_tokens=5,
+                ),
+                total_tokens=35,
+            ),
+        )
+    )
 
-            sentry_init(
-                integrations=[OpenAIAgentsIntegration()],
-                traces_sample_rate=1.0,
-                send_default_pii=True,
-            )
+    with patch.object(
+        agent_with_tool.model._client._client,
+        "send",
+        side_effect=[first_response, second_response],
+    ) as _:
+        sentry_init(
+            integrations=[OpenAIAgentsIntegration()],
+            traces_sample_rate=1.0,
+            send_default_pii=True,
+        )
 
-            events = capture_events()
+        events = capture_events()
 
-            result = await agents.Runner.run(
-                agent_with_tool,
-                "What is 5 + 3?",
-                run_config=test_run_config,
-            )
+        result = await agents.Runner.run(
+            agent_with_tool,
+            "What is 5 + 3?",
+            run_config=test_run_config,
+        )
 
-            assert result is not None
+        assert result is not None
 
     (transaction,) = events
     spans = transaction["spans"]
@@ -2966,7 +3089,9 @@ async def test_conversation_id_on_all_spans(
     reason="conversation_id support requires openai-agents >= 0.4.0",
 )
 @pytest.mark.asyncio
-async def test_conversation_id_on_tool_span(sentry_init, capture_events, test_agent):
+async def test_conversation_id_on_tool_span(
+    sentry_init, capture_events, test_agent, get_model_response
+):
     """
     Test that gen_ai.conversation.id is set on tool execution spans when passed to Runner.run().
     """
@@ -2976,65 +3101,98 @@ def simple_tool(message: str) -> str:
         """A simple tool"""
         return f"Result: {message}"
 
-    agent_with_tool = test_agent.clone(tools=[simple_tool])
-
-    with patch.dict(os.environ, {"OPENAI_API_KEY": "test-key"}):
-        with patch(
-            "agents.models.openai_responses.OpenAIResponsesModel.get_response"
-        ) as mock_get_response:
-            tool_call = ResponseFunctionToolCall(
-                id="call_123",
-                call_id="call_123",
-                name="simple_tool",
-                type="function_call",
-                arguments='{"message": "hello"}',
-            )
+    client = AsyncOpenAI(api_key="test-key")
+    model = OpenAIResponsesModel(model="gpt-4", openai_client=client)
+    agent_with_tool = test_agent.clone(tools=[simple_tool], model=model)
 
-            tool_response = ModelResponse(
-                output=[tool_call],
-                usage=Usage(
-                    requests=1, input_tokens=10, output_tokens=5, total_tokens=15
+    tool_response = get_model_response(
+        Response(
+            id="call_123",
+            output=[
+                ResponseFunctionToolCall(
+                    id="call_123",
+                    call_id="call_123",
+                    name="simple_tool",
+                    type="function_call",
+                    arguments='{"message": "hello"}',
+                )
+            ],
+            parallel_tool_calls=False,
+            tool_choice="none",
+            tools=[],
+            created_at=10000000,
+            model="gpt-4",
+            object="response",
+            usage=ResponseUsage(
+                input_tokens=10,
+                input_tokens_details=InputTokensDetails(
+                    cached_tokens=0,
                 ),
-                response_id="resp_tool_456",
-            )
-
-            final_response = ModelResponse(
-                output=[
-                    ResponseOutputMessage(
-                        id="msg_final",
-                        type="message",
-                        status="completed",
-                        content=[
-                            ResponseOutputText(
-                                text="Done",
-                                type="output_text",
-                                annotations=[],
-                            )
-                        ],
-                        role="assistant",
-                    )
-                ],
-                usage=Usage(
-                    requests=1, input_tokens=15, output_tokens=10, total_tokens=25
+                output_tokens=5,
+                output_tokens_details=OutputTokensDetails(
+                    reasoning_tokens=0,
                 ),
-                response_id="resp_final_789",
-            )
+                total_tokens=15,
+            ),
+        )
+    )
 
-            mock_get_response.side_effect = [tool_response, final_response]
+    final_response = get_model_response(
+        Response(
+            id="resp_final_789",
+            output=[
+                ResponseOutputMessage(
+                    id="msg_final",
+                    type="message",
+                    status="completed",
+                    content=[
+                        ResponseOutputText(
+                            text="Done",
+                            type="output_text",
+                            annotations=[],
+                        )
+                    ],
+                    role="assistant",
+                )
+            ],
+            parallel_tool_calls=False,
+            tool_choice="none",
+            tools=[],
+            created_at=10000000,
+            model="gpt-4",
+            object="response",
+            usage=ResponseUsage(
+                input_tokens=20,
+                input_tokens_details=InputTokensDetails(
+                    cached_tokens=5,
+                ),
+                output_tokens=10,
+                output_tokens_details=OutputTokensDetails(
+                    reasoning_tokens=8,
+                ),
+                total_tokens=30,
+            ),
+        )
+    )
 
-            sentry_init(
-                integrations=[OpenAIAgentsIntegration()],
-                traces_sample_rate=1.0,
-            )
+    with patch.object(
+        agent_with_tool.model._client._client,
+        "send",
+        side_effect=[tool_response, final_response],
+    ) as _:
+        sentry_init(
+            integrations=[OpenAIAgentsIntegration()],
+            traces_sample_rate=1.0,
+        )
 
-            events = capture_events()
+        events = capture_events()
 
-            await agents.Runner.run(
-                agent_with_tool,
-                "Use the tool",
-                run_config=test_run_config,
-                conversation_id="conv_tool_test_456",
-            )
+        await agents.Runner.run(
+            agent_with_tool,
+            "Use the tool",
+            run_config=test_run_config,
+            conversation_id="conv_tool_test_456",
+        )
 
     (transaction,) = events
     spans = transaction["spans"]