From 5e31ae41f58326ba1f1d4ccbdbdd8549562091c4 Mon Sep 17 00:00:00 2001 From: Kazuhiro Sera Date: Sat, 2 May 2026 14:46:59 +0900 Subject: [PATCH 1/3] test: add more unit tests for uncovered patterns --- .../experiemental/codex/test_payloads.py | 45 +++++++ tests/sandbox/test_session_state_roundtrip.py | 95 ++++++++++++++ tests/sandbox/test_token_truncation.py | 96 ++++++++++++++ tests/sandbox/test_workspace_payloads.py | 123 ++++++++++++++++++ tests/test_pretty_print.py | 58 ++++++++- tests/test_run_internal_approvals.py | 123 ++++++++++++++++++ 6 files changed, 538 insertions(+), 2 deletions(-) create mode 100644 tests/extensions/experiemental/codex/test_payloads.py create mode 100644 tests/sandbox/test_token_truncation.py create mode 100644 tests/sandbox/test_workspace_payloads.py create mode 100644 tests/test_run_internal_approvals.py diff --git a/tests/extensions/experiemental/codex/test_payloads.py b/tests/extensions/experiemental/codex/test_payloads.py new file mode 100644 index 0000000000..3041e7d324 --- /dev/null +++ b/tests/extensions/experiemental/codex/test_payloads.py @@ -0,0 +1,45 @@ +from __future__ import annotations + +import pytest + +from agents.extensions.experimental.codex.items import AgentMessageItem, TodoItem, TodoListItem + + +def test_dict_like_supports_mapping_access_for_dataclass_fields() -> None: + item = AgentMessageItem(id="item-1", text="hello") + + assert item["id"] == "item-1" + assert item["text"] == "hello" + assert item["type"] == "agent_message" + assert item.get("text") == "hello" + assert item.get("missing", "fallback") == "fallback" + assert "id" in item + assert "missing" not in item + assert object() not in item + assert list(item.keys()) == ["id", "text", "type"] + + +def test_dict_like_raises_key_error_for_unknown_fields() -> None: + item = AgentMessageItem(id="item-1", text="hello") + + with pytest.raises(KeyError, match="missing"): + _ = item["missing"] + + +def test_dict_like_as_dict_recursively_converts_nested_dataclasses() -> None: + item = TodoListItem( + id="todo-list-1", + items=[ + TodoItem(text="write tests", completed=True), + TodoItem(text="run tests", completed=False), + ], + ) + + assert item.as_dict() == { + "id": "todo-list-1", + "items": [ + {"text": "write tests", "completed": True}, + {"text": "run tests", "completed": False}, + ], + "type": "todo_list", + } diff --git a/tests/sandbox/test_session_state_roundtrip.py b/tests/sandbox/test_session_state_roundtrip.py index f90d0b8bba..7c0ac73ec7 100644 --- a/tests/sandbox/test_session_state_roundtrip.py +++ b/tests/sandbox/test_session_state_roundtrip.py @@ -12,6 +12,9 @@ from pathlib import Path from typing import Literal +import pytest +from pydantic import ValidationError + from agents.sandbox import Manifest from agents.sandbox.session import SandboxSessionState from agents.sandbox.snapshot import LocalSnapshot @@ -27,6 +30,21 @@ class _StubSessionState(SandboxSessionState): custom_field: str +class _PlainTypeSessionState(SandboxSessionState): + __test__ = False + type: str = "plain-type" + + +class _EmptyDefaultSessionState(SandboxSessionState): + __test__ = False + type: Literal[""] = "" + + +class _SimpleSessionState(SandboxSessionState): + __test__ = False + type: Literal["simple-roundtrip"] = "simple-roundtrip" + + # --------------------------------------------------------------------------- # Helpers # --------------------------------------------------------------------------- @@ -93,3 +111,80 @@ def test_model_dump_preserves_snapshot_subclass_fields(self) -> None: dumped = state.model_dump() assert "base_path" in dumped["snapshot"] + + def test_parse_returns_subclass_instances_as_is(self) -> None: + state = _make_session_state() + + assert SandboxSessionState.parse(state) is state + + def test_parse_upgrades_base_instance_through_registry(self) -> None: + state = _SimpleSessionState( + session_id=uuid.UUID("bbbbbbbb-bbbb-bbbb-bbbb-bbbbbbbbbbbb"), + snapshot=LocalSnapshot(id="snap-1", base_path=Path("/tmp/snapshots")), + manifest=Manifest(), + ) + base_instance = SandboxSessionState.model_validate(state.model_dump()) + + reconstructed = SandboxSessionState.parse(base_instance) + + assert type(reconstructed) is _SimpleSessionState + assert reconstructed.session_id == uuid.UUID("bbbbbbbb-bbbb-bbbb-bbbb-bbbbbbbbbbbb") + + @pytest.mark.parametrize( + ("payload", "error_type", "message"), + [ + ({}, ValueError, "must include a string `type`"), + ({"type": "missing"}, ValueError, "unknown sandbox session state type `missing`"), + ("not-a-state", TypeError, "session state payload must be"), + ], + ) + def test_parse_rejects_invalid_payloads( + self, + payload: object, + error_type: type[Exception], + message: str, + ) -> None: + with pytest.raises(error_type, match=message): + SandboxSessionState.parse(payload) + + def test_subclass_registration_skips_non_literal_or_empty_type_defaults(self) -> None: + assert "plain-type" not in SandboxSessionState._subclass_registry + assert "" not in SandboxSessionState._subclass_registry + + @pytest.mark.parametrize( + ("raw_ports", "expected"), + [ + (None, ()), + (8080, (8080,)), + ([8080, 9000, 8080], (8080, 9000)), + ], + ) + def test_exposed_ports_are_normalized( + self, raw_ports: object, expected: tuple[int, ...] + ) -> None: + state = _StubSessionState( + snapshot=LocalSnapshot(id="snap-1", base_path=Path("/tmp/snapshots")), + manifest=Manifest(), + custom_field="my-value", + exposed_ports=raw_ports, # type: ignore[arg-type] + ) + + assert state.exposed_ports == expected + + @pytest.mark.parametrize( + ("raw_ports", "message"), + [ + ("8080", "exposed_ports must be an iterable"), + ([8080, "9000"], "exposed_ports must contain integers"), + ([0], "exposed_ports entries must be between 1 and 65535"), + ([65536], "exposed_ports entries must be between 1 and 65535"), + ], + ) + def test_exposed_ports_reject_invalid_values(self, raw_ports: object, message: str) -> None: + with pytest.raises((TypeError, ValidationError), match=message): + _StubSessionState( + snapshot=LocalSnapshot(id="snap-1", base_path=Path("/tmp/snapshots")), + manifest=Manifest(), + custom_field="my-value", + exposed_ports=raw_ports, # type: ignore[arg-type] + ) diff --git a/tests/sandbox/test_token_truncation.py b/tests/sandbox/test_token_truncation.py new file mode 100644 index 0000000000..fdd0f0627c --- /dev/null +++ b/tests/sandbox/test_token_truncation.py @@ -0,0 +1,96 @@ +from __future__ import annotations + +from agents.sandbox.util.token_truncation import ( + TruncationPolicy, + approx_bytes_for_tokens, + approx_token_count, + approx_tokens_from_byte_count, + format_truncation_marker, + formatted_truncate_text, + formatted_truncate_text_with_token_count, + removed_units_for_source, + split_budget, + split_string, + truncate_text, + truncate_with_byte_estimate, + truncate_with_token_budget, +) + + +def test_truncation_policy_clamps_negative_limits_and_converts_budgets() -> None: + byte_policy = TruncationPolicy.bytes(-10) + token_policy = TruncationPolicy.tokens(-2) + + assert byte_policy.limit == 0 + assert byte_policy.token_budget() == 0 + assert byte_policy.byte_budget() == 0 + assert token_policy.limit == 0 + assert token_policy.token_budget() == 0 + assert token_policy.byte_budget() == 0 + + +def test_formatted_truncate_text_returns_short_content_unchanged() -> None: + assert formatted_truncate_text("short", TruncationPolicy.bytes(20)) == "short" + + +def test_formatted_truncate_text_adds_line_count_when_truncated() -> None: + result = formatted_truncate_text("alpha\nbeta\ngamma", TruncationPolicy.bytes(8)) + + assert result.startswith("Total output lines: 3\n\n") + assert "chars truncated" in result + + +def test_formatted_truncate_text_with_token_count_handles_none_and_short_content() -> None: + assert formatted_truncate_text_with_token_count("short", None) == ("short", None) + assert formatted_truncate_text_with_token_count("short", 10) == ("short", None) + + +def test_formatted_truncate_text_with_token_count_reports_original_count() -> None: + result, original_token_count = formatted_truncate_text_with_token_count("abcdefghi", 1) + + assert result.startswith("Total output lines: 1\n\n") + assert "tokens truncated" in result + assert original_token_count == approx_token_count("abcdefghi") + + +def test_truncate_text_dispatches_byte_and_token_modes() -> None: + assert truncate_text("abcdef", TruncationPolicy.bytes(4)).startswith("a") + assert "tokens truncated" in truncate_text("abcdefghi", TruncationPolicy.tokens(1)) + + +def test_truncate_with_token_budget_handles_empty_and_short_content() -> None: + assert truncate_with_token_budget("", TruncationPolicy.tokens(1)) == ("", None) + assert truncate_with_token_budget("abc", TruncationPolicy.tokens(1)) == ("abc", None) + + +def test_truncate_with_byte_estimate_handles_empty_zero_and_short_content() -> None: + assert truncate_with_byte_estimate("", TruncationPolicy.bytes(0)) == "" + assert "chars truncated" in truncate_with_byte_estimate("abc", TruncationPolicy.bytes(0)) + assert truncate_with_byte_estimate("abc", TruncationPolicy.bytes(10)) == "abc" + + +def test_split_string_preserves_utf8_boundaries() -> None: + removed_chars, prefix, suffix = split_string("aあbいc", 2, 4) + + assert prefix == "a" + assert suffix == "いc" + assert removed_chars == 2 + + +def test_split_string_handles_empty_content() -> None: + assert split_string("", 10, 10) == (0, "", "") + + +def test_formatting_and_estimate_helpers() -> None: + byte_policy = TruncationPolicy.bytes(8) + token_policy = TruncationPolicy.tokens(2) + + assert "chars truncated" in format_truncation_marker(byte_policy, 3) + assert "tokens truncated" in format_truncation_marker(token_policy, 2) + assert split_budget(5) == (2, 3) + assert removed_units_for_source(byte_policy, removed_bytes=10, removed_chars=4) == 4 + assert removed_units_for_source(token_policy, removed_bytes=9, removed_chars=4) == 3 + assert approx_token_count("abcde") == 2 + assert approx_bytes_for_tokens(-1) == 0 + assert approx_tokens_from_byte_count(0) == 0 + assert approx_tokens_from_byte_count(5) == 2 diff --git a/tests/sandbox/test_workspace_payloads.py b/tests/sandbox/test_workspace_payloads.py new file mode 100644 index 0000000000..3a5b8d2b2c --- /dev/null +++ b/tests/sandbox/test_workspace_payloads.py @@ -0,0 +1,123 @@ +from __future__ import annotations + +import io +from pathlib import Path +from typing import Any, cast + +import pytest + +from agents.sandbox.errors import ErrorCode, WorkspaceWriteTypeError +from agents.sandbox.session.workspace_payloads import coerce_write_payload + + +class _Headers: + def __init__(self, value: str | None) -> None: + self._value = value + + def get(self, name: str) -> str | None: + assert name == "Content-Length" + return self._value + + +class _HeaderStream(io.BytesIO): + def __init__(self, data: bytes, content_length: str | None) -> None: + super().__init__(data) + self.headers = _Headers(content_length) + + +class _LengthStream(io.BytesIO): + def __init__(self, data: bytes, length: int) -> None: + super().__init__(data) + self.length = length + + +class _NoneReadStream: + def read(self, size: int = -1) -> Any: + _ = size + return None + + +class _BytearrayReadStream: + def read(self, size: int = -1) -> Any: + _ = size + return bytearray(b"abc") + + +class _TextReadStream: + def read(self, size: int = -1) -> Any: + _ = size + return "not-bytes" + + +class _UnseekableStream(io.BytesIO): + def tell(self) -> int: + raise OSError("not seekable") + + +def test_coerce_write_payload_adapts_binary_reads() -> None: + payload = coerce_write_payload(path=Path("/workspace/file.bin"), data=io.BytesIO(b"abc")) + + assert payload.content_length == 3 + assert payload.stream.readable() is True + assert payload.stream.read(1) == b"a" + assert payload.stream.read() == b"bc" + + +def test_coerce_write_payload_adapts_bytearray_and_none_reads() -> None: + bytearray_payload = coerce_write_payload( + path=Path("/workspace/file.bin"), + data=cast(io.IOBase, _BytearrayReadStream()), + ) + none_payload = coerce_write_payload( + path=Path("/workspace/empty.bin"), + data=cast(io.IOBase, _NoneReadStream()), + ) + + assert bytearray_payload.stream.read() == b"abc" + assert none_payload.stream.read() == b"" + + +def test_coerce_write_payload_supports_readinto_seek_and_tell() -> None: + payload = coerce_write_payload(path=Path("/workspace/file.bin"), data=io.BytesIO(b"abcdef")) + buffer = bytearray(3) + + assert cast(Any, payload.stream).readinto(buffer) == 3 + assert bytes(buffer) == b"abc" + assert payload.stream.tell() == 3 + assert payload.stream.seek(1) == 1 + assert payload.stream.read(2) == b"bc" + + +def test_coerce_write_payload_rejects_text_chunks() -> None: + payload = coerce_write_payload( + path=Path("/workspace/file.txt"), + data=cast(io.IOBase, _TextReadStream()), + ) + + with pytest.raises(WorkspaceWriteTypeError) as exc_info: + payload.stream.read() + + assert exc_info.value.error_code is ErrorCode.WORKSPACE_WRITE_TYPE_ERROR + assert exc_info.value.context == { + "path": "/workspace/file.txt", + "actual_type": "str", + } + + +@pytest.mark.parametrize( + ("stream", "expected"), + [ + (_LengthStream(b"abc", 5), 5), + (_HeaderStream(b"abc", "7"), 7), + (_HeaderStream(b"abc", "-1"), 3), + (_HeaderStream(b"abc", "invalid"), 3), + (_UnseekableStream(b"abc"), None), + ], +) +def test_coerce_write_payload_uses_best_effort_content_length( + stream: io.IOBase, + expected: int | None, +) -> None: + payload = coerce_write_payload(path=Path("/workspace/file.bin"), data=stream) + + assert payload.content_length == expected diff --git a/tests/test_pretty_print.py b/tests/test_pretty_print.py index b2218a279d..79327cfb92 100644 --- a/tests/test_pretty_print.py +++ b/tests/test_pretty_print.py @@ -4,9 +4,13 @@ from inline_snapshot import snapshot from pydantic import BaseModel -from agents import Agent, Runner +from agents import Agent, RunContextWrapper, RunErrorDetails, Runner, RunResult from agents.agent_output import _WRAPPER_DICT_KEY -from agents.util._pretty_print import pretty_print_result, pretty_print_run_result_streaming +from agents.util._pretty_print import ( + pretty_print_result, + pretty_print_run_error_details, + pretty_print_run_result_streaming, +) from tests.fake_model import FakeModel from .test_responses import get_final_output_message, get_text_message @@ -33,6 +37,56 @@ async def test_pretty_result(): """) +def test_pretty_result_handles_none_final_output(): + agent = Agent(name="none_agent") + result = RunResult( + input="Hello", + new_items=[], + raw_responses=[], + final_output=None, + input_guardrail_results=[], + output_guardrail_results=[], + tool_input_guardrail_results=[], + tool_output_guardrail_results=[], + context_wrapper=RunContextWrapper(context=None), + _last_agent=agent, + ) + + assert pretty_print_result(result) == snapshot("""\ +RunResult: +- Last agent: Agent(name="none_agent", ...) +- Final output (NoneType): + None +- 0 new item(s) +- 0 raw response(s) +- 0 input guardrail result(s) +- 0 output guardrail result(s) +(See `RunResult` for more details)\ +""") + + +def test_pretty_run_error_details(): + agent = Agent(name="error_agent") + details = RunErrorDetails( + input="Hello", + new_items=[], + raw_responses=[], + last_agent=agent, + context_wrapper=RunContextWrapper(context=None), + input_guardrail_results=[], + output_guardrail_results=[], + ) + + assert pretty_print_run_error_details(details) == snapshot("""\ +RunErrorDetails: +- Last agent: Agent(name="error_agent", ...) +- 0 new item(s) +- 0 raw response(s) +- 0 input guardrail result(s) +(See `RunErrorDetails` for more details)\ +""") + + @pytest.mark.asyncio async def test_pretty_run_result_streaming(): model = FakeModel() diff --git a/tests/test_run_internal_approvals.py b/tests/test_run_internal_approvals.py new file mode 100644 index 0000000000..44c57f137e --- /dev/null +++ b/tests/test_run_internal_approvals.py @@ -0,0 +1,123 @@ +from __future__ import annotations + +from dataclasses import dataclass +from typing import Any, cast + +from openai.types.responses import ResponseFunctionToolCall + +from agents import Agent +from agents.items import MessageOutputItem, ToolCallOutputItem, TResponseInputItem +from agents.run_internal.approvals import ( + _build_function_tool_call_for_approval_error, + append_approval_error_output, + append_input_items_excluding_approvals, + approvals_from_step, + filter_tool_approvals, +) +from tests.utils.factories import make_message_output, make_tool_approval_item, make_tool_call + + +@dataclass +class _Step: + interruptions: list[Any] + + +@dataclass +class _NoInterruptionsStep: + value: str + + +class _NamespacedToolCall: + namespace = "object_namespace" + + +def test_filter_tool_approvals_keeps_only_approval_items() -> None: + agent = Agent(name="test") + approval = make_tool_approval_item(agent) + + assert filter_tool_approvals(["text", approval, object()]) == [approval] + + +def test_approvals_from_step_handles_missing_and_mixed_interruptions() -> None: + agent = Agent(name="test") + approval = make_tool_approval_item(agent) + + assert approvals_from_step(_NoInterruptionsStep("none")) == [] + assert approvals_from_step(_Step(["other", approval])) == [approval] + + +def test_append_input_items_excluding_approvals_skips_approval_placeholders() -> None: + agent = Agent(name="test") + base_input: list[TResponseInputItem] = [] + message = MessageOutputItem(agent=agent, raw_item=make_message_output(text="done")) + approval = make_tool_approval_item(agent, call_id="call_approval") + + append_input_items_excluding_approvals(base_input, [message, approval]) + + assert len(base_input) == 1 + assert cast(dict[str, Any], base_input[0])["type"] == "message" + + +def test_append_approval_error_output_emits_function_tool_output() -> None: + agent = Agent(name="test") + generated_items: list[Any] = [] + + append_approval_error_output( + generated_items=generated_items, + agent=agent, + tool_call={"namespace": "dict_namespace"}, + tool_name="needs_approval", + call_id=None, + message="approval denied", + ) + + assert len(generated_items) == 1 + output_item = generated_items[0] + assert isinstance(output_item, ToolCallOutputItem) + assert output_item.agent is agent + assert output_item.output == "approval denied" + assert output_item.raw_item == { + "type": "function_call_output", + "call_id": "unknown", + "output": "approval denied", + } + + +def test_build_function_tool_call_for_approval_error_reuses_typed_calls() -> None: + tool_call = make_tool_call(call_id="call_1", name="typed_tool") + + assert ( + _build_function_tool_call_for_approval_error(tool_call, "ignored", "ignored") is tool_call + ) + + +def test_build_function_tool_call_for_approval_error_preserves_namespace_sources() -> None: + from_dict = _build_function_tool_call_for_approval_error( + {"namespace": "dict_namespace"}, + "dict_tool", + "call_dict", + ) + from_object = _build_function_tool_call_for_approval_error( + _NamespacedToolCall(), + "object_tool", + "call_object", + ) + + assert isinstance(from_dict, ResponseFunctionToolCall) + assert from_dict.namespace == "dict_namespace" + assert from_dict.call_id == "call_dict" + assert from_object.namespace == "object_namespace" + assert from_object.call_id == "call_object" + + +def test_build_function_tool_call_for_approval_error_ignores_empty_namespaces() -> None: + tool_call = _build_function_tool_call_for_approval_error( + {"namespace": ""}, + "tool", + "call_1", + ) + + assert not hasattr(tool_call, "namespace") or tool_call.namespace is None + assert tool_call.name == "tool" + assert tool_call.arguments == "{}" + assert tool_call.status == "completed" From 6d06268ab02ab6ff6482d006a526a09fde69da6a Mon Sep 17 00:00:00 2001 From: Kazuhiro Sera Date: Sat, 2 May 2026 15:01:56 +0900 Subject: [PATCH 2/3] organize tests --- tests/conftest.py | 2 +- .../{test_sandbox_blaxel.py => sandbox/test_blaxel.py} | 0 .../test_cloudflare.py} | 0 .../{test_sandbox_daytona.py => sandbox/test_daytona.py} | 0 .../extensions/{test_sandbox_e2b.py => sandbox/test_e2b.py} | 0 .../{test_sandbox_modal.py => sandbox/test_modal.py} | 0 .../{test_sandbox_runloop.py => sandbox/test_runloop.py} | 0 .../{ => sandbox}/test_runloop_capabilities_example.py | 4 ++-- .../test_runloop_mounts.py} | 0 .../{test_sandbox_vercel.py => sandbox/test_vercel.py} | 0 tests/{ => memory}/test_openai_conversations_session.py | 0 tests/{ => memory}/test_session.py | 5 ++--- tests/{ => memory}/test_session_limit.py | 2 +- tests/{ => models}/test_anthropic_thinking_blocks.py | 0 tests/{ => models}/test_extended_thinking_message_order.py | 0 tests/{ => models}/test_gemini_thought_signatures.py | 0 tests/{ => models}/test_gemini_thought_signatures_stream.py | 0 tests/{ => models}/test_model_payload_iterators.py | 0 tests/{ => models}/test_model_retry.py | 3 +-- tests/{ => models}/test_openai_chatcompletions.py | 0 tests/{ => models}/test_openai_chatcompletions_converter.py | 0 tests/{ => models}/test_openai_chatcompletions_stream.py | 0 tests/{ => models}/test_openai_client_utils.py | 0 tests/{ => models}/test_openai_responses.py | 0 tests/{ => models}/test_openai_responses_converter.py | 0 tests/{ => models}/test_reasoning_content.py | 0 .../test_remove_openai_responses_api_incompatible_fields.py | 0 tests/{ => models}/test_responses_websocket_session.py | 0 tests/{ => realtime}/test_session_exceptions.py | 0 tests/{test_sandbox_memory.py => sandbox/test_memory.py} | 0 .../test_runtime_agent_preparation.py} | 0 31 files changed, 7 insertions(+), 9 deletions(-) rename tests/extensions/{test_sandbox_blaxel.py => sandbox/test_blaxel.py} (100%) rename tests/extensions/{test_sandbox_cloudflare.py => sandbox/test_cloudflare.py} (100%) rename tests/extensions/{test_sandbox_daytona.py => sandbox/test_daytona.py} (100%) rename tests/extensions/{test_sandbox_e2b.py => sandbox/test_e2b.py} (100%) rename tests/extensions/{test_sandbox_modal.py => sandbox/test_modal.py} (100%) rename tests/extensions/{test_sandbox_runloop.py => sandbox/test_runloop.py} (100%) rename tests/extensions/{ => sandbox}/test_runloop_capabilities_example.py (98%) rename tests/extensions/{test_sandbox_runloop_mounts.py => sandbox/test_runloop_mounts.py} (100%) rename tests/extensions/{test_sandbox_vercel.py => sandbox/test_vercel.py} (100%) rename tests/{ => memory}/test_openai_conversations_session.py (100%) rename tests/{ => memory}/test_session.py (99%) rename tests/{ => memory}/test_session_limit.py (99%) rename tests/{ => models}/test_anthropic_thinking_blocks.py (100%) rename tests/{ => models}/test_extended_thinking_message_order.py (100%) rename tests/{ => models}/test_gemini_thought_signatures.py (100%) rename tests/{ => models}/test_gemini_thought_signatures_stream.py (100%) rename tests/{ => models}/test_model_payload_iterators.py (100%) rename tests/{ => models}/test_model_retry.py (99%) rename tests/{ => models}/test_openai_chatcompletions.py (100%) rename tests/{ => models}/test_openai_chatcompletions_converter.py (100%) rename tests/{ => models}/test_openai_chatcompletions_stream.py (100%) rename tests/{ => models}/test_openai_client_utils.py (100%) rename tests/{ => models}/test_openai_responses.py (100%) rename tests/{ => models}/test_openai_responses_converter.py (100%) rename tests/{ => models}/test_reasoning_content.py (100%) rename tests/{ => models}/test_remove_openai_responses_api_incompatible_fields.py (100%) rename tests/{ => models}/test_responses_websocket_session.py (100%) rename tests/{ => realtime}/test_session_exceptions.py (100%) rename tests/{test_sandbox_memory.py => sandbox/test_memory.py} (100%) rename tests/{test_sandbox_runtime_agent_preparation.py => sandbox/test_runtime_agent_preparation.py} (100%) diff --git a/tests/conftest.py b/tests/conftest.py index 21a3f6d7b5..c279b6c9ef 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -20,12 +20,12 @@ [ "test_example_workflows.py", "test_run_state.py", - "test_sandbox_memory.py", "sandbox/capabilities/test_filesystem_capability.py", "sandbox/integration_tests/test_runner_pause_resume.py", "sandbox/test_client_options.py", "sandbox/test_exposed_ports.py", "sandbox/test_extract.py", + "sandbox/test_memory.py", "sandbox/test_runtime.py", "sandbox/test_session_manager.py", "sandbox/test_session_sinks.py", diff --git a/tests/extensions/test_sandbox_blaxel.py b/tests/extensions/sandbox/test_blaxel.py similarity index 100% rename from tests/extensions/test_sandbox_blaxel.py rename to tests/extensions/sandbox/test_blaxel.py diff --git a/tests/extensions/test_sandbox_cloudflare.py b/tests/extensions/sandbox/test_cloudflare.py similarity index 100% rename from tests/extensions/test_sandbox_cloudflare.py rename to tests/extensions/sandbox/test_cloudflare.py diff --git a/tests/extensions/test_sandbox_daytona.py b/tests/extensions/sandbox/test_daytona.py similarity index 100% rename from tests/extensions/test_sandbox_daytona.py rename to tests/extensions/sandbox/test_daytona.py diff --git a/tests/extensions/test_sandbox_e2b.py b/tests/extensions/sandbox/test_e2b.py similarity index 100% rename from tests/extensions/test_sandbox_e2b.py rename to tests/extensions/sandbox/test_e2b.py diff --git a/tests/extensions/test_sandbox_modal.py b/tests/extensions/sandbox/test_modal.py similarity index 100% rename from tests/extensions/test_sandbox_modal.py rename to tests/extensions/sandbox/test_modal.py diff --git a/tests/extensions/test_sandbox_runloop.py b/tests/extensions/sandbox/test_runloop.py similarity index 100% rename from tests/extensions/test_sandbox_runloop.py rename to tests/extensions/sandbox/test_runloop.py diff --git a/tests/extensions/test_runloop_capabilities_example.py b/tests/extensions/sandbox/test_runloop_capabilities_example.py similarity index 98% rename from tests/extensions/test_runloop_capabilities_example.py rename to tests/extensions/sandbox/test_runloop_capabilities_example.py index fafacb521f..c87a3ffcc0 100644 --- a/tests/extensions/test_runloop_capabilities_example.py +++ b/tests/extensions/sandbox/test_runloop_capabilities_example.py @@ -11,14 +11,14 @@ def _load_example_module() -> Any: path = ( - Path(__file__).resolve().parents[2] + Path(__file__).resolve().parents[3] / "examples" / "sandbox" / "extensions" / "runloop" / "capabilities.py" ) - module_name = "tests.extensions.runloop_capabilities_example" + module_name = "tests.extensions.sandbox.runloop_capabilities_example" spec = importlib.util.spec_from_file_location(module_name, path) assert spec is not None assert spec.loader is not None diff --git a/tests/extensions/test_sandbox_runloop_mounts.py b/tests/extensions/sandbox/test_runloop_mounts.py similarity index 100% rename from tests/extensions/test_sandbox_runloop_mounts.py rename to tests/extensions/sandbox/test_runloop_mounts.py diff --git a/tests/extensions/test_sandbox_vercel.py b/tests/extensions/sandbox/test_vercel.py similarity index 100% rename from tests/extensions/test_sandbox_vercel.py rename to tests/extensions/sandbox/test_vercel.py diff --git a/tests/test_openai_conversations_session.py b/tests/memory/test_openai_conversations_session.py similarity index 100% rename from tests/test_openai_conversations_session.py rename to tests/memory/test_openai_conversations_session.py diff --git a/tests/test_session.py b/tests/memory/test_session.py similarity index 99% rename from tests/test_session.py rename to tests/memory/test_session.py index aa8211500a..27b5c6fa7b 100644 --- a/tests/test_session.py +++ b/tests/memory/test_session.py @@ -8,9 +8,8 @@ import pytest from agents import Agent, RunConfig, Runner, SQLiteSession, TResponseInputItem - -from .fake_model import FakeModel -from .test_responses import get_text_message +from tests.fake_model import FakeModel +from tests.test_responses import get_text_message # Helper functions for parametrized testing of different Runner methods diff --git a/tests/test_session_limit.py b/tests/memory/test_session_limit.py similarity index 99% rename from tests/test_session_limit.py rename to tests/memory/test_session_limit.py index f8625f05c5..5b908ee967 100644 --- a/tests/test_session_limit.py +++ b/tests/memory/test_session_limit.py @@ -8,8 +8,8 @@ from agents import Agent, RunConfig, SQLiteSession from agents.memory import SessionSettings from tests.fake_model import FakeModel +from tests.memory.test_session import run_agent_async from tests.test_responses import get_text_message -from tests.test_session import run_agent_async @pytest.mark.parametrize("runner_method", ["run", "run_sync", "run_streamed"]) diff --git a/tests/test_anthropic_thinking_blocks.py b/tests/models/test_anthropic_thinking_blocks.py similarity index 100% rename from tests/test_anthropic_thinking_blocks.py rename to tests/models/test_anthropic_thinking_blocks.py diff --git a/tests/test_extended_thinking_message_order.py b/tests/models/test_extended_thinking_message_order.py similarity index 100% rename from tests/test_extended_thinking_message_order.py rename to tests/models/test_extended_thinking_message_order.py diff --git a/tests/test_gemini_thought_signatures.py b/tests/models/test_gemini_thought_signatures.py similarity index 100% rename from tests/test_gemini_thought_signatures.py rename to tests/models/test_gemini_thought_signatures.py diff --git a/tests/test_gemini_thought_signatures_stream.py b/tests/models/test_gemini_thought_signatures_stream.py similarity index 100% rename from tests/test_gemini_thought_signatures_stream.py rename to tests/models/test_gemini_thought_signatures_stream.py diff --git a/tests/test_model_payload_iterators.py b/tests/models/test_model_payload_iterators.py similarity index 100% rename from tests/test_model_payload_iterators.py rename to tests/models/test_model_payload_iterators.py diff --git a/tests/test_model_retry.py b/tests/models/test_model_retry.py similarity index 99% rename from tests/test_model_retry.py rename to tests/models/test_model_retry.py index 98b87fbea0..5a99efd282 100644 --- a/tests/test_model_retry.py +++ b/tests/models/test_model_retry.py @@ -25,8 +25,7 @@ ) from agents.run_internal.model_retry import get_response_with_retry, stream_response_with_retry from agents.usage import Usage - -from .test_responses import get_text_message +from tests.test_responses import get_text_message def _connection_error(message: str = "connection error") -> APIConnectionError: diff --git a/tests/test_openai_chatcompletions.py b/tests/models/test_openai_chatcompletions.py similarity index 100% rename from tests/test_openai_chatcompletions.py rename to tests/models/test_openai_chatcompletions.py diff --git a/tests/test_openai_chatcompletions_converter.py b/tests/models/test_openai_chatcompletions_converter.py similarity index 100% rename from tests/test_openai_chatcompletions_converter.py rename to tests/models/test_openai_chatcompletions_converter.py diff --git a/tests/test_openai_chatcompletions_stream.py b/tests/models/test_openai_chatcompletions_stream.py similarity index 100% rename from tests/test_openai_chatcompletions_stream.py rename to tests/models/test_openai_chatcompletions_stream.py diff --git a/tests/test_openai_client_utils.py b/tests/models/test_openai_client_utils.py similarity index 100% rename from tests/test_openai_client_utils.py rename to tests/models/test_openai_client_utils.py diff --git a/tests/test_openai_responses.py b/tests/models/test_openai_responses.py similarity index 100% rename from tests/test_openai_responses.py rename to tests/models/test_openai_responses.py diff --git a/tests/test_openai_responses_converter.py b/tests/models/test_openai_responses_converter.py similarity index 100% rename from tests/test_openai_responses_converter.py rename to tests/models/test_openai_responses_converter.py diff --git a/tests/test_reasoning_content.py b/tests/models/test_reasoning_content.py similarity index 100% rename from tests/test_reasoning_content.py rename to tests/models/test_reasoning_content.py diff --git a/tests/test_remove_openai_responses_api_incompatible_fields.py b/tests/models/test_remove_openai_responses_api_incompatible_fields.py similarity index 100% rename from tests/test_remove_openai_responses_api_incompatible_fields.py rename to tests/models/test_remove_openai_responses_api_incompatible_fields.py diff --git a/tests/test_responses_websocket_session.py b/tests/models/test_responses_websocket_session.py similarity index 100% rename from tests/test_responses_websocket_session.py rename to tests/models/test_responses_websocket_session.py diff --git a/tests/test_session_exceptions.py b/tests/realtime/test_session_exceptions.py similarity index 100% rename from tests/test_session_exceptions.py rename to tests/realtime/test_session_exceptions.py diff --git a/tests/test_sandbox_memory.py b/tests/sandbox/test_memory.py similarity index 100% rename from tests/test_sandbox_memory.py rename to tests/sandbox/test_memory.py diff --git a/tests/test_sandbox_runtime_agent_preparation.py b/tests/sandbox/test_runtime_agent_preparation.py similarity index 100% rename from tests/test_sandbox_runtime_agent_preparation.py rename to tests/sandbox/test_runtime_agent_preparation.py From 6fbdacee6b3d1152a29ced299b1f8a16bb410643 Mon Sep 17 00:00:00 2001 From: Kazuhiro Sera Date: Sat, 2 May 2026 15:09:25 +0900 Subject: [PATCH 3/3] fix test failures on windows --- tests/sandbox/test_workspace_payloads.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/tests/sandbox/test_workspace_payloads.py b/tests/sandbox/test_workspace_payloads.py index 3a5b8d2b2c..5084da6ff2 100644 --- a/tests/sandbox/test_workspace_payloads.py +++ b/tests/sandbox/test_workspace_payloads.py @@ -89,8 +89,9 @@ def test_coerce_write_payload_supports_readinto_seek_and_tell() -> None: def test_coerce_write_payload_rejects_text_chunks() -> None: + path = Path("/workspace/file.txt") payload = coerce_write_payload( - path=Path("/workspace/file.txt"), + path=path, data=cast(io.IOBase, _TextReadStream()), ) @@ -99,7 +100,7 @@ def test_coerce_write_payload_rejects_text_chunks() -> None: assert exc_info.value.error_code is ErrorCode.WORKSPACE_WRITE_TYPE_ERROR assert exc_info.value.context == { - "path": "/workspace/file.txt", + "path": str(path), "actual_type": "str", }