Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
65 changes: 63 additions & 2 deletions src/agents/run_internal/items.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@
"local_shell_call": "local_shell_call_output",
"tool_search_call": "tool_search_output",
}
_CALL_OUTPUT_TYPES: frozenset[str] = frozenset(_TOOL_CALL_TO_OUTPUT_TYPE.values())

__all__ = [
"ReasoningItemIdPolicy",
Expand All @@ -37,6 +38,7 @@
"TOOL_CALL_SESSION_TITLE_KEY",
"copy_input_items",
"drop_orphan_function_calls",
"drop_orphaned_messages_after_consumed_reasoning",
"ensure_input_item_format",
"prepare_model_input_items",
"run_item_to_input_item",
Expand Down Expand Up @@ -179,6 +181,63 @@ def _drop_reasoning_items_preceding_dropped_calls(
return [entry for idx, entry in enumerate(items) if idx not in excluded]


def drop_orphaned_messages_after_consumed_reasoning(
items: list[TResponseInputItem],
) -> list[TResponseInputItem]:
"""Drop message items that are orphaned because their preceding reasoning item was consumed
by a tool call.

The Responses API requires every message item to be paired with its own reasoning item. When
any tool call (function_call, computer_call, shell_call, etc.) follows a reasoning item, that
reasoning item is considered consumed by the call. Any message item that follows (e.g. the
handoff agent's closing message) has no paired reasoning and causes a 400 from some providers:
``Item 'msg_...' of type 'message' was provided without its required 'reasoning' item``.

The drop is scoped to the first message after the consuming call. Dropping resets the flag so
that later turns whose assistant messages legitimately lack a reasoning item are not affected.

This is the inverse of :func:`drop_orphan_function_calls`, which removes function calls
without outputs and their preceding reasoning items.
"""
fresh_reasoning = False # True when the most-recent reasoning item is not yet consumed
consumed_by_call = False # True after any tool call consumes the fresh reasoning
result: list[TResponseInputItem] = []

for item in items:
if not isinstance(item, dict):
result.append(item)
continue
item_type = item.get("type")

if item_type == "reasoning":
fresh_reasoning = True
consumed_by_call = False
result.append(item)
elif item_type in _TOOL_CALL_TO_OUTPUT_TYPE:
if fresh_reasoning:
fresh_reasoning = False
consumed_by_call = True # reasoning is now consumed by this call
result.append(item)
elif item_type in _CALL_OUTPUT_TYPES:
# Any call output (function_call_output, computer_call_output, etc.) marks the
# end of its call sequence. The SDK appends call outputs after all model output
# items, so any orphaned message has already been dropped by this point. Reset
# here so that turns with no trailing message do not bleed consumed_by_call into
# the next agent's responses regardless of the call type.
consumed_by_call = False
result.append(item)
elif item_type == "message":
if not consumed_by_call:
result.append(item)
Comment on lines +229 to +231
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P2 Badge Limit orphan pruning to the consumed reasoning turn

Because generated_items is cumulative across turns before it reaches prepare_model_input_items, this state machine can span multiple model responses. After any reasoning-backed function_call/computer_call, fresh_reasoning stays false until another reasoning item, so a later valid assistant message from a provider/model turn that does not emit reasoning (for example a message plus another tool/handoff that requires a following turn) is silently removed even though it is not the trailing message orphaned by the original handoff call. That loses assistant history and can change subsequent tool or handoff behavior; the pruning should be constrained to messages tied to the same consumed reasoning item/response, not every later message in the accumulated history.

Useful? React with 👍 / 👎.

Copy link
Copy Markdown
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Good catch. The state machine was too broad; fresh_reasoning stayed false across all accumulated turns, meaning any later agent turn emitting a message without its own reasoning item would also be silently dropped.

Fixed by replacing the two-flag approach with a single consumed_by_call flag that resets to False as soon as the first orphaned message is dropped. Pruning is now scoped to exactly one trailing message per handoff turn. Updated the test to make the cross-turn invariant explicit: the delegate's response (no reasoning) must survive and appear in final_output.

Comment on lines +229 to +231
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P2 Badge Preserve user messages after consumed tool calls

When the consumed-call state is still set, this branch drops every type == "message" item without checking role. In a resumed/session history that ends with an unanswered reasoning-backed tool call, the next turn's user input is represented as a message immediately after that stale call; this new pass removes the user's follow-up before drop_orphan_function_calls() later cleans up the stale call/reasoning pair, so the next model request can silently lose the user's input. The pruning should be limited to assistant/model messages that can actually be orphaned by consumed reasoning.

Useful? React with 👍 / 👎.

# else: orphaned — reasoning consumed by the preceding call; drop without resetting
# so that any further messages in the same turn are also dropped until a
# call-output item resets consumed_by_call.
else:
result.append(item)

return result


def ensure_input_item_format(item: TResponseInputItem) -> TResponseInputItem:
"""Ensure a single item is normalized for model input."""
coerced = _coerce_to_dict(item)
Expand Down Expand Up @@ -213,7 +272,8 @@ def prepare_model_input_items(
return normalized_caller_items

normalized_generated_items = normalize_input_items_for_api(list(generated_items))
filtered_generated_items = drop_orphan_function_calls(normalized_generated_items)
filtered_generated_items = drop_orphaned_messages_after_consumed_reasoning(normalized_generated_items)
filtered_generated_items = drop_orphan_function_calls(filtered_generated_items)
return normalized_caller_items + filtered_generated_items


Expand All @@ -223,7 +283,8 @@ def normalize_resumed_input(
"""Normalize resumed list inputs and drop orphan tool calls."""
if isinstance(raw_input, list):
normalized = normalize_input_items_for_api(raw_input)
return drop_orphan_function_calls(normalized)
filtered = drop_orphaned_messages_after_consumed_reasoning(normalized)
return drop_orphan_function_calls(filtered)
return raw_input


Expand Down
4 changes: 3 additions & 1 deletion src/agents/run_internal/oai_conversation.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
from .items import (
ReasoningItemIdPolicy,
drop_orphan_function_calls,
drop_orphaned_messages_after_consumed_reasoning,
fingerprint_input_item,
normalize_input_items_for_api,
prepare_model_input_items,
Expand Down Expand Up @@ -501,7 +502,8 @@ def prepare_input(
normalized_generated_items, prepared_generated_items, strict=False
)
}
filtered_generated_items = drop_orphan_function_calls(normalized_generated_items)
filtered_generated_items = drop_orphaned_messages_after_consumed_reasoning(normalized_generated_items)
filtered_generated_items = drop_orphan_function_calls(filtered_generated_items)
for item in filtered_generated_items:
prepared_source_item = normalized_generated_sources.get(id(item))
if prepared_source_item is not None:
Expand Down
4 changes: 3 additions & 1 deletion src/agents/run_internal/session_persistence.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@
copy_input_items,
deduplicate_input_items_preferring_latest,
drop_orphan_function_calls,
drop_orphaned_messages_after_consumed_reasoning,
ensure_input_item_format,
fingerprint_input_item,
normalize_input_items_for_api,
Expand Down Expand Up @@ -176,8 +177,9 @@ async def prepare_input_with_session(
prune_history_indexes,
)
prepared_as_inputs = [ensure_input_item_format(item) for item in prepared_items_raw]
filtered = drop_orphaned_messages_after_consumed_reasoning(prepared_as_inputs)
filtered = drop_orphan_function_calls(
prepared_as_inputs,
filtered,
pruning_indexes=prune_history_indexes,
)
normalized = normalize_input_items_for_api(filtered)
Expand Down
Loading