Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
46 changes: 46 additions & 0 deletions src/strands/agent/agent.py
Original file line number Diff line number Diff line change
Expand Up @@ -146,6 +146,7 @@ def __init__(
tool_executor: ToolExecutor | None = None,
retry_strategy: ModelRetryStrategy | _DefaultRetryStrategySentinel | None = _DEFAULT_RETRY_STRATEGY,
concurrent_invocation_mode: ConcurrentInvocationMode = ConcurrentInvocationMode.THROW,
max_iterations: int = 25,
):
"""Initialize the Agent with the specified configuration.

Expand Down Expand Up @@ -214,6 +215,17 @@ def __init__(
Set to "unsafe_reentrant" to skip lock acquisition entirely, allowing concurrent invocations.
Warning: "unsafe_reentrant" makes no guarantees about resulting behavior and is provided
only for advanced use cases where the caller understands the risks.
max_iterations: Maximum number of event-loop cycles (model + tool round trips) per
invocation. When the agent would exceed this many cycles, the loop halts,
appends a synthetic assistant message noting the cap was reached, and returns
an AgentResult with stop_reason='max_iterations'. No further model invocation
is made after the cap trips. The synthetic halt message carries
``metadata = {"usage": Usage(0,0,0), "metrics": Metrics(latencyMs=0),
"synthetic": True}`` — the ``"synthetic": True`` marker lets downstream
                token-budgeting / analytics code filter out this terminal message (it is NOT a real
model call; zeros reflect that but the marker makes the distinction
unambiguous). Defaults to 25. Precedent: LangChain AgentExecutor defaults
to 15; OpenAI Agents SDK defaults to 10.

Raises:
ValueError: If agent id contains path separators.
Expand Down Expand Up @@ -313,6 +325,13 @@ def __init__(
self._invocation_lock = threading.Lock()
self._concurrent_invocation_mode = concurrent_invocation_mode

# Reject bool explicitly — `isinstance(True, int)` is True in Python,
# so `Agent(max_iterations=True)` would otherwise silently set the cap
# to 1. Catch this before the int check.
if isinstance(max_iterations, bool) or not isinstance(max_iterations, int) or max_iterations < 1:
raise ValueError(f"max_iterations must be a positive integer, got {max_iterations!r}")
self._max_iterations = max_iterations

# In the future, we'll have a RetryStrategy base class but until
# that API is determined we only allow ModelRetryStrategy
if (
Expand Down Expand Up @@ -889,6 +908,26 @@ async def _run_loop(
current_messages: Messages | None = messages

while current_messages is not None:
# Reset the per-invocation cycle counter at the top of each loop
# iteration. Two cases this handles:
# (1) Fresh top-level invocation with a caller-supplied
# `invocation_state` dict that still carries a counter from
# a previous invoke_async call — that counter must not leak
# across invocations.
# (2) Hook-driven resume: `AfterInvocationEvent` handlers may set
# `resume=<messages>` to drive another leg of the loop. Each
# resume leg is logically a fresh invocation from the cap's
# perspective and must get a fresh `max_iterations` budget —
# otherwise the first leg's cycles consume the budget for
# every subsequent leg.
# We `pop` (rather than set to 0) so the key is absent in the
# `InitEventLoopEvent` payload — downstream consumers that emit
# initial-state telemetry shouldn't see a pre-loop "cycle 0".
# The ContextWindowOverflowException retry path in
# `_execute_event_loop_cycle` also resets the counter — `max_iterations`
# caps tool-call cycles, not model-retry cycles.
invocation_state.pop("event_loop_cycle_count", None)

before_invocation_event, _interrupts = await self.hooks.invoke_callbacks_async(
BeforeInvocationEvent(agent=self, invocation_state=invocation_state, messages=current_messages)
)
Expand Down Expand Up @@ -986,6 +1025,13 @@ async def _execute_event_loop_cycle(
if self._session_manager:
self._session_manager.sync_agent(self)

# Reset the cycle counter before the retry so context-overflow
# recovery doesn't silently consume the user's `max_iterations`
# budget. `max_iterations` caps tool-call cycles, not model-retry
# cycles — a successful `reduce_context` means the prior cycle
# never produced a usable turn, so it shouldn't count.
invocation_state.pop("event_loop_cycle_count", None)

events = self._execute_event_loop_cycle(invocation_state, structured_output_context)
async for event in events:
yield event
Expand Down
74 changes: 74 additions & 0 deletions src/strands/event_loop/event_loop.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@
TypedEvent,
)
from ..types.content import Message, Messages
from ..types.event_loop import MAX_ITERATIONS_STOP_REASON, Metrics, Usage
from ..types.exceptions import (
ContextWindowOverflowException,
EventLoopException,
Expand Down Expand Up @@ -150,6 +151,14 @@ async def event_loop_cycle(
stop_reason = "tool_use"
message = agent.messages[-1]
else:
# Only count cycles that actually invoke the model. Cycles that
# skip model invocation (interrupt-resume, tool_use replay) are
# not real "iterations" for the purposes of the max_iterations
# cap and shouldn't consume the user's budget. First
# model-invoking cycle sets count=1. Exposed on invocation_state
# for HookProviders to observe progress.
invocation_state["event_loop_cycle_count"] = invocation_state.get("event_loop_cycle_count", 0) + 1

model_events = _handle_model_execution(
agent, cycle_span, cycle_trace, invocation_state, tracer, structured_output_context
)
Expand Down Expand Up @@ -590,6 +599,71 @@ async def _handle_tool_execution(
)
return

# Enforce max_iterations cap to prevent unbounded tool-call recursion.
# Without this guard, a degenerate model (or a model stuck on a sticky prompt /
# fuzzy fixture) can drive the loop forever: each cycle it re-emits the same
# tool_use, the tool result is appended, and we recurse again indefinitely.
#
# Access `_max_iterations` directly rather than via `getattr(..., None)`
# fallback: `Agent.__init__` always sets this attribute, so any missing-attr
# case is a bug we want to surface loudly (AttributeError) rather than
# silently disabling the cap.
max_iterations = agent._max_iterations
# Read with a 0 default so interrupt-resume / tool-use-replay paths (which
# reach tool execution without a preceding model invocation in this cycle)
# don't KeyError — those paths legitimately skip the counter bump.
if invocation_state.get("event_loop_cycle_count", 0) >= max_iterations:
logger.warning(
"cycle_count=<%d>, max_iterations=<%d> | max_iterations cap reached; "
"halting event loop and returning partial result",
invocation_state.get("event_loop_cycle_count", 0),
max_iterations,
)

# Inject a synthetic assistant message so consumers see a terminal turn
# in the conversation rather than a trailing tool_result with no response.
# Populate `metadata` with zeroed usage/metrics shape matching normal
# assistant messages so downstream consumers that read
# `message["metadata"]["usage"]` / `["metrics"]` don't KeyError on the
# synthetic halt message. Include a `"synthetic": True` marker so
# token-budgeting / analytics code that sums across history can filter
        # out this terminal message (it is NOT a real model call — zero usage reflects
# that, but the marker makes the distinction unambiguous and keeps the
# zeros out of per-call cost/latency percentiles).
synthetic_message: Message = {
"role": "assistant",
"content": [
{
"text": (
f"[Agent halted: reached max_iterations={max_iterations}. "
"The model kept requesting tool calls without terminating. "
"Returning with the information gathered so far.]"
)
}
],
"metadata": {
"usage": Usage(inputTokens=0, outputTokens=0, totalTokens=0),
"metrics": Metrics(latencyMs=0),
"synthetic": True,
},
}
agent.messages.append(synthetic_message)
await agent.hooks.invoke_callbacks_async(MessageAddedEvent(agent=agent, message=synthetic_message))

# Intentionally DO NOT yield ModelMessageEvent here — no model invocation
# occurred on the halt path. Consumers tracking 1:1 model-call correspondence
# via ModelMessageEvent would miscount if we emitted one. The MessageAddedEvent
# hook above and the terminal EventLoopStopEvent below together provide the
# full signal that a synthetic message was appended and the loop has stopped.
yield EventLoopStopEvent(
MAX_ITERATIONS_STOP_REASON,
synthetic_message,
agent.event_loop_metrics,
invocation_state["request_state"],
structured_output=structured_output_result,
)
return

events = recurse_event_loop(
agent=agent, invocation_state=invocation_state, structured_output_context=structured_output_context
)
Expand Down
12 changes: 11 additions & 1 deletion src/strands/types/event_loop.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,15 @@
"""Event loop-related type definitions for the SDK."""

from typing import Literal
from typing import Final, Literal

from typing_extensions import Required, TypedDict

# Canonical stop-reason literal for the max_iterations cap. Defined as a
# module-level Final so producers (event_loop) and consumers (type literal,
# tests, downstream code) share one source of truth and a typo cannot drift
# a producer out of sync with the StopReason Literal below.
MAX_ITERATIONS_STOP_REASON: Final = "max_iterations"


class Usage(TypedDict, total=False):
"""Token usage information for model interactions.
Expand Down Expand Up @@ -42,6 +48,7 @@ class Metrics(TypedDict, total=False):
"end_turn",
"guardrail_intervened",
"interrupt",
"max_iterations",
"max_tokens",
"stop_sequence",
"tool_use",
Expand All @@ -53,6 +60,9 @@ class Metrics(TypedDict, total=False):
- "end_turn": Normal completion of the response
- "guardrail_intervened": Guardrail system intervened
- "interrupt": Agent was interrupted for human input
- "max_iterations": Agent reached its configured max_iterations cap
(see ``Agent(max_iterations=...)``); the event loop was halted to prevent
unbounded tool-call recursion.
- "max_tokens": Maximum token limit reached
- "stop_sequence": Stop sequence encountered
- "tool_use": Model requested to use a tool
Expand Down
1 change: 1 addition & 0 deletions tests/strands/agent/hooks/test_agent_events.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@ def mock_sleep():

any_props = {
"agent": ANY,
"event_loop_cycle_count": ANY,
"event_loop_cycle_id": ANY,
"event_loop_cycle_span": ANY,
"event_loop_cycle_trace": ANY,
Expand Down
4 changes: 4 additions & 0 deletions tests/strands/agent/test_agent.py
Original file line number Diff line number Diff line change
Expand Up @@ -731,6 +731,7 @@ def test_agent__call__callback(mock_model, agent, callback_handler, agenerator):
agent=agent,
current_tool_use={"toolUseId": "123", "name": "test", "input": {}},
delta={"toolUse": {"input": '{"value"}'}},
event_loop_cycle_count=unittest.mock.ANY,
event_loop_cycle_id=unittest.mock.ANY,
event_loop_cycle_span=unittest.mock.ANY,
event_loop_cycle_trace=unittest.mock.ANY,
Expand All @@ -742,6 +743,7 @@ def test_agent__call__callback(mock_model, agent, callback_handler, agenerator):
unittest.mock.call(
agent=agent,
delta={"reasoningContent": {"text": "value"}},
event_loop_cycle_count=unittest.mock.ANY,
event_loop_cycle_id=unittest.mock.ANY,
event_loop_cycle_span=unittest.mock.ANY,
event_loop_cycle_trace=unittest.mock.ANY,
Expand All @@ -753,6 +755,7 @@ def test_agent__call__callback(mock_model, agent, callback_handler, agenerator):
unittest.mock.call(
agent=agent,
delta={"reasoningContent": {"signature": "value"}},
event_loop_cycle_count=unittest.mock.ANY,
event_loop_cycle_id=unittest.mock.ANY,
event_loop_cycle_span=unittest.mock.ANY,
event_loop_cycle_trace=unittest.mock.ANY,
Expand All @@ -767,6 +770,7 @@ def test_agent__call__callback(mock_model, agent, callback_handler, agenerator):
agent=agent,
data="value",
delta={"text": "value"},
event_loop_cycle_count=unittest.mock.ANY,
event_loop_cycle_id=unittest.mock.ANY,
event_loop_cycle_span=unittest.mock.ANY,
event_loop_cycle_trace=unittest.mock.ANY,
Expand Down
Loading