Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
46 changes: 46 additions & 0 deletions src/strands/agent/agent.py
Original file line number Diff line number Diff line change
Expand Up @@ -146,6 +146,7 @@ def __init__(
tool_executor: ToolExecutor | None = None,
retry_strategy: ModelRetryStrategy | _DefaultRetryStrategySentinel | None = _DEFAULT_RETRY_STRATEGY,
concurrent_invocation_mode: ConcurrentInvocationMode = ConcurrentInvocationMode.THROW,
max_iterations: int = 25,
):
"""Initialize the Agent with the specified configuration.

Expand Down Expand Up @@ -214,6 +215,17 @@ def __init__(
Set to "unsafe_reentrant" to skip lock acquisition entirely, allowing concurrent invocations.
Warning: "unsafe_reentrant" makes no guarantees about resulting behavior and is provided
only for advanced use cases where the caller understands the risks.
max_iterations: Maximum number of event-loop cycles (model + tool round trips) per
invocation. When the agent would exceed this many cycles, the loop halts,
appends a synthetic assistant message noting the cap was reached, and returns
an AgentResult with stop_reason='max_iterations'. No further model invocation
is made after the cap trips. The synthetic halt message carries
``metadata = {"usage": Usage(0,0,0), "metrics": Metrics(latencyMs=0),
"synthetic": True}`` — the ``"synthetic": True`` marker lets downstream
                token-budgeting / analytics code filter out this terminal message (it is NOT a real
model call; zeros reflect that but the marker makes the distinction
unambiguous). Defaults to 25. Precedent: LangChain AgentExecutor defaults
to 15; OpenAI Agents SDK defaults to 10.

Raises:
ValueError: If agent id contains path separators.
Expand Down Expand Up @@ -313,6 +325,13 @@ def __init__(
self._invocation_lock = threading.Lock()
self._concurrent_invocation_mode = concurrent_invocation_mode

# Reject bool explicitly — `isinstance(True, int)` is True in Python,
# so `Agent(max_iterations=True)` would otherwise silently set the cap
# to 1. Catch this before the int check.
if isinstance(max_iterations, bool) or not isinstance(max_iterations, int) or max_iterations < 1:
raise ValueError(f"max_iterations must be a positive integer, got {max_iterations!r}")
self._max_iterations = max_iterations

# In the future, we'll have a RetryStrategy base class but until
# that API is determined we only allow ModelRetryStrategy
if (
Expand Down Expand Up @@ -889,6 +908,26 @@ async def _run_loop(
current_messages: Messages | None = messages

while current_messages is not None:
# Reset the per-invocation cycle counter at the top of each loop
# iteration. Two cases this handles:
# (1) Fresh top-level invocation with a caller-supplied
# `invocation_state` dict that still carries a counter from
# a previous invoke_async call — that counter must not leak
# across invocations.
# (2) Hook-driven resume: `AfterInvocationEvent` handlers may set
# `resume=<messages>` to drive another leg of the loop. Each
# resume leg is logically a fresh invocation from the cap's
# perspective and must get a fresh `max_iterations` budget —
# otherwise the first leg's cycles consume the budget for
# every subsequent leg.
# We `pop` (rather than set to 0) so the key is absent in the
# `InitEventLoopEvent` payload — downstream consumers that emit
# initial-state telemetry shouldn't see a pre-loop "cycle 0".
# The ContextWindowOverflowException retry path in
# `_execute_event_loop_cycle` also resets the counter — `max_iterations`
# caps tool-call cycles, not model-retry cycles.
invocation_state.pop("event_loop_cycle_count", None)

before_invocation_event, _interrupts = await self.hooks.invoke_callbacks_async(
BeforeInvocationEvent(agent=self, invocation_state=invocation_state, messages=current_messages)
)
Expand Down Expand Up @@ -986,6 +1025,13 @@ async def _execute_event_loop_cycle(
if self._session_manager:
self._session_manager.sync_agent(self)

# Reset the cycle counter before the retry so context-overflow
# recovery doesn't silently consume the user's `max_iterations`
# budget. `max_iterations` caps tool-call cycles, not model-retry
# cycles — a successful `reduce_context` means the prior cycle
# never produced a usable turn, so it shouldn't count.
invocation_state.pop("event_loop_cycle_count", None)

events = self._execute_event_loop_cycle(invocation_state, structured_output_context)
async for event in events:
yield event
Expand Down
74 changes: 74 additions & 0 deletions src/strands/event_loop/event_loop.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@
TypedEvent,
)
from ..types.content import Message, Messages
from ..types.event_loop import MAX_ITERATIONS_STOP_REASON, Metrics, Usage
from ..types.exceptions import (
ContextWindowOverflowException,
EventLoopException,
Expand Down Expand Up @@ -150,6 +151,14 @@ async def event_loop_cycle(
stop_reason = "tool_use"
message = agent.messages[-1]
else:
# Only count cycles that actually invoke the model. Cycles that
# skip model invocation (interrupt-resume, tool_use replay) are
# not real "iterations" for the purposes of the max_iterations
# cap and shouldn't consume the user's budget. First
# model-invoking cycle sets count=1. Exposed on invocation_state
# for HookProviders to observe progress.
invocation_state["event_loop_cycle_count"] = invocation_state.get("event_loop_cycle_count", 0) + 1

model_events = _handle_model_execution(
agent, cycle_span, cycle_trace, invocation_state, tracer, structured_output_context
)
Expand Down Expand Up @@ -590,6 +599,71 @@ async def _handle_tool_execution(
)
return

# Enforce max_iterations cap to prevent unbounded tool-call recursion.
# Without this guard, a degenerate model (or a model stuck on a sticky prompt /
# fuzzy fixture) can drive the loop forever: each cycle it re-emits the same
# tool_use, the tool result is appended, and we recurse again indefinitely.
#
# Access `_max_iterations` directly rather than via `getattr(..., None)`
# fallback: `Agent.__init__` always sets this attribute, so any missing-attr
# case is a bug we want to surface loudly (AttributeError) rather than
# silently disabling the cap.
max_iterations = agent._max_iterations
# Read with a 0 default so interrupt-resume / tool-use-replay paths (which
# reach tool execution without a preceding model invocation in this cycle)
# don't KeyError — those paths legitimately skip the counter bump.
if invocation_state.get("event_loop_cycle_count", 0) >= max_iterations:
logger.warning(
"cycle_count=<%d>, max_iterations=<%d> | max_iterations cap reached; "
"halting event loop and returning partial result",
invocation_state.get("event_loop_cycle_count", 0),
max_iterations,
)

# Inject a synthetic assistant message so consumers see a terminal turn
# in the conversation rather than a trailing tool_result with no response.
# Populate `metadata` with zeroed usage/metrics shape matching normal
# assistant messages so downstream consumers that read
# `message["metadata"]["usage"]` / `["metrics"]` don't KeyError on the
# synthetic halt message. Include a `"synthetic": True` marker so
# token-budgeting / analytics code that sums across history can filter
        # out this terminal message (it is NOT a real model call — zero usage reflects
# that, but the marker makes the distinction unambiguous and keeps the
# zeros out of per-call cost/latency percentiles).
synthetic_message: Message = {
"role": "assistant",
"content": [
{
"text": (
f"[Agent halted: reached max_iterations={max_iterations}. "
"The model kept requesting tool calls without terminating. "
"Returning with the information gathered so far.]"
)
}
],
"metadata": {
"usage": Usage(inputTokens=0, outputTokens=0, totalTokens=0),
"metrics": Metrics(latencyMs=0),
"synthetic": True,
},
}
agent.messages.append(synthetic_message)
await agent.hooks.invoke_callbacks_async(MessageAddedEvent(agent=agent, message=synthetic_message))

# Intentionally DO NOT yield ModelMessageEvent here — no model invocation
# occurred on the halt path. Consumers tracking 1:1 model-call correspondence
# via ModelMessageEvent would miscount if we emitted one. The MessageAddedEvent
# hook above and the terminal EventLoopStopEvent below together provide the
# full signal that a synthetic message was appended and the loop has stopped.
yield EventLoopStopEvent(
MAX_ITERATIONS_STOP_REASON,
synthetic_message,
agent.event_loop_metrics,
invocation_state["request_state"],
structured_output=structured_output_result,
)
return

events = recurse_event_loop(
agent=agent, invocation_state=invocation_state, structured_output_context=structured_output_context
)
Expand Down
12 changes: 11 additions & 1 deletion src/strands/types/event_loop.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,15 @@
"""Event loop-related type definitions for the SDK."""

from typing import Literal
from typing import Final, Literal

from typing_extensions import Required, TypedDict

# Canonical stop-reason literal for the max_iterations cap. Defined as a
# module-level Final so producers (event_loop) and consumers (type literal,
# tests, downstream code) share one source of truth and a typo cannot drift
# a producer out of sync with the StopReason Literal below.
MAX_ITERATIONS_STOP_REASON: Final = "max_iterations"


class Usage(TypedDict, total=False):
"""Token usage information for model interactions.
Expand Down Expand Up @@ -42,6 +48,7 @@ class Metrics(TypedDict, total=False):
"end_turn",
"guardrail_intervened",
"interrupt",
"max_iterations",
"max_tokens",
"stop_sequence",
"tool_use",
Expand All @@ -53,6 +60,9 @@ class Metrics(TypedDict, total=False):
- "end_turn": Normal completion of the response
- "guardrail_intervened": Guardrail system intervened
- "interrupt": Agent was interrupted for human input
- "max_iterations": Agent reached its configured max_iterations cap
(see ``Agent(max_iterations=...)``); the event loop was halted to prevent
unbounded tool-call recursion.
- "max_tokens": Maximum token limit reached
- "stop_sequence": Stop sequence encountered
- "tool_use": Model requested to use a tool
Expand Down
1 change: 1 addition & 0 deletions tests/strands/agent/hooks/test_agent_events.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@ def mock_sleep():

any_props = {
"agent": ANY,
"event_loop_cycle_count": ANY,
"event_loop_cycle_id": ANY,
"event_loop_cycle_span": ANY,
"event_loop_cycle_trace": ANY,
Expand Down
4 changes: 4 additions & 0 deletions tests/strands/agent/test_agent.py
Original file line number Diff line number Diff line change
Expand Up @@ -731,6 +731,7 @@ def test_agent__call__callback(mock_model, agent, callback_handler, agenerator):
agent=agent,
current_tool_use={"toolUseId": "123", "name": "test", "input": {}},
delta={"toolUse": {"input": '{"value"}'}},
event_loop_cycle_count=unittest.mock.ANY,
event_loop_cycle_id=unittest.mock.ANY,
event_loop_cycle_span=unittest.mock.ANY,
event_loop_cycle_trace=unittest.mock.ANY,
Expand All @@ -742,6 +743,7 @@ def test_agent__call__callback(mock_model, agent, callback_handler, agenerator):
unittest.mock.call(
agent=agent,
delta={"reasoningContent": {"text": "value"}},
event_loop_cycle_count=unittest.mock.ANY,
event_loop_cycle_id=unittest.mock.ANY,
event_loop_cycle_span=unittest.mock.ANY,
event_loop_cycle_trace=unittest.mock.ANY,
Expand All @@ -753,6 +755,7 @@ def test_agent__call__callback(mock_model, agent, callback_handler, agenerator):
unittest.mock.call(
agent=agent,
delta={"reasoningContent": {"signature": "value"}},
event_loop_cycle_count=unittest.mock.ANY,
event_loop_cycle_id=unittest.mock.ANY,
event_loop_cycle_span=unittest.mock.ANY,
event_loop_cycle_trace=unittest.mock.ANY,
Expand All @@ -767,6 +770,7 @@ def test_agent__call__callback(mock_model, agent, callback_handler, agenerator):
agent=agent,
data="value",
delta={"text": "value"},
event_loop_cycle_count=unittest.mock.ANY,
event_loop_cycle_id=unittest.mock.ANY,
event_loop_cycle_span=unittest.mock.ANY,
event_loop_cycle_trace=unittest.mock.ANY,
Expand Down
Loading