diff --git a/src/strands/agent/agent.py b/src/strands/agent/agent.py index e8ea3c9bc..ec6203e37 100644 --- a/src/strands/agent/agent.py +++ b/src/strands/agent/agent.py @@ -146,6 +146,7 @@ def __init__( tool_executor: ToolExecutor | None = None, retry_strategy: ModelRetryStrategy | _DefaultRetryStrategySentinel | None = _DEFAULT_RETRY_STRATEGY, concurrent_invocation_mode: ConcurrentInvocationMode = ConcurrentInvocationMode.THROW, + max_iterations: int = 25, ): """Initialize the Agent with the specified configuration. @@ -214,6 +215,17 @@ def __init__( Set to "unsafe_reentrant" to skip lock acquisition entirely, allowing concurrent invocations. Warning: "unsafe_reentrant" makes no guarantees about resulting behavior and is provided only for advanced use cases where the caller understands the risks. + max_iterations: Maximum number of event-loop cycles (model + tool round trips) per + invocation. When the agent would exceed this many cycles, the loop halts, + appends a synthetic assistant message noting the cap was reached, and returns + an AgentResult with stop_reason='max_iterations'. No further model invocation + is made after the cap trips. The synthetic halt message carries + ``metadata = {"usage": Usage(0,0,0), "metrics": Metrics(latencyMs=0), + "synthetic": True}`` — the ``"synthetic": True`` marker lets downstream + token-budgeting / analytics code filter out this terminal (it is NOT a real + model call; zeros reflect that but the marker makes the distinction + unambiguous). Defaults to 25. Precedent: LangChain AgentExecutor defaults + to 15; OpenAI Agents SDK defaults to 10. Raises: ValueError: If agent id contains path separators. @@ -313,6 +325,13 @@ def __init__( self._invocation_lock = threading.Lock() self._concurrent_invocation_mode = concurrent_invocation_mode + # Reject bool explicitly — `isinstance(True, int)` is True in Python, + # so `Agent(max_iterations=True)` would otherwise silently set the cap + # to 1. Catch this before the int check. 
+ if isinstance(max_iterations, bool) or not isinstance(max_iterations, int) or max_iterations < 1: + raise ValueError(f"max_iterations must be a positive integer, got {max_iterations!r}") + self._max_iterations = max_iterations + # In the future, we'll have a RetryStrategy base class but until # that API is determined we only allow ModelRetryStrategy if ( @@ -889,6 +908,26 @@ async def _run_loop( current_messages: Messages | None = messages while current_messages is not None: + # Reset the per-invocation cycle counter at the top of each loop + # iteration. Two cases this handles: + # (1) Fresh top-level invocation with a caller-supplied + # `invocation_state` dict that still carries a counter from + # a previous invoke_async call — that counter must not leak + # across invocations. + # (2) Hook-driven resume: `AfterInvocationEvent` handlers may set + # `resume` to a new input to drive another leg of the loop. Each + # resume leg is logically a fresh invocation from the cap's + # perspective and must get a fresh `max_iterations` budget — + # otherwise the first leg's cycles consume the budget for + # every subsequent leg. + # We `pop` (rather than set to 0) so the key is absent in the + # `InitEventLoopEvent` payload — downstream consumers that emit + # initial-state telemetry shouldn't see a pre-loop "cycle 0". + # The ContextWindowOverflowException retry path in + # `_execute_event_loop_cycle` only refunds the one overflowed cycle; + # it must never reset this counter in the middle of an invocation. 
+ invocation_state.pop("event_loop_cycle_count", None) + + before_invocation_event, _interrupts = await self.hooks.invoke_callbacks_async( BeforeInvocationEvent(agent=self, invocation_state=invocation_state, messages=current_messages) ) @@ -986,6 +1025,13 @@ async def _execute_event_loop_cycle( if self._session_manager: self._session_manager.sync_agent(self) + # Refund only the cycle that just overflowed: it never produced a + # usable turn, so it must not count against `max_iterations`. + # Do NOT pop/reset the counter here — a looping model that keeps + # overflowing the context window would get a fresh budget after + # every recovery and the cap would never trip. + invocation_state["event_loop_cycle_count"] = max(invocation_state.get("event_loop_cycle_count", 0) - 1, 0) + + events = self._execute_event_loop_cycle(invocation_state, structured_output_context) async for event in events: yield event diff --git a/src/strands/event_loop/event_loop.py b/src/strands/event_loop/event_loop.py index bf1cc7a84..56c37d0d0 100644 --- a/src/strands/event_loop/event_loop.py +++ b/src/strands/event_loop/event_loop.py @@ -33,6 +33,7 @@ TypedEvent, ) from ..types.content import Message, Messages +from ..types.event_loop import MAX_ITERATIONS_STOP_REASON, Metrics, Usage from ..types.exceptions import ( ContextWindowOverflowException, EventLoopException, @@ -150,6 +151,14 @@ async def event_loop_cycle( stop_reason = "tool_use" message = agent.messages[-1] else: + # Only count cycles that actually invoke the model. Cycles that + # skip model invocation (interrupt-resume, tool_use replay) are + # not real "iterations" for the purposes of the max_iterations + # cap and shouldn't consume the user's budget. First + # model-invoking cycle sets count=1. Exposed on invocation_state + # for HookProviders to observe progress. 
+ invocation_state["event_loop_cycle_count"] = invocation_state.get("event_loop_cycle_count", 0) + 1 + model_events = _handle_model_execution( agent, cycle_span, cycle_trace, invocation_state, tracer, structured_output_context ) @@ -590,6 +599,71 @@ async def _handle_tool_execution( ) return + # Enforce max_iterations cap to prevent unbounded tool-call recursion. + # Without this guard, a degenerate model (or a model stuck on a sticky prompt / + # fuzzy fixture) can drive the loop forever: each cycle it re-emits the same + # tool_use, the tool result is appended, and we recurse again indefinitely. + # + # Access `_max_iterations` directly rather than via `getattr(..., None)` + # fallback: `Agent.__init__` always sets this attribute, so any missing-attr + # case is a bug we want to surface loudly (AttributeError) rather than + # silently disabling the cap. + max_iterations = agent._max_iterations + # Read with a 0 default so interrupt-resume / tool-use-replay paths (which + # reach tool execution without a preceding model invocation in this cycle) + # don't KeyError — those paths legitimately skip the counter bump. + if invocation_state.get("event_loop_cycle_count", 0) >= max_iterations: + logger.warning( + "cycle_count=<%d>, max_iterations=<%d> | max_iterations cap reached; " + "halting event loop and returning partial result", + invocation_state.get("event_loop_cycle_count", 0), + max_iterations, + ) + + # Inject a synthetic assistant message so consumers see a terminal turn + # in the conversation rather than a trailing tool_result with no response. + # Populate `metadata` with zeroed usage/metrics shape matching normal + # assistant messages so downstream consumers that read + # `message["metadata"]["usage"]` / `["metrics"]` don't KeyError on the + # synthetic halt message. 
Include a `"synthetic": True` marker so + # token-budgeting / analytics code that sums across history can filter + # out this terminal (it is NOT a real model call — zero usage reflects + # that, but the marker makes the distinction unambiguous and keeps the + # zeros out of per-call cost/latency percentiles). + synthetic_message: Message = { + "role": "assistant", + "content": [ + { + "text": ( + f"[Agent halted: reached max_iterations={max_iterations}. " + "The model kept requesting tool calls without terminating. " + "Returning with the information gathered so far.]" + ) + } + ], + "metadata": { + "usage": Usage(inputTokens=0, outputTokens=0, totalTokens=0), + "metrics": Metrics(latencyMs=0), + "synthetic": True, + }, + } + agent.messages.append(synthetic_message) + await agent.hooks.invoke_callbacks_async(MessageAddedEvent(agent=agent, message=synthetic_message)) + + # Intentionally DO NOT yield ModelMessageEvent here — no model invocation + # occurred on the halt path. Consumers tracking 1:1 model-call correspondence + # via ModelMessageEvent would miscount if we emitted one. The MessageAddedEvent + # hook above and the terminal EventLoopStopEvent below together provide the + # full signal that a synthetic message was appended and the loop has stopped. 
+ yield EventLoopStopEvent( + MAX_ITERATIONS_STOP_REASON, + synthetic_message, + agent.event_loop_metrics, + invocation_state["request_state"], + structured_output=structured_output_result, + ) + return + events = recurse_event_loop( agent=agent, invocation_state=invocation_state, structured_output_context=structured_output_context ) diff --git a/src/strands/types/event_loop.py b/src/strands/types/event_loop.py index fca141327..a3d074af0 100644 --- a/src/strands/types/event_loop.py +++ b/src/strands/types/event_loop.py @@ -1,9 +1,15 @@ """Event loop-related type definitions for the SDK.""" -from typing import Literal +from typing import Final, Literal from typing_extensions import Required, TypedDict +# Canonical stop-reason literal for the max_iterations cap. Defined as a +# module-level Final so producers (event_loop) and consumers (type literal, +# tests, downstream code) share one source of truth and a typo cannot drift +# a producer out of sync with the StopReason Literal below. +MAX_ITERATIONS_STOP_REASON: Final = "max_iterations" + class Usage(TypedDict, total=False): """Token usage information for model interactions. @@ -42,6 +48,7 @@ class Metrics(TypedDict, total=False): "end_turn", "guardrail_intervened", "interrupt", + "max_iterations", "max_tokens", "stop_sequence", "tool_use", @@ -53,6 +60,9 @@ class Metrics(TypedDict, total=False): - "end_turn": Normal completion of the response - "guardrail_intervened": Guardrail system intervened - "interrupt": Agent was interrupted for human input +- "max_iterations": Agent reached its configured max_iterations cap + (see ``Agent(max_iterations=...)``); the event loop was halted to prevent + unbounded tool-call recursion. 
- "max_tokens": Maximum token limit reached - "stop_sequence": Stop sequence encountered - "tool_use": Model requested to use a tool diff --git a/tests/strands/agent/hooks/test_agent_events.py b/tests/strands/agent/hooks/test_agent_events.py index 1f09579b0..7d48070f7 100644 --- a/tests/strands/agent/hooks/test_agent_events.py +++ b/tests/strands/agent/hooks/test_agent_events.py @@ -40,6 +40,7 @@ def mock_sleep(): any_props = { "agent": ANY, + "event_loop_cycle_count": ANY, "event_loop_cycle_id": ANY, "event_loop_cycle_span": ANY, "event_loop_cycle_trace": ANY, diff --git a/tests/strands/agent/test_agent.py b/tests/strands/agent/test_agent.py index 1e27274a1..53a201f70 100644 --- a/tests/strands/agent/test_agent.py +++ b/tests/strands/agent/test_agent.py @@ -731,6 +731,7 @@ def test_agent__call__callback(mock_model, agent, callback_handler, agenerator): agent=agent, current_tool_use={"toolUseId": "123", "name": "test", "input": {}}, delta={"toolUse": {"input": '{"value"}'}}, + event_loop_cycle_count=unittest.mock.ANY, event_loop_cycle_id=unittest.mock.ANY, event_loop_cycle_span=unittest.mock.ANY, event_loop_cycle_trace=unittest.mock.ANY, @@ -742,6 +743,7 @@ def test_agent__call__callback(mock_model, agent, callback_handler, agenerator): unittest.mock.call( agent=agent, delta={"reasoningContent": {"text": "value"}}, + event_loop_cycle_count=unittest.mock.ANY, event_loop_cycle_id=unittest.mock.ANY, event_loop_cycle_span=unittest.mock.ANY, event_loop_cycle_trace=unittest.mock.ANY, @@ -753,6 +755,7 @@ def test_agent__call__callback(mock_model, agent, callback_handler, agenerator): unittest.mock.call( agent=agent, delta={"reasoningContent": {"signature": "value"}}, + event_loop_cycle_count=unittest.mock.ANY, event_loop_cycle_id=unittest.mock.ANY, event_loop_cycle_span=unittest.mock.ANY, event_loop_cycle_trace=unittest.mock.ANY, @@ -767,6 +770,7 @@ def test_agent__call__callback(mock_model, agent, callback_handler, agenerator): agent=agent, data="value", delta={"text": 
"value"}, + event_loop_cycle_count=unittest.mock.ANY, event_loop_cycle_id=unittest.mock.ANY, event_loop_cycle_span=unittest.mock.ANY, event_loop_cycle_trace=unittest.mock.ANY, diff --git a/tests/strands/agent/test_agent_max_iterations.py b/tests/strands/agent/test_agent_max_iterations.py new file mode 100644 index 000000000..391c76291 --- /dev/null +++ b/tests/strands/agent/test_agent_max_iterations.py @@ -0,0 +1,353 @@ +"""Tests for max_iterations cap on the Agent event loop. + +Without a cap, a degenerate model that always emits the same tool_use can drive +the event loop into unbounded recursion: each cycle the model re-emits the same +tool call, the tool result is appended, and the loop continues forever. This +test suite verifies that Agent(max_iterations=N) causes the loop to terminate +after at most N tool-call cycles with a synthetic terminal message and a +warning log. +""" + +import asyncio +import json +import logging +from collections.abc import AsyncGenerator +from typing import Any + +import pytest + +import strands +from strands import Agent +from strands.models import Model +from strands.types.content import Messages, SystemContentBlock +from strands.types.streaming import StreamEvent +from strands.types.tools import ToolChoice, ToolSpec + + +class LoopingStubModel(Model): + """Model that always emits the same tool_use — never terminates on its own.""" + + def __init__(self) -> None: + self.call_count = 0 + + def format_chunk(self, event: Any) -> StreamEvent: + return event + + def format_request(self, messages, tool_specs=None, system_prompt=None): + return None + + def get_config(self): + return {} + + def update_config(self, **cfg): + pass + + async def structured_output(self, output_model, prompt, system_prompt=None, **kwargs): + raise NotImplementedError("LoopingStubModel does not support structured output") + yield # pragma: no cover — tells the type checker this is a generator + + async def stream( + self, + messages: Messages, + tool_specs: 
list[ToolSpec] | None = None, + system_prompt: str | None = None, + *, + tool_choice: ToolChoice | None = None, + system_prompt_content: list[SystemContentBlock] | None = None, + invocation_state: dict[str, Any] | None = None, + **kwargs: Any, + ) -> AsyncGenerator[dict, None]: + self.call_count += 1 + yield {"messageStart": {"role": "assistant"}} + yield { + "contentBlockStart": { + "start": { + "toolUse": { + "name": "get_weather", + "toolUseId": f"tu_{self.call_count}", + } + } + } + } + yield {"contentBlockDelta": {"delta": {"toolUse": {"input": json.dumps({"city": "SF"})}}}} + yield {"contentBlockStop": {}} + yield {"messageStop": {"stopReason": "tool_use"}} + + +@strands.tool +def get_weather(city: str) -> str: + """Get weather for a city.""" + return f"sunny in {city}" + + +@pytest.mark.asyncio +async def test_agent_max_iterations_terminates_loop(caplog): + """Agent configured with max_iterations=5 must halt a looping model at exactly 5 cycles. + + Asserts: + (a) The loop terminates (does not hang). + (b) The model was invoked exactly `max_iterations` times (pins off-by-one regressions). + (c) A warning is emitted with the exact expected substring when the cap trips. + (d) `result.stop_reason == "max_iterations"`. + (e) The last message is a synthetic assistant message whose content mentions + "max_iterations" and which has a `metadata` key (so downstream consumers + that read metadata.usage / metadata.metrics don't KeyError). + """ + caplog.set_level(logging.WARNING, logger="strands.event_loop.event_loop") + + model = LoopingStubModel() + agent = Agent(model=model, tools=[get_weather], max_iterations=5) + + # Must terminate within a bounded wall-clock budget. 
+ result = await asyncio.wait_for(agent.invoke_async("weather in SF?"), timeout=30.0) + + # (b) cap enforced exactly — pins off-by-one regressions + assert model.call_count == 5, ( + f"model invoked {model.call_count} times, expected exactly 5" + ) + + # (a) loop terminated with a result + assert result is not None + + # (d) stop_reason reflects the cap + assert result.stop_reason == "max_iterations" + + # (e) synthetic terminal assistant message with metadata populated + last_message = agent.messages[-1] + assert last_message["role"] == "assistant" + flat_text = "".join( + block.get("text", "") for block in last_message.get("content", []) + ) + assert "max_iterations" in flat_text, ( + f"expected synthetic message to mention 'max_iterations', got: {flat_text!r}" + ) + assert "metadata" in last_message, ( + "synthetic message missing 'metadata' key — consumers that read " + "metadata.usage / metadata.metrics will KeyError" + ) + # metadata must have the usage + metrics shape matching normal assistant messages + assert "usage" in last_message["metadata"] + assert "metrics" in last_message["metadata"] + # metadata must carry the `synthetic: True` marker so downstream + # token-budgeting / analytics can filter the halt message out of + # per-call cost percentiles (it is NOT a real model call). 
+ assert last_message["metadata"].get("synthetic") is True, ( + "synthetic halt message missing metadata['synthetic']=True marker" + ) + + # (c) warning emitted with the exact substring we document + warning_texts = [ + r.getMessage() for r in caplog.records if r.levelno >= logging.WARNING + ] + assert any( + "max_iterations cap reached" in msg for msg in warning_texts + ), f"expected 'max_iterations cap reached' in warnings, got: {warning_texts}" + + +@pytest.mark.asyncio +async def test_agent_max_iterations_one_boundary(): + """max_iterations=1 must trip after exactly 1 cycle — no crash, synthetic message present.""" + model = LoopingStubModel() + agent = Agent(model=model, tools=[get_weather], max_iterations=1) + + result = await asyncio.wait_for(agent.invoke_async("weather?"), timeout=10.0) + + assert model.call_count == 1 + assert result.stop_reason == "max_iterations" + assert agent.messages[-1]["role"] == "assistant" + assert "metadata" in agent.messages[-1] + assert agent.messages[-1]["metadata"].get("synthetic") is True + + +@pytest.mark.parametrize( + "bad_value", + [ + 0, + -1, + "5", + 2.5, + True, # bool is a subclass of int; must be rejected explicitly + False, + ], +) +def test_agent_max_iterations_validation_rejects(bad_value): + """Agent constructor must reject non-positive-int max_iterations values.""" + with pytest.raises(ValueError): + Agent(max_iterations=bad_value) + + +@pytest.mark.asyncio +async def test_agent_max_iterations_counter_resets_between_invocations(): + """Reusing the SAME invocation_state dict across calls must NOT leak the counter. + + Regression guard for: cycle counter stored on caller-supplied dict accumulating + across independent agent invocations. + """ + model = LoopingStubModel() + agent = Agent(model=model, tools=[get_weather], max_iterations=3) + + shared_state: dict[str, Any] = {} + + # First call: cap should trip at 3. 
+ await asyncio.wait_for( + agent.invoke_async("first", invocation_state=shared_state), timeout=10.0 + ) + first_count = model.call_count + assert first_count == 3, f"first call: expected 3, got {first_count}" + + # Second call: if the counter leaked, we'd trip immediately (0 more calls). + # If the counter resets correctly, we get another 3 cycles. + await asyncio.wait_for( + agent.invoke_async("second", invocation_state=shared_state), timeout=10.0 + ) + delta = model.call_count - first_count + assert delta == 3, ( + f"second call: expected another 3 cycles (counter reset), got {delta}. " + "Counter leaked across invocations via shared invocation_state dict." + ) + + +@pytest.mark.asyncio +async def test_agent_max_iterations_resets_on_context_overflow(): + """max_iterations should cap tool-call cycles, NOT model-retry cycles. + + When ContextWindowOverflowException fires and the conversation_manager + successfully reduces context, the recursive retry should reset the + cycle counter so the user's budget is restored (cap = max tool-call cycles, + not total model invocations). + """ + from strands.types.exceptions import ContextWindowOverflowException + + model = LoopingStubModel() + agent = Agent(model=model, tools=[get_weather], max_iterations=4) + + real_event_loop_cycle = strands.event_loop.event_loop.event_loop_cycle + overflow_fired = {"done": False} + + async def cycle_that_overflows_once(*args, **kwargs): + """First invocation raises overflow AFTER bumping the counter, then + subsequent invocations go through to the real cycle. + """ + if not overflow_fired["done"]: + overflow_fired["done"] = True + inv_state = kwargs.get("invocation_state") or (args[1] if len(args) > 1 else {}) + # Simulate a cycle that got partway through: bump the counter (as + # the real cycle does when it invokes the model) before raising. 
+ inv_state["event_loop_cycle_count"] = ( + inv_state.get("event_loop_cycle_count", 0) + 1 + ) + raise ContextWindowOverflowException("simulated overflow") + yield # unreachable; marks this as an async generator + async for ev in real_event_loop_cycle(*args, **kwargs): + yield ev + + # Stub reduce_context so we don't fight the sliding-window trimming logic + # (which requires enough messages to trim from). The behavior under test + # is the counter reset, not trim logic. + agent.conversation_manager.reduce_context = lambda *a, **kw: None + + # Patch the symbol as imported into agent.py. + import strands.agent.agent as agent_mod + + original = agent_mod.event_loop_cycle + agent_mod.event_loop_cycle = cycle_that_overflows_once + try: + await asyncio.wait_for(agent.invoke_async("weather?"), timeout=10.0) + finally: + agent_mod.event_loop_cycle = original + + # If the counter were NOT reset on the overflow retry, the pre-overflow cycle + # would consume 1 unit of budget and we'd only get 3 real cycles before the + # cap trips. With the reset we get the full 4-cycle budget post-recovery. + assert model.call_count == 4, ( + f"expected 4 post-overflow cycles (counter reset), got {model.call_count}" + ) + + +@pytest.mark.asyncio +async def test_agent_max_iterations_resets_on_hook_resume(): + """Each hook-driven `resume` leg must get a fresh `max_iterations` budget. + + `AfterInvocationEvent` handlers can set `event.resume` to a new input to drive + another leg of the agent loop (see + `test_agent_hooks.test_after_invocation_resume_triggers_new_invocation`). + Each leg is logically a fresh invocation from the cap's perspective — otherwise + leg 1's cycles would bleed into every later leg's budget and trip the cap early. + + Regression guard: without the per-iteration `pop` inside `_run_loop`'s + `while current_messages is not None:` loop, leg 1 consumes N cycles and + leg 2 starts at cycle N+1, leaving only `max_iterations - N` cycles. 
+ """ + from strands.hooks import AfterInvocationEvent + + model = LoopingStubModel() + agent = Agent(model=model, tools=[get_weather], max_iterations=3) + + resume_count = 0 + + async def resume_once(event: AfterInvocationEvent) -> None: + nonlocal resume_count + if resume_count == 0: + resume_count += 1 + event.resume = "keep going" + + agent.hooks.add_callback(AfterInvocationEvent, resume_once) + + await asyncio.wait_for(agent.invoke_async("weather?"), timeout=15.0) + + # Leg 1 burns 3 cycles (cap trips), then resume fires. If the counter + # didn't reset on the resume leg, leg 2 would start at count=3 (already + # at/over the cap) and trip immediately without any model calls — giving + # a total of 3 model calls. With the reset, leg 2 gets a fresh 3-cycle + # budget and the total is 6. + assert resume_count == 1, "resume hook must fire exactly once" + assert model.call_count == 6, ( + f"expected 6 total model calls (3 per leg × 2 legs with reset), " + f"got {model.call_count}. Counter leaked across resume legs." + ) + + +@pytest.mark.asyncio +async def test_agent_max_iterations_halt_does_not_emit_model_message_event(): + """The halt path must NOT emit `ModelMessageEvent` — no model invocation occurred. + + `ModelMessageEvent` is the SDK's signal "the model just produced this message." + Consumers tracking 1:1 correspondence with model calls (for metrics, retry + accounting, replay, etc.) would miscount if the synthetic halt message were + announced as a model-produced message. The `MessageAddedEvent` hook + terminal + `EventLoopStopEvent` are the correct signals on this path. + """ + from strands.types import _events as events_mod + + # Count ModelMessageEvent constructions across the invocation. Patching the + # class __init__ is the most direct way to observe event-type emission + # without depending on the internal wiring between `_run_loop`, + # `stream_async`, telemetry, and `as_dict()` shapes. 
+ model_message_event_count = 0 + original_init = events_mod.ModelMessageEvent.__init__ + + def counting_init(self_, *args, **kwargs): + nonlocal model_message_event_count + model_message_event_count += 1 + original_init(self_, *args, **kwargs) + + events_mod.ModelMessageEvent.__init__ = counting_init + try: + model = LoopingStubModel() + agent = Agent(model=model, tools=[get_weather], max_iterations=2) + result = await asyncio.wait_for(agent.invoke_async("weather?"), timeout=10.0) + finally: + events_mod.ModelMessageEvent.__init__ = original_init + + # `LoopingStubModel` emits a tool_use on every cycle, producing one real + # `ModelMessageEvent` per real model call. With max_iterations=2 we get 2 + # real model turns, then the cap trips. The synthetic halt message must + # NOT contribute an additional `ModelMessageEvent` — no model invocation + # occurred on the halt path. + assert result.stop_reason == "max_iterations" + assert model.call_count == 2, f"expected 2 real model calls, got {model.call_count}" + assert model_message_event_count == 2, ( + f"expected 2 ModelMessageEvents (one per real model call), got " + f"{model_message_event_count}. The synthetic halt message must NOT emit " + f"a ModelMessageEvent — no model invocation occurred on the halt path." + ) diff --git a/tests/strands/event_loop/test_event_loop.py b/tests/strands/event_loop/test_event_loop.py index 871371f5f..4f7d97259 100644 --- a/tests/strands/event_loop/test_event_loop.py +++ b/tests/strands/event_loop/test_event_loop.py @@ -159,6 +159,9 @@ def agent(model, system_prompt, messages, tool_registry, thread_pool, hook_regis mock._model_state = {} mock.trace_attributes = {} mock.retry_strategy = ModelRetryStrategy() + # Default cap high enough that the existing event_loop tests never trip it; + # the cap behavior itself is covered in tests/strands/agent/test_agent_max_iterations.py. + mock._max_iterations = 1000 return mock