25 changes: 23 additions & 2 deletions src/strands/agent/agent.py
@@ -146,6 +146,8 @@ def __init__(
         tool_executor: ToolExecutor | None = None,
         retry_strategy: ModelRetryStrategy | _DefaultRetryStrategySentinel | None = _DEFAULT_RETRY_STRATEGY,
         concurrent_invocation_mode: ConcurrentInvocationMode = ConcurrentInvocationMode.THROW,
+        max_turns: int | None = None,
+        max_token_budget: int | None = None,
     ):
         """Initialize the Agent with the specified configuration.

@@ -214,6 +216,13 @@ def __init__(
                 Set to "unsafe_reentrant" to skip lock acquisition entirely, allowing concurrent invocations.
                 Warning: "unsafe_reentrant" makes no guarantees about resulting behavior and is provided
                 only for advanced use cases where the caller understands the risks.
+            max_turns: Maximum number of model calls per invocation. Sets stop_reason="max_turns"
+                on the result when the limit is reached. Only actual model calls count; forced
+                structured-output retries each consume an additional turn.
+                Must be a positive integer or None (no limit). Defaults to None.
+            max_token_budget: Maximum cumulative tokens (totalTokens) per invocation. Sets
+                stop_reason="max_token_budget" on the result when the limit is reached.
+                Must be a positive integer or None (no limit). Defaults to None.

         Raises:
             ValueError: If agent id contains path separators.
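The docstring above implies a straightforward call pattern. A minimal sketch, assuming Agent is importable from the strands package and that results expose stop_reason as documented (the prompt and limit values are placeholders):

```python
from strands import Agent

# Hypothetical usage: cap an invocation at 3 model calls and a cumulative
# budget of 8000 totalTokens; both limits are optional and default to None.
agent = Agent(max_turns=3, max_token_budget=8000)

result = agent("Summarize the design doc and list open questions.")
if result.stop_reason in ("max_turns", "max_token_budget"):
    # An agent-level limit cut the event loop off before a normal end_turn.
    print(f"Invocation truncated: {result.stop_reason}")
```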
@@ -313,6 +322,16 @@ def __init__(
         self._invocation_lock = threading.Lock()
         self._concurrent_invocation_mode = concurrent_invocation_mode

+        if max_turns is not None and max_turns < 1:
+            raise ValueError("max_turns must be a positive integer")
+        if max_token_budget is not None and max_token_budget < 1:
+            raise ValueError("max_token_budget must be a positive integer")
+
+        self.max_turns = max_turns
+        self.max_token_budget = max_token_budget
+        self._invocation_turn_count: int = 0
+        self._invocation_token_count: int = 0
+
         # In the future, we'll have a RetryStrategy base class but until
         # that API is determined we only allow ModelRetryStrategy
         if (
@@ -496,7 +515,7 @@ def __call__(
         Returns:
             Result object containing:

-            - stop_reason: Why the event loop stopped (e.g., "end_turn", "max_tokens")
+            - stop_reason: Why the event loop stopped (e.g., "end_turn", "max_tokens", "max_turns", "max_token_budget")
             - message: The final message from the model
             - metrics: Performance metrics from the event loop
             - state: The final state of the event loop
@@ -543,7 +562,7 @@ async def invoke_async(
         Returns:
             Result: object containing:

-            - stop_reason: Why the event loop stopped (e.g., "end_turn", "max_tokens")
+            - stop_reason: Why the event loop stopped (e.g., "end_turn", "max_tokens", "max_turns", "max_token_budget")
             - message: The final message from the model
             - metrics: Performance metrics from the event loop
             - state: The final state of the event loop
@@ -831,6 +850,8 @@ async def stream_async(
             self._interrupt_state.resume(prompt)

         self.event_loop_metrics.reset_usage_metrics()
+        self._invocation_turn_count = 0
+        self._invocation_token_count = 0

         merged_state = {}
         if kwargs:
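Because stream_async zeroes both counters at the start of each run, the limits apply per invocation rather than per agent lifetime. A sketch under the same assumptions as above:

```python
agent = Agent(max_turns=2)

r1 = agent("First task")   # up to 2 model calls; may stop with "max_turns"
r2 = agent("Second task")  # counters were reset, so this gets a fresh 2-call allowance
```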
25 changes: 25 additions & 0 deletions src/strands/event_loop/event_loop.py
@@ -141,7 +141,25 @@ async def event_loop_cycle(

     with trace_api.use_span(cycle_span, end_on_exit=False):
         try:
+            if agent.max_turns is not None and agent._invocation_turn_count >= agent.max_turns:
+                last_message = agent.messages[-1]
+                agent.event_loop_metrics.end_cycle(cycle_start_time, cycle_trace, attributes)
+                tracer.end_event_loop_cycle_span(cycle_span, last_message)
+                yield EventLoopStopEvent(
+                    "max_turns", last_message, agent.event_loop_metrics, invocation_state["request_state"]
+                )
+                return
+            if agent.max_token_budget is not None and agent._invocation_token_count >= agent.max_token_budget:
+                last_message = agent.messages[-1]
+                agent.event_loop_metrics.end_cycle(cycle_start_time, cycle_trace, attributes)
+                tracer.end_event_loop_cycle_span(cycle_span, last_message)
+                yield EventLoopStopEvent(
+                    "max_token_budget", last_message, agent.event_loop_metrics, invocation_state["request_state"]
+                )
+                return
             # Skipping model invocation if in interrupt state as interrupts are currently only supported for tool calls.
+            # Neither this path nor the existing-tool-use path below increments _invocation_turn_count
+            # because no model call is made — max_turns limits model invocations only.
             if agent._interrupt_state.activated:
                 stop_reason: StopReason = "tool_use"
                 message = agent._interrupt_state.context["tool_use_message"]
@@ -158,6 +176,13 @@
                 yield model_event

             stop_reason, message, *_ = model_event["stop"]
+            agent._invocation_turn_count += 1

One minor correctness concern -> the early-exit guard fires before checking agent._interrupt_state.activated and _has_tool_use_in_latest_message. Those two paths exit the cycle with stop_reason = "tool_use" without making a model call and without incrementing _invocation_turn_count. This means a sequence like:

cycle 1: tool_use (no model call) → counter stays 0
cycle 2: model call → counter becomes 1

This could burn an extra cycle past max_turns=1 in interrupt/existing-tool-use scenarios. It's an edge case and may be acceptable, but it's worth a comment acknowledging the behavior or a test covering it.
On invocation_state["request_state"] at the early exit: per lines 123–124 of event_loop.py, request_state is initialized before the try block where the limit checks now live, so invocation_state["request_state"] is always present when the EventLoopStopEvent is yielded. Safe.

On the _retry_strategy attribute name inconsistency: the existing mock fixture in test_event_loop.py sets mock.retry_strategy (no underscore prefix) while agent.py stores it as self._retry_strategy. This is a pre-existing issue, not introduced by this PR.

Author
Added a comment at the branch point making it explicit that neither the interrupt nor existing-tool-use path increments _invocation_turn_count (no model call = no turn consumed), and added test_max_turns_skipped_model_cycle_does_not_consume_turn to pin that behaviour.

Also caught two small things on the final pass: stale stop_reason examples in the stream_async/call docstrings weren't listing the two new stop reasons, and test_max_token_budget_stops_when_counter_already_at_limit wasn't using apply_execution_limit_defaults like the rest of the file. Both fixed.

I learned a lot from this one. Thanks for the review, really appreciated!
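For readers following along, here is a self-contained toy model of the rule that test pins, with all names illustrative rather than actual strands internals: cycles that skip the model (interrupt resume or an existing tool_use) do not consume a turn, and the guard only fires once the counter has reached the limit.

```python
# Toy model of the turn accounting, not the real event loop: 'model' marks a
# cycle that calls the model, 'skip' marks interrupt/existing-tool-use cycles.
def run_cycles(cycles: list[str], max_turns: int) -> tuple[int, str]:
    turn_count = 0
    for kind in cycles:
        if turn_count >= max_turns:
            return turn_count, "max_turns"  # early-exit guard, checked first
        if kind == "model":
            turn_count += 1  # only actual model calls consume a turn
    return turn_count, "end_turn"

# A skipped cycle before the model call does not consume a turn:
assert run_cycles(["skip", "model"], max_turns=1) == (1, "end_turn")
# Two model calls against max_turns=1 trip the guard on the second cycle:
assert run_cycles(["model", "model"], max_turns=1) == (1, "max_turns")
```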

+            # metadata is attached to the message inside _handle_model_execution before the
+            # stop event (the last item yielded), so it is populated by the time the
+            # async-for loop above finishes and we read model_event["stop"] here.
+            agent._invocation_token_count += (
+                message.get("metadata", {}).get("usage", {}).get("totalTokens", 0)
+            )
             yield ModelMessageEvent(message=message)
         except Exception as e:
             tracer.end_span_with_error(cycle_span, str(e), e)
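The chained .get() calls in the token accounting above degrade gracefully: if any level of the metadata is missing, the budget simply doesn't advance. An illustration with hypothetical message dicts:

```python
# Message with usage metadata attached by _handle_model_execution.
with_usage = {"role": "assistant", "metadata": {"usage": {"totalTokens": 1234}}}
assert with_usage.get("metadata", {}).get("usage", {}).get("totalTokens", 0) == 1234

# Message with no metadata at all: every .get() falls through to its default.
bare = {"role": "assistant"}
assert bare.get("metadata", {}).get("usage", {}).get("totalTokens", 0) == 0
```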
4 changes: 4 additions & 0 deletions src/strands/types/event_loop.py
@@ -43,7 +43,9 @@ class Metrics(TypedDict, total=False):
     "end_turn",
     "guardrail_intervened",
     "interrupt",
+    "max_token_budget",
     "max_tokens",
+    "max_turns",
     "stop_sequence",
     "tool_use",
 ]
@@ -55,7 +57,9 @@ class Metrics(TypedDict, total=False):
 - "end_turn": Normal completion of the response
 - "guardrail_intervened": Guardrail system intervened
 - "interrupt": Agent was interrupted for human input
+- "max_token_budget": Agent-level cumulative token budget exhausted
 - "max_tokens": Maximum token limit reached
+- "max_turns": Agent-level turn limit exhausted
 - "stop_sequence": Stop sequence encountered
 - "tool_use": Model requested to use a tool
 """
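A short sketch distinguishing the similarly named stop reasons on the consumer side, assuming StopReason is importable from strands.types.event_loop as defined above:

```python
from strands.types.event_loop import StopReason

def describe_stop(stop_reason: StopReason) -> str:
    # "max_tokens" is the model's per-response output limit; the two new
    # values are the agent-level limits added by this PR.
    if stop_reason == "max_tokens":
        return "model response hit its per-call token limit"
    if stop_reason == "max_token_budget":
        return "invocation exhausted the agent's cumulative token budget"
    if stop_reason == "max_turns":
        return "invocation exhausted the agent's model-call limit"
    return f"stopped with: {stop_reason}"
```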