diff --git a/src/strands/models/litellm.py b/src/strands/models/litellm.py
index 36bdb5a05..fb8b0bcdd 100644
--- a/src/strands/models/litellm.py
+++ b/src/strands/models/litellm.py
@@ -220,11 +220,14 @@ def _format_system_messages(
         for block in system_prompt_content or []:
             if "text" in block:
                 system_content.append({"type": "text", "text": block["text"]})
-            elif "cachePoint" in block and block["cachePoint"].get("type") == "default":
+            elif "cachePoint" in block and block["cachePoint"]["type"] == "default":
                 # Apply cache control to the immediately preceding content block
                 # for LiteLLM/Anthropic compatibility
                 if system_content:
-                    system_content[-1]["cache_control"] = {"type": "ephemeral"}
+                    cache_control: dict[str, Any] = {"type": "ephemeral"}
+                    if ttl := block["cachePoint"].get("ttl"):
+                        cache_control["ttl"] = ttl
+                    system_content[-1]["cache_control"] = cache_control
 
         # Create single system message with content array rather than mulitple system messages
         return [{"role": "system", "content": system_content}] if system_content else []
diff --git a/src/strands/types/content.py b/src/strands/types/content.py
index 8db1d1d98..5f9cc1460 100644
--- a/src/strands/types/content.py
+++ b/src/strands/types/content.py
@@ -67,9 +67,12 @@ class CachePoint(TypedDict):
 
     Attributes:
         type: The type of cache point, typically "default".
+        ttl: Optional cache TTL duration (e.g. "5m", "1h"). Supported by providers
+            that accept Anthropic-compatible cache_control fields.
     """
 
     type: str
+    ttl: NotRequired[str]
 
 
 class ContentBlock(TypedDict, total=False):
diff --git a/tests/strands/models/test_litellm.py b/tests/strands/models/test_litellm.py
index d35a1806e..96cf561cd 100644
--- a/tests/strands/models/test_litellm.py
+++ b/tests/strands/models/test_litellm.py
@@ -955,6 +955,39 @@ def test_format_request_message_tool_call_no_reasoning_signature():
     assert "__thought__" not in result["id"]
 
 
+def test_format_system_messages_preserves_cache_point_ttl():
+    """CachePoint with ttl="1h" should produce cache_control with ttl field."""
+    result = LiteLLMModel._format_system_messages(
+        system_prompt_content=[
+            {"text": "You are a helpful assistant."},
+            {"cachePoint": {"type": "default", "ttl": "1h"}},
+        ]
+    )
+    assert result[0]["content"][0]["cache_control"] == {"type": "ephemeral", "ttl": "1h"}
+
+
+def test_format_system_messages_cache_point_without_ttl():
+    """CachePoint without ttl should produce cache_control with no ttl key (backward compat)."""
+    result = LiteLLMModel._format_system_messages(
+        system_prompt_content=[
+            {"text": "You are a helpful assistant."},
+            {"cachePoint": {"type": "default"}},
+        ]
+    )
+    assert result[0]["content"][0]["cache_control"] == {"type": "ephemeral"}
+    assert "ttl" not in result[0]["content"][0]["cache_control"]
+
+
+def test_format_system_messages_cache_point_with_no_preceding_content():
+    """CachePoint with no preceding text block should be silently ignored."""
+    result = LiteLLMModel._format_system_messages(
+        system_prompt_content=[
+            {"cachePoint": {"type": "default", "ttl": "1h"}},
+        ]
+    )
+    assert result == []
+
+
 def test_thought_signature_round_trip():
     """Test that thought signature is preserved through a full response -> internal -> request cycle."""
     model = LiteLLMModel(model_id="test")
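
For reviewers, a minimal usage sketch of the new behavior, not part of the patch. It calls _format_system_messages on the class exactly as the new tests do; the prompt text and "1h" TTL value are illustrative, and the expected output mirrors the assertion in test_format_system_messages_preserves_cache_point_ttl.

    # Sketch only: assumes the package layout shown in this diff.
    from strands.models.litellm import LiteLLMModel

    formatted = LiteLLMModel._format_system_messages(
        system_prompt_content=[
            {"text": "You are a helpful assistant."},
            # Cache the preceding content block with an extended one-hour TTL.
            {"cachePoint": {"type": "default", "ttl": "1h"}},
        ]
    )
    # formatted:
    # [{"role": "system",
    #   "content": [{"type": "text",
    #                "text": "You are a helpful assistant.",
    #                "cache_control": {"type": "ephemeral", "ttl": "1h"}}]}]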