Skip to content
Open
16 changes: 16 additions & 0 deletions src/google/adk/models/google_llm.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,21 @@
"""


def _remove_old_thought_signatures(contents: list[types.Content]) -> None:
  """Clears every thought signature except the most recent one.

  The request contents are scanned from the last content to the first, and
  within each content from the last part to the first. The first part found
  with a non-None `thought_signature` is left untouched; every other signed
  part has its `thought_signature` set to None.

  Args:
    contents: The request contents to scrub. Parts are modified in place;
      contents whose `parts` is empty or None are skipped.
  """
  # Lazily walk all signed parts, newest first.
  signed_parts_newest_first = (
      part
      for content in reversed(contents)
      for part in reversed(content.parts or ())
      if part.thought_signature is not None
  )

  # The first signed part encountered is the latest one: keep it as is.
  next(signed_parts_newest_first, None)

  # Everything still left in the walk is older and loses its signature.
  for stale_part in signed_parts_newest_first:
    stale_part.thought_signature = None


class _ResourceExhaustedError(ClientError):
"""Represents a resources exhausted error received from the Model."""

Expand Down Expand Up @@ -195,6 +210,7 @@ async def generate_content_async(
"""
await self._preprocess_request(llm_request)
self._maybe_append_user_content(llm_request)
_remove_old_thought_signatures(llm_request.contents)

# Handle context caching if configured
cache_metadata = None
Expand Down
47 changes: 47 additions & 0 deletions tests/unittests/models/test_google_llm.py
Original file line number Diff line number Diff line change
Expand Up @@ -381,6 +381,53 @@ async def mock_coro():
mock_client.aio.models.generate_content.assert_called_once()


@pytest.mark.asyncio
async def test_generate_content_async_keeps_only_latest_thought_signature(
    gemini_llm, generate_content_response
):
  """Only the most recent thought signature is left in the sent contents."""

  def _signed_call(tool_name, signature):
    # Function-call part carrying the given thought signature.
    call = types.FunctionCall(name=tool_name, args={})
    return Part(function_call=call, thought_signature=signature)

  # History: a signed model turn, a user turn, then a model turn with two
  # signed parts; the very last part holds the newest signature.
  first_model_turn = Content(
      role="model", parts=[_signed_call("first_tool", b"old")]
  )
  user_turn = Content(role="user", parts=[Part.from_text(text="tool result")])
  last_model_turn = Content(
      role="model",
      parts=[
          _signed_call("second_tool", b"newer"),
          _signed_call("third_tool", b"latest"),
      ],
  )
  llm_request = LlmRequest(
      model="gemini-2.5-flash",
      contents=[first_model_turn, user_turn, last_model_turn],
  )

  async def _fake_generate_content():
    return generate_content_response

  with mock.patch.object(gemini_llm, "api_client") as mock_client:
    generate_content = mock_client.aio.models.generate_content
    # The mocked method hands back an awaitable yielding the canned response.
    generate_content.return_value = _fake_generate_content()

    responses = []
    async for response in gemini_llm.generate_content_async(
        llm_request, stream=False
    ):
      responses.append(response)

    assert len(responses) == 1

    # Inspect the contents actually passed to the mocked API client.
    sent_contents = generate_content.call_args.kwargs["contents"]
    first_turn_parts = sent_contents[0].parts
    last_turn_parts = sent_contents[2].parts
    assert first_turn_parts[0].thought_signature is None
    assert last_turn_parts[0].thought_signature is None
    assert last_turn_parts[1].thought_signature == b"latest"


@pytest.mark.asyncio
async def test_generate_content_async_stream(gemini_llm, llm_request):
with mock.patch.object(gemini_llm, "api_client") as mock_client:
Expand Down