Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
100 changes: 84 additions & 16 deletions core/src/main/java/com/google/adk/models/Gemini.java
Original file line number Diff line number Diff line change
Expand Up @@ -226,21 +226,7 @@ public Flowable<LlmResponse> generateContent(LlmRequest llmRequest, boolean stre
() ->
processRawResponses(
Flowable.fromFuture(streamFuture).flatMapIterable(iterable -> iterable)))
.filter(
llmResponse ->
llmResponse
.content()
.flatMap(Content::parts)
.map(
parts ->
!parts.isEmpty()
&& parts.stream()
.anyMatch(
p ->
p.functionCall().isPresent()
|| p.functionResponse().isPresent()
|| p.text().isPresent()))
.orElse(false));
.filter(Gemini::shouldEmit);
} else {
logger.debug("Sending generateContent request to model {}", effectiveModelName);
return Flowable.fromFuture(
Expand Down Expand Up @@ -298,7 +284,28 @@ static Flowable<LlmResponse> processRawResponses(Flowable<GenerateContentRespons
responsesToEmit.add(aggregatedTextResponse);
accumulatedText.setLength(0);
}
responsesToEmit.add(currentProcessedLlmResponse);
if (isEmptyTextOnlyResponse(currentProcessedLlmResponse)) {
// Strip the empty-text content while preserving any carried metadata
// (`usageMetadata`, `finishReason`, `modelVersion`, etc.) by emitting a
// content-less response marked as `partial`. This handles the trailing
// `{parts:[{text:""}], finishReason:STOP}` chunk emitted by some Gemini
// preview models (e.g. 3.1-flash-lite) after a function call: keeping
// the chunk as-is would propagate it as a non-partial event whose
// Event#finalResponse() returns true and prematurely terminate
// BaseLlmFlow#run before the function response is sent back to the
// model; dropping it entirely would lose the carried metadata. If the
// chunk carries no useful metadata at all, suppress it outright.
LlmResponse metadataOnly =
currentProcessedLlmResponse.toBuilder()
.content((Content) null)
.partial(true)
.build();
if (hasUsefulMetadata(metadataOnly)) {
responsesToEmit.add(metadataOnly);
}
} else {
responsesToEmit.add(currentProcessedLlmResponse);
}
}
logger.debug("Responses to emit: {}", responsesToEmit);
return Flowable.fromIterable(responsesToEmit);
Expand Down Expand Up @@ -358,6 +365,67 @@ private static LlmResponse thinkingResponseFromText(String accumulatedThoughtTex
.build();
}

/**
 * Decides whether the streaming pipeline should forward {@code response} downstream.
 *
 * <p>A response is suppressed only when it is an empty-text-only response (see {@link
 * #isEmptyTextOnlyResponse}) that also lacks useful metadata (see {@link #hasUsefulMetadata}).
 * Every other response is forwarded.
 *
 * <p>Package-private for testing.
 *
 * @param response the response to inspect
 * @return {@code true} if the response should be emitted, {@code false} if it should be dropped
 */
static boolean shouldEmit(LlmResponse response) {
  // Anything other than an empty-text-only response always passes through.
  if (!isEmptyTextOnlyResponse(response)) {
    return true;
  }
  // An empty-text-only response is only worth emitting for the metadata it carries.
  return hasUsefulMetadata(response);
}

/**
 * Reports whether {@code response} has at least one top-level metadata field set that is worth
 * propagating downstream: {@code usageMetadata}, {@code finishReason}, {@code errorCode}, {@code
 * groundingMetadata}, {@code inputTranscription} or {@code outputTranscription}.
 *
 * <p>Only these top-level {@link LlmResponse} fields are examined; the response's content and
 * parts are deliberately ignored.
 *
 * @param response the response to inspect
 * @return {@code true} if any of the listed fields is present
 */
private static boolean hasUsefulMetadata(LlmResponse response) {
  if (response.usageMetadata().isPresent()) {
    return true;
  }
  if (response.finishReason().isPresent()) {
    return true;
  }
  if (response.errorCode().isPresent()) {
    return true;
  }
  if (response.groundingMetadata().isPresent()) {
    return true;
  }
  if (response.inputTranscription().isPresent()) {
    return true;
  }
  return response.outputTranscription().isPresent();
}

/**
 * Reports whether {@code response} holds exactly one {@link Part} and that part is nothing but an
 * empty text string (i.e. {@code parts:[{text:""}]}). Responses without content, without parts,
 * or with any other number of parts are never considered empty-text-only.
 *
 * <p>Such a chunk carries no semantic content for the agent pipeline, so the streaming aggregator
 * may drop it.
 *
 * @param response the response to inspect
 * @return {@code true} if the response is a single empty-text part with no other payload
 */
private static boolean isEmptyTextOnlyResponse(LlmResponse response) {
  return response
      .content()
      .flatMap(Content::parts)
      .filter(parts -> parts.size() == 1)
      .map(parts -> isEmptyTextOnlyPart(parts.get(0)))
      .orElse(false);
}

/**
 * Reports whether {@code part} has a present-but-empty {@code text} and none of the non-text
 * payloads ({@code functionCall}, {@code functionResponse}, {@code inlineData}, {@code
 * executableCode}, {@code codeExecutionResult}, {@code fileData}, {@code thoughtSignature}, {@code
 * videoMetadata}, {@code toolCall}, {@code toolResponse}).
 */
private static boolean isEmptyTextOnlyPart(Part part) {
  boolean hasEmptyText = part.text().map(String::isEmpty).orElse(false);
  if (!hasEmptyText) {
    return false;
  }
  // Any non-text payload means the part carries semantic content.
  boolean hasOtherPayload =
      part.functionCall().isPresent()
          || part.functionResponse().isPresent()
          || part.inlineData().isPresent()
          || part.executableCode().isPresent()
          || part.codeExecutionResult().isPresent()
          || part.fileData().isPresent()
          || part.thoughtSignature().isPresent()
          || part.videoMetadata().isPresent()
          || part.toolCall().isPresent()
          || part.toolResponse().isPresent();
  return !hasOtherPayload;
}

@Override
public BaseLlmConnection connect(LlmRequest llmRequest) {
if (!apiClient.vertexAI()) {
Expand Down
182 changes: 182 additions & 0 deletions core/src/test/java/com/google/adk/models/GeminiTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,81 @@ public void processRawResponses_withTextChunks_emitsPartialResponses() {
isFunctionCallResponse());
}

// Regression test for b/513501918. After a function call, gemini-3.1-flash-lite streams one more
// chunk shaped like `{parts:[{text:""}], finishReason:STOP}`. Propagating that chunk as a
// non-partial event would make BaseLlmFlow#run treat it as the final response and end the loop
// before the function response is sent back to the model. Rather than discarding the chunk, its
// metadata (e.g. `finishReason`, `usageMetadata`) is expected on a content-less partial response.
@Test
public void
    processRawResponses_functionCallThenEmptyTextWithStop_emitsFunctionCallAndMetadataOnlyPartial() {
  GenerateContentResponse functionCallChunk =
      toResponse(Part.fromFunctionCall("test_function", ImmutableMap.of()));
  GenerateContentResponse trailingEmptyChunk = toResponseWithText("", FinishReason.Known.STOP);

  Flowable<LlmResponse> emitted =
      Gemini.processRawResponses(Flowable.just(functionCallChunk, trailingEmptyChunk));

  assertLlmResponses(
      emitted,
      isFunctionCallResponse(),
      isContentlessPartialWithFinishReason(FinishReason.Known.STOP));
}

// Function call followed by a trailing empty-text STOP chunk that also carries `usageMetadata`:
// the metadata must survive on the emitted content-less partial.
@Test
public void
    processRawResponses_functionCallThenEmptyTextWithUsageMetadata_preservesUsageMetadata() {
  GenerateContentResponseUsageMetadata usage = createUsageMetadata(5, 10, 15);
  GenerateContentResponse functionCallChunk =
      toResponse(Part.fromFunctionCall("test_function", ImmutableMap.of()));
  GenerateContentResponse trailingEmptyChunk =
      toResponseWithText("", FinishReason.Known.STOP, usage);

  Flowable<LlmResponse> emitted =
      Gemini.processRawResponses(Flowable.just(functionCallChunk, trailingEmptyChunk));

  assertLlmResponses(
      emitted, isFunctionCallResponse(), isContentlessPartialWithUsageMetadata(usage));
}

// Function call followed by a trailing empty-text chunk with neither finishReason nor
// usageMetadata: the chunk carries no useful payload and must be suppressed entirely.
@Test
public void processRawResponses_functionCallThenEmptyText_doesNotEmitExtraEmptyResponse() {
  GenerateContentResponse functionCallChunk =
      toResponse(Part.fromFunctionCall("test_function", ImmutableMap.of()));
  GenerateContentResponse trailingEmptyChunk = toResponseWithText("");

  Flowable<LlmResponse> emitted =
      Gemini.processRawResponses(Flowable.just(functionCallChunk, trailingEmptyChunk));

  assertLlmResponses(emitted, isFunctionCallResponse());
}

// Combined scenario: a leading partial text chunk, then a function call, then the trailing
// empty-text chunk with STOP. The accumulated text must still be flushed, the function call must
// still be emitted, and the trailing chunk must surface only its metadata on a content-less
// partial.
@Test
public void
    processRawResponses_textThenFunctionCallThenEmptyTextWithStop_emitsTextFunctionCallAndMetadata() {
  GenerateContentResponse leadingTextChunk = toResponseWithText("Thinking...");
  GenerateContentResponse functionCallChunk =
      toResponse(Part.fromFunctionCall("test_function", ImmutableMap.of()));
  GenerateContentResponse trailingEmptyChunk = toResponseWithText("", FinishReason.Known.STOP);

  Flowable<LlmResponse> emitted =
      Gemini.processRawResponses(
          Flowable.just(leadingTextChunk, functionCallChunk, trailingEmptyChunk));

  assertLlmResponses(
      emitted,
      isPartialTextResponse("Thinking..."),
      isFinalTextResponse("Thinking..."),
      isFunctionCallResponse(),
      isContentlessPartialWithFinishReason(FinishReason.Known.STOP));
}

@Test
public void processRawResponses_textAndStopReason_emitsPartialThenFinalText() {
Flowable<GenerateContentResponse> rawResponses =
Expand Down Expand Up @@ -175,6 +250,93 @@ public void processRawResponses_thoughtChunksAndStop_includeUsageMetadata() {
isFinalThoughtResponseWithUsageMetadata("Thinking deeply", metadata2));
}

// Test cases for the shouldEmit filter applied by generateContent after processRawResponses.
// shouldEmit drops chunks that are empty-text-only AND carry no useful metadata; everything else
// is forwarded. processRawResponses normally already strips empty-text-only chunks, so shouldEmit
// is defense-in-depth, but it must still behave correctly when fed any LlmResponse directly.

@Test
public void shouldEmit_emptyTextOnlyResponseWithNoMetadata_returnsFalse() {
  // Single empty-text part and no metadata: nothing worth forwarding.
  Content emptyTextContent = Content.builder().role("model").parts(Part.fromText("")).build();
  LlmResponse emptyTextResponse = LlmResponse.builder().content(emptyTextContent).build();

  boolean emitted = Gemini.shouldEmit(emptyTextResponse);

  assertThat(emitted).isFalse();
}

@Test
public void shouldEmit_emptyTextOnlyResponseWithFinishReason_returnsTrue() {
  // Empty text, but the finishReason is metadata that must still be forwarded.
  Content emptyTextContent = Content.builder().role("model").parts(Part.fromText("")).build();
  LlmResponse emptyTextWithStop =
      LlmResponse.builder()
          .content(emptyTextContent)
          .finishReason(new FinishReason(FinishReason.Known.STOP))
          .build();

  boolean emitted = Gemini.shouldEmit(emptyTextWithStop);

  assertThat(emitted).isTrue();
}

@Test
public void shouldEmit_emptyTextOnlyResponseWithUsageMetadata_returnsTrue() {
  // Empty text, but the usageMetadata must still be forwarded.
  Content emptyTextContent = Content.builder().role("model").parts(Part.fromText("")).build();
  LlmResponse emptyTextWithUsage =
      LlmResponse.builder()
          .content(emptyTextContent)
          .usageMetadata(createUsageMetadata(5, 10, 15))
          .build();

  boolean emitted = Gemini.shouldEmit(emptyTextWithUsage);

  assertThat(emitted).isTrue();
}

@Test
public void shouldEmit_nonEmptyTextResponse_returnsTrue() {
  // Non-empty text is semantic content, so no metadata is needed for it to pass.
  Content helloContent = Content.builder().role("model").parts(Part.fromText("hello")).build();
  LlmResponse textResponse = LlmResponse.builder().content(helloContent).build();

  boolean emitted = Gemini.shouldEmit(textResponse);

  assertThat(emitted).isTrue();
}

@Test
public void shouldEmit_functionCallResponse_returnsTrue() {
  // A function-call part is semantic content and must always be forwarded.
  Content functionCallContent =
      Content.builder()
          .role("model")
          .parts(Part.fromFunctionCall("test_function", ImmutableMap.of()))
          .build();
  LlmResponse functionCallResponse = LlmResponse.builder().content(functionCallContent).build();

  boolean emitted = Gemini.shouldEmit(functionCallResponse);

  assertThat(emitted).isTrue();
}

@Test
public void shouldEmit_contentlessResponse_returnsTrue() {
  // A response without any content is not "empty-text-only", so it passes through whether or not
  // it has metadata. processRawResponses emits this shape after stripping empty-text content
  // while keeping the metadata.
  assertThat(Gemini.shouldEmit(LlmResponse.builder().build())).isTrue();
}

@Test
public void shouldEmit_multiPartResponseWithEmptyTextPart_returnsTrue() {
  // "Empty-text-only" applies to single-part responses only. A multi-part response counts as
  // carrying semantic content even when one of its parts is empty text, so it always passes.
  Content mixedContent =
      Content.builder().role("model").parts(Part.fromText(""), Part.fromText("hello")).build();
  LlmResponse multiPartResponse = LlmResponse.builder().content(mixedContent).build();

  boolean emitted = Gemini.shouldEmit(multiPartResponse);

  assertThat(emitted).isTrue();
}

@Test
public void processRawResponses_thoughtAndTextWithStop_onlyFinalTextIncludesUsageMetadata() {
GenerateContentResponseUsageMetadata metadata1 = createUsageMetadata(5, 5, 10);
Expand Down Expand Up @@ -232,6 +394,26 @@ private static Predicate<LlmResponse> isFunctionCallResponse() {
};
}

/**
 * Builds a predicate asserting that a response is marked partial, has no content, and carries
 * {@code expectedFinishReason}. The predicate returns {@code true} once every assertion passes.
 */
private static Predicate<LlmResponse> isContentlessPartialWithFinishReason(
    FinishReason.Known expectedFinishReason) {
  return candidate -> {
    assertThat(candidate.partial()).hasValue(true);
    assertThat(candidate.content()).isEmpty();
    assertThat(candidate.finishReason().map(reason -> reason.knownEnum()))
        .hasValue(expectedFinishReason);
    return true;
  };
}

/**
 * Builds a predicate asserting that a response is marked partial, has no content, and carries
 * {@code expectedMetadata} as its usage metadata. The predicate returns {@code true} once every
 * assertion passes.
 */
private static Predicate<LlmResponse> isContentlessPartialWithUsageMetadata(
    GenerateContentResponseUsageMetadata expectedMetadata) {
  return candidate -> {
    assertThat(candidate.partial()).hasValue(true);
    assertThat(candidate.content()).isEmpty();
    assertThat(candidate.usageMetadata()).hasValue(expectedMetadata);
    return true;
  };
}

private static Predicate<LlmResponse> isEmptyResponse() {
return response -> {
assertThat(response.partial()).isEmpty();
Expand Down
Loading