Skip to content

Commit 40d72a4

Browse files
committed
fix: strip empty text parts in streaming responses to prevent skipped tool execution
Some Gemini models (e.g. gemini-3.1-pro-preview) return function_call + text="" in the same streaming response. The empty text part was being treated as a final answer by the flow layer, preventing the second LLM call that should happen after tool execution. This fix strips empty text="" parts (where text is an empty string, not None) in both progressive and non-progressive SSE streaming paths of StreamingResponseAggregator.process_response(). Progressive path: added explicit `continue` to skip parts with text="" before they reach the else branch (which would add them to _parts_sequence as "other" non-text parts). Non-progressive path: filter out empty text parts from llm_response.content.parts before the existing text-accumulation logic runs, so they are never yielded as standalone non-partial responses.
1 parent 51c19cb commit 40d72a4

File tree

2 files changed: +118 additions, -0 deletions

src/google/adk/utils/streaming_utils.py

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -268,6 +268,11 @@ async def process_response(
268268
# Only merge consecutive text parts of the same type (thought or regular)
269269
if llm_response.content and llm_response.content.parts:
270270
for part in llm_response.content.parts:
271+
# Skip empty text parts (text="") that some models return
272+
# alongside function_call parts. These carry no content and
273+
# can cause the flow layer to treat them as a final response.
274+
if part.text == '' and not part.thought:
275+
continue
271276
if part.text:
272277
# Check if we need to flush the current buffer first
273278
# (when text type changes from thought to regular or vice versa)
@@ -297,6 +302,19 @@ async def process_response(
297302
return
298303

299304
# ========== Non-Progressive SSE Streaming (old behavior) ==========
305+
306+
# Strip empty text parts (text="") that some models return alongside
307+
# function_call parts in the same streaming response. Without this,
308+
# the empty-text part is yielded as a non-partial response, which
309+
# downstream (base_llm_flow) treats as a final answer — preventing
310+
# the tool-result continuation call from ever being made.
311+
if llm_response.content and llm_response.content.parts:
312+
llm_response.content.parts = [
313+
p
314+
for p in llm_response.content.parts
315+
if not (p.text == '' and not p.thought)
316+
]
317+
300318
if (
301319
llm_response.content
302320
and llm_response.content.parts

tests/unittests/utils/test_streaming_utils.py

Lines changed: 100 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -304,3 +304,103 @@ async def run_test():
304304
await run_test()
305305
else:
306306
await run_test()
307+
308+
@pytest.mark.asyncio
309+
async def test_empty_text_with_function_call_non_progressive(self):
310+
"""Empty text="" parts should be stripped so they don't become false final responses.
311+
312+
Some Gemini models return function_call + text="" in a single streaming
313+
response. Without stripping, the empty-text chunk is yielded as a
314+
non-partial LlmResponse, which base_llm_flow interprets as a final answer
315+
and never makes the second LLM call after tool execution.
316+
"""
317+
with temporary_feature_override(
318+
FeatureName.PROGRESSIVE_SSE_STREAMING, False
319+
):
320+
aggregator = streaming_utils.StreamingResponseAggregator()
321+
322+
# Chunk 1: function_call
323+
response_fc = types.GenerateContentResponse(
324+
candidates=[
325+
types.Candidate(
326+
content=types.Content(
327+
parts=[
328+
types.Part.from_function_call(
329+
name="list_directory",
330+
args={"path": "/tmp"},
331+
)
332+
]
333+
)
334+
)
335+
]
336+
)
337+
# Chunk 2: empty text (the problematic part)
338+
response_empty = types.GenerateContentResponse(
339+
candidates=[
340+
types.Candidate(
341+
content=types.Content(parts=[types.Part(text="")]),
342+
finish_reason=types.FinishReason.STOP,
343+
)
344+
]
345+
)
346+
347+
results_fc = []
348+
async for r in aggregator.process_response(response_fc):
349+
results_fc.append(r)
350+
351+
results_empty = []
352+
async for r in aggregator.process_response(response_empty):
353+
results_empty.append(r)
354+
355+
# The function_call chunk should be yielded (not partial — it has no text)
356+
assert len(results_fc) == 1
357+
fc_parts = results_fc[0].content.parts
358+
assert any(p.function_call for p in fc_parts)
359+
360+
# The empty-text chunk should have its empty text part stripped.
361+
# It must NOT contain a text="" part that could be mistaken for a
362+
# final answer.
363+
assert len(results_empty) == 1
364+
for p in results_empty[0].content.parts:
365+
if p.text is not None:
366+
assert p.text != '', (
367+
"Empty text part was not stripped — this causes the flow layer "
368+
"to treat it as a final response and skip tool execution"
369+
)
370+
371+
@pytest.mark.asyncio
372+
async def test_empty_text_with_function_call_progressive(self):
373+
"""Progressive mode should also ignore empty text="" parts."""
374+
with temporary_feature_override(
375+
FeatureName.PROGRESSIVE_SSE_STREAMING, True
376+
):
377+
aggregator = streaming_utils.StreamingResponseAggregator()
378+
379+
# Single response with function_call + empty text
380+
response = types.GenerateContentResponse(
381+
candidates=[
382+
types.Candidate(
383+
content=types.Content(
384+
parts=[
385+
types.Part.from_function_call(
386+
name="run_shell",
387+
args={"command": "ls"},
388+
),
389+
types.Part(text=""),
390+
]
391+
),
392+
finish_reason=types.FinishReason.STOP,
393+
)
394+
]
395+
)
396+
397+
async for _ in aggregator.process_response(response):
398+
pass
399+
400+
closed = aggregator.close()
401+
assert closed is not None
402+
# Should have the function_call but NOT the empty text
403+
assert any(p.function_call for p in closed.content.parts)
404+
for p in closed.content.parts:
405+
if p.text is not None:
406+
assert p.text != '', "Empty text part leaked into progressive aggregation"

0 commit comments

Comments (0)