Test message re-ordering fixes

danielmillerp · danielmillerp · commit ff27918eecdf · 2026-01-20T17:00:19.000-05:00
diff --git a/.github/workflows/build-and-push-tutorial-agent.yml b/.github/workflows/build-and-push-tutorial-agent.yml
@@ -190,6 +190,10 @@ jobs:
             SHOULD_PUSH=false
             VERSION_TAG="${{ github.sha }}"
             echo "🔍 Building agent for validation: ${{ matrix.agent_path }}"
+            # Export the full image name of the local (unpushed) build; note the validation step itself is skipped for PRs via SKIP_VALIDATION below
+            echo "FULL_IMAGE=${REGISTRY}/${REPOSITORY_NAME}:${VERSION_TAG}" >> $GITHUB_ENV
+            # Skip image validation for PRs since Buildx doesn't load multi-platform images locally
+            echo "SKIP_VALIDATION=true" >> $GITHUB_ENV
           fi
 
           # Always build locally first (without push)
@@ -203,6 +207,7 @@ jobs:
           echo "SHOULD_PUSH=${SHOULD_PUSH}" >> $GITHUB_ENV
 
       - name: Validate agent image
+        if: env.SKIP_VALIDATION != 'true'
         run: |
           set -e
 
diff --git a/examples/tutorials/00_sync/010_multiturn/project/acp.py b/examples/tutorials/00_sync/010_multiturn/project/acp.py
@@ -67,6 +67,7 @@ async def handle_message_send(
     #########################################################
 
     task_messages = await adk.messages.list(task_id=params.task.id)
+    task_messages = list(reversed(task_messages))  # API returns newest first, reverse to chronological order
 
     #########################################################
     # 3. Run the agent with OpenAI Agents SDK
diff --git a/examples/tutorials/00_sync/010_multiturn/tests/test_agent.py b/examples/tutorials/00_sync/010_multiturn/tests/test_agent.py
@@ -57,18 +57,27 @@ class TestNonStreamingMessages:
     """Test non-streaming message sending."""
 
     def test_send_message(self, client: Agentex, agent_name: str, agent_id: str):
+        """
+        Test message ordering by sending messages about distinct topics.
+
+        This validates that the agent receives messages in chronological order.
+        If messages are reversed (newest first), the agent would respond about
+        the wrong topic.
+        """
         task_response = client.agents.create_task(agent_id, params=ParamsCreateTaskRequest(name=uuid.uuid1().hex))
         task = task_response.result
 
         assert task is not None
 
-        messages = [
-            "Hello, can you tell me a litle bit about tennis? I want to you make sure you use the word 'tennis' in each response.",
-            "Pick one of the things you just mentioned, and dive deeper into it.",
-            "Can you now output a summary of this conversation",
+        # Each message asks about a distinct topic with a required keyword in response
+        # This validates message ordering: if order is wrong, agent responds about wrong topic
+        messages_and_expected_keywords = [
+            ("Tell me about tennis. You must include the word 'tennis' in your response.", "tennis"),
+            ("Now tell me about basketball. You must include the word 'basketball' in your response. Do not mention tennis.", "basketball"),
+            ("Now tell me about soccer. You must include the word 'soccer' in your response. Do not mention tennis or basketball.", "soccer"),
         ]
 
-        for i, msg in enumerate(messages):
+        for i, (msg, expected_keyword) in enumerate(messages_and_expected_keywords):
             response = client.agents.send_message(
                 agent_name=agent_name,
                 params=ParamsSendMessageRequest(
@@ -87,7 +96,8 @@ def test_send_message(self, client: Agentex, agent_name: str, agent_id: str):
                 content = message.content
                 assert content is not None
                 assert isinstance(content, TextContent) and isinstance(content.content, str)
-                validate_text_in_string("tennis", content.content)
+                # Validate response contains the expected keyword for THIS message's topic
+                validate_text_in_string(expected_keyword, content.content.lower())
 
             states = client.states.list(agent_id=agent_id, task_id=task.id)
             assert len(states) == 1
@@ -106,20 +116,29 @@ class TestStreamingMessages:
     """Test streaming message sending."""
 
     def test_stream_message(self, client: Agentex, agent_name: str, agent_id: str):
-        """Test streaming messages in a multi-turn conversation."""
+        """
+        Test message ordering with streaming by sending messages about distinct topics.
+
+        This validates that the agent receives messages in chronological order.
+        If messages are reversed (newest first), the agent would respond about
+        the wrong topic.
+        """
 
         # create a task for this specific conversation
         task_response = client.agents.create_task(agent_id, params=ParamsCreateTaskRequest(name=uuid.uuid1().hex))
         task = task_response.result
 
         assert task is not None
-        messages = [
-            "Hello, can you tell me a little bit about tennis? I want you to make sure you use the word 'tennis' in each response.",
-            "Pick one of the things you just mentioned, and dive deeper into it.",
-            "Can you now output a summary of this conversation",
+
+        # Each message asks about a distinct topic with a required keyword in response
+        # This validates message ordering: if order is wrong, agent responds about wrong topic
+        messages_and_expected_keywords = [
+            ("Tell me about tennis. You must include the word 'tennis' in your response.", "tennis"),
+            ("Now tell me about basketball. You must include the word 'basketball' in your response. Do not mention tennis.", "basketball"),
+            ("Now tell me about soccer. You must include the word 'soccer' in your response. Do not mention tennis or basketball.", "soccer"),
         ]
 
-        for i, msg in enumerate(messages):
+        for i, (msg, expected_keyword) in enumerate(messages_and_expected_keywords):
             stream = client.agents.send_message_stream(
                 agent_name=agent_name,
                 params=ParamsSendMessageRequest(
@@ -136,10 +155,9 @@ def test_stream_message(self, client: Agentex, agent_name: str, agent_id: str):
             aggregated_content, chunks = collect_streaming_response(stream)
 
             assert len(chunks) == 1
-            # Get the actual content (prefer full_content if available, otherwise use aggregated)
 
-            # Validate that "tennis" appears in the response because that is what our model does
-            validate_text_in_string("tennis", aggregated_content)
+            # Validate response contains the expected keyword for THIS message's topic
+            validate_text_in_string(expected_keyword, aggregated_content.lower())
 
             states = client.states.list(task_id=task.id)
             assert len(states) == 1
diff --git a/examples/tutorials/00_sync/020_streaming/project/acp.py b/examples/tutorials/00_sync/020_streaming/project/acp.py
@@ -68,7 +68,7 @@ async def handle_message_send(
         state = StateModel.model_validate(task_state.state)
 
     task_messages = await adk.messages.list(task_id=params.task.id)
-
+    task_messages = list(reversed(task_messages))  # API returns newest first, reverse to chronological order
 
     # Initialize the provider and run config to allow for tracing
     provider = SyncStreamingProvider(
diff --git a/examples/tutorials/00_sync/020_streaming/tests/test_agent.py b/examples/tutorials/00_sync/020_streaming/tests/test_agent.py
@@ -19,7 +19,7 @@
 import os
 
 import pytest
-from test_utils.sync import collect_streaming_response
+from test_utils.sync import validate_text_in_string, collect_streaming_response
 
 from agentex import Agentex
 from agentex.types import TextContent, TextContentParam
@@ -57,19 +57,27 @@ class TestNonStreamingMessages:
     """Test non-streaming message sending."""
 
     def test_send_message(self, client: Agentex, agent_name: str, agent_id: str):
-        """Test sending a message and receiving a response."""
+        """
+        Test message ordering by sending messages about distinct topics.
+
+        This validates that the agent receives messages in chronological order.
+        If messages are reversed (newest first), the agent would respond about
+        the wrong topic.
+        """
         task_response = client.agents.create_task(agent_id, params=ParamsCreateTaskRequest(name=uuid.uuid1().hex))
         task = task_response.result
 
         assert task is not None
 
-        messages = [
-            "Hello, can you tell me a little bit about tennis? I want you to make sure you use the word 'tennis' in each response.",
-            "Pick one of the things you just mentioned, and dive deeper into it.",
-            "Can you now output a summary of this conversation",
+        # Each message asks about a distinct topic with a required keyword in response
+        # This validates message ordering: if order is wrong, agent responds about wrong topic
+        messages_and_expected_keywords = [
+            ("Tell me about tennis. You must include the word 'tennis' in your response.", "tennis"),
+            ("Now tell me about basketball. You must include the word 'basketball' in your response. Do not mention tennis.", "basketball"),
+            ("Now tell me about soccer. You must include the word 'soccer' in your response. Do not mention tennis or basketball.", "soccer"),
         ]
 
-        for i, msg in enumerate(messages):
+        for i, (msg, expected_keyword) in enumerate(messages_and_expected_keywords):
             response = client.agents.send_message(
                 agent_name=agent_name,
                 params=ParamsSendMessageRequest(
@@ -88,6 +96,8 @@ def test_send_message(self, client: Agentex, agent_name: str, agent_id: str):
                 content = message.content
                 assert content is not None
                 assert isinstance(content, TextContent) and isinstance(content.content, str)
+                # Validate response contains the expected keyword for THIS message's topic
+                validate_text_in_string(expected_keyword, content.content.lower())
 
             states = client.states.list(agent_id=agent_id, task_id=task.id)
             assert len(states) == 1
@@ -105,19 +115,28 @@ class TestStreamingMessages:
     """Test streaming message sending."""
 
     def test_send_stream_message(self, client: Agentex, agent_name: str, agent_id: str):
-        """Test streaming messages in a multi-turn conversation."""
+        """
+        Test message ordering with streaming by sending messages about distinct topics.
+
+        This validates that the agent receives messages in chronological order.
+        If messages are reversed (newest first), the agent would respond about
+        the wrong topic.
+        """
         # create a task for this specific conversation
         task_response = client.agents.create_task(agent_id, params=ParamsCreateTaskRequest(name=uuid.uuid1().hex))
         task = task_response.result
 
         assert task is not None
-        messages = [
-            "Hello, can you tell me a little bit about tennis? I want you to make sure you use the word 'tennis' in each response.",
-            "Pick one of the things you just mentioned, and dive deeper into it.",
-            "Can you now output a summary of this conversation",
+
+        # Each message asks about a distinct topic with a required keyword in response
+        # This validates message ordering: if order is wrong, agent responds about wrong topic
+        messages_and_expected_keywords = [
+            ("Tell me about tennis. You must include the word 'tennis' in your response.", "tennis"),
+            ("Now tell me about basketball. You must include the word 'basketball' in your response. Do not mention tennis.", "basketball"),
+            ("Now tell me about soccer. You must include the word 'soccer' in your response. Do not mention tennis or basketball.", "soccer"),
         ]
 
-        for i, msg in enumerate(messages):
+        for i, (msg, expected_keyword) in enumerate(messages_and_expected_keywords):
             stream = client.agents.send_message_stream(
                 agent_name=agent_name,
                 params=ParamsSendMessageRequest(
@@ -137,6 +156,9 @@ def test_send_stream_message(self, client: Agentex, agent_name: str, agent_id: s
             # this is using the chat_completion_stream, so we will be getting chunks of data
             assert len(chunks) > 1, "No chunks received in streaming response."
 
+            # Validate response contains the expected keyword for THIS message's topic
+            validate_text_in_string(expected_keyword, aggregated_content.lower())
+
             states = client.states.list(agent_id=agent_id, task_id=task.id)
             assert len(states) == 1
 
diff --git a/examples/tutorials/10_async/00_base/040_other_sdks/tests/test_agent.py b/examples/tutorials/10_async/00_base/040_other_sdks/tests/test_agent.py
@@ -171,7 +171,13 @@ async def test_send_event_and_poll_with_tool_use(self, client: AsyncAgentex, age
 
     @pytest.mark.asyncio
     async def test_multi_turn_conversation_with_state(self, client: AsyncAgentex, agent_id: str):
-        """Test multiple turns of conversation with state preservation."""
+        """
+        Test message ordering by sending messages about distinct topics.
+
+        This validates that the agent receives messages in chronological order.
+        If messages are reversed (newest first), the agent would respond about
+        the wrong topic.
+        """
         # Create a task for this conversation
         task_response = await client.agents.create_task(agent_id, params=ParamsCreateTaskRequest(name=uuid.uuid1().hex))
         task = task_response.result
@@ -180,8 +186,8 @@ async def test_multi_turn_conversation_with_state(self, client: AsyncAgentex, ag
         # ensure the task is created before we send the first event
         await asyncio.sleep(1)
 
-        # First turn
-        user_message_1 = "My favorite color is blue."
+        # First turn - ask about tennis
+        user_message_1 = "Tell me about tennis. You must include the word 'tennis' in your response."
         first_turn_response_found = False
         async for message in send_event_and_poll_yielding(
             client=client,
@@ -198,6 +204,8 @@ async def test_multi_turn_conversation_with_state(self, client: AsyncAgentex, ag
                 and message.content.author == "agent"
                 and message.content.content
             ):
+                # Validate response is about tennis
+                assert "tennis" in message.content.content.lower(), "First response should be about tennis"
                 first_turn_response_found = True
                 break
 
@@ -219,8 +227,9 @@ async def test_multi_turn_conversation_with_state(self, client: AsyncAgentex, ag
 
         await asyncio.sleep(1)
 
-        # Second turn - reference previous context
-        user_message_2 = "What did I just tell you my favorite color was?"
+        # Second turn - ask about basketball (different topic)
+        # If message ordering is wrong, agent might respond about tennis instead
+        user_message_2 = "Now tell me about basketball. You must include the word 'basketball' in your response. Do not mention tennis."
         second_turn_response_found = False
         async for message in send_event_and_poll_yielding(
             client=client,
@@ -237,7 +246,8 @@ async def test_multi_turn_conversation_with_state(self, client: AsyncAgentex, ag
                 and message.content.content
             ):
                 response_text = message.content.content.lower()
-                assert "blue" in response_text
+                # Validate response mentions basketball, the current topic (absence of 'tennis' is not asserted)
+                assert "basketball" in response_text, f"Second response should be about basketball, got: {response_text}"
                 second_turn_response_found = True
                 break
 
diff --git a/examples/tutorials/10_async/10_temporal/010_agent_chat/tests/test_agent.py b/examples/tutorials/10_async/10_temporal/010_agent_chat/tests/test_agent.py
@@ -143,7 +143,13 @@ async def test_send_event_and_poll_with_calculator(self, client: AsyncAgentex, a
 
     @pytest.mark.asyncio
     async def test_multi_turn_conversation(self, client: AsyncAgentex, agent_id: str):
-        """Test multiple turns of conversation with state preservation."""
+        """
+        Test message ordering by sending messages about distinct topics.
+
+        This validates that the agent receives messages in chronological order.
+        If messages are reversed (newest first), the agent would respond about
+        the wrong topic.
+        """
         # Create a task for this conversation
         task_response = await client.agents.create_task(agent_id, params=ParamsCreateTaskRequest(name=uuid.uuid1().hex))
         task = task_response.result
@@ -152,8 +158,8 @@ async def test_multi_turn_conversation(self, client: AsyncAgentex, agent_id: str
         # Wait for workflow to initialize
         await asyncio.sleep(1)
 
-        # First turn
-        user_message_1 = "My favorite color is blue."
+        # First turn - ask about tennis
+        user_message_1 = "Tell me about tennis. You must include the word 'tennis' in your response."
         first_turn_found = False
         async for message in send_event_and_poll_yielding(
             client=client,
@@ -170,6 +176,8 @@ async def test_multi_turn_conversation(self, client: AsyncAgentex, agent_id: str
                 and message.content.author == "agent"
                 and message.content.content
             ):
+                # Validate response is about tennis
+                assert "tennis" in message.content.content.lower(), "First response should be about tennis"
                 first_turn_found = True
                 break
 
@@ -178,9 +186,10 @@ async def test_multi_turn_conversation(self, client: AsyncAgentex, agent_id: str
         # Wait a bit for state to update
         await asyncio.sleep(2)
 
-        # Second turn - reference previous context
+        # Second turn - ask about basketball (different topic)
+        # If message ordering is wrong, agent might respond about tennis instead
         found_response = False
-        user_message_2 = "What did I just tell you my favorite color was?"
+        user_message_2 = "Now tell me about basketball. You must include the word 'basketball' in your response. Do not mention tennis."
         async for message in send_event_and_poll_yielding(
             client=client,
             agent_id=agent_id,
@@ -196,11 +205,12 @@ async def test_multi_turn_conversation(self, client: AsyncAgentex, agent_id: str
                 and message.content.content
             ):
                 response_text = message.content.content.lower()
-                assert "blue" in response_text, f"Expected 'blue' in response but got: {response_text}"
+                # Validate response mentions basketball, the current topic (absence of 'tennis' is not asserted)
+                assert "basketball" in response_text, f"Second response should be about basketball, got: {response_text}"
                 found_response = True
                 break
 
-        assert found_response, "Did not receive final agent text response with context recall"
+        assert found_response, "Did not receive final agent text response with correct topic"
 
 
 class TestStreamingEvents: